aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 11:43:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 11:43:54 -0800
commitbb26c6c29b7cc9f39e491b074b09f3c284738d36 (patch)
treec7867af2bb4ff0feae889183efcd4d79b0f9a325 /kernel
parente14e61e967f2b3bdf23f05e4ae5b9aa830151a44 (diff)
parentcbacc2c7f066a1e01b33b0e27ae5efbf534bc2db (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6: (105 commits) SELinux: don't check permissions for kernel mounts security: pass mount flags to security_sb_kern_mount() SELinux: correctly detect proc filesystems of the form "proc/foo" Audit: Log TIOCSTI user namespaces: document CFS behavior user namespaces: require cap_set{ug}id for CLONE_NEWUSER user namespaces: let user_ns be cloned with fairsched CRED: fix sparse warnings User namespaces: use the current_user_ns() macro User namespaces: set of cleanups (v2) nfsctl: add headers for credentials coda: fix creds reference capabilities: define get_vfs_caps_from_disk when file caps are not enabled CRED: Allow kernel services to override LSM settings for task actions CRED: Add a kernel_service object class to SELinux CRED: Differentiate objective and effective subjective credentials on a task CRED: Documentation CRED: Use creds in file structs CRED: Prettify commoncap.c CRED: Make execve() take advantage of copy-on-write credentials ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/acct.c7
-rw-r--r--kernel/auditsc.c255
-rw-r--r--kernel/capability.c288
-rw-r--r--kernel/cgroup.c17
-rw-r--r--kernel/cred-internals.h21
-rw-r--r--kernel/cred.c588
-rw-r--r--kernel/exit.c23
-rw-r--r--kernel/fork.c62
-rw-r--r--kernel/futex.c20
-rw-r--r--kernel/futex_compat.c7
-rw-r--r--kernel/kmod.c30
-rw-r--r--kernel/nsproxy.c15
-rw-r--r--kernel/ptrace.c29
-rw-r--r--kernel/sched.c26
-rw-r--r--kernel/signal.c60
-rw-r--r--kernel/sys.c586
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/timer.c8
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/tsacct.c6
-rw-r--r--kernel/uid16.c31
-rw-r--r--kernel/user.c96
-rw-r--r--kernel/user_namespace.c65
-rw-r--r--kernel/workqueue.c8
25 files changed, 1485 insertions, 769 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19..b1e6b6625ea 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
diff --git a/kernel/acct.c b/kernel/acct.c
index f6006a60df5..d57b7cbb98b 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -530,15 +530,14 @@ static void do_acct_process(struct bsd_acct_struct *acct,
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
- ac.ac_uid = current->uid;
- ac.ac_gid = current->gid;
+ current_uid_gid(&ac.ac_uid, &ac.ac_gid);
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = current->uid;
- ac.ac_gid16 = current->gid;
+ ac.ac_uid16 = ac.ac_uid;
+ ac.ac_gid16 = ac.ac_gid;
#endif
#if ACCT_VERSION==3
ac.ac_pid = task_tgid_nr_ns(current, ns);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2a3f0afc4d2..4819f371197 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -65,6 +65,7 @@
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/inotify.h>
+#include <linux/capability.h>
#include "audit.h"
@@ -84,6 +85,15 @@ int audit_n_rules;
/* determines whether we collect data for signals sent */
int audit_signals;
+struct audit_cap_data {
+ kernel_cap_t permitted;
+ kernel_cap_t inheritable;
+ union {
+ unsigned int fE; /* effective bit of a file capability */
+ kernel_cap_t effective; /* effective set of a process */
+ };
+};
+
/* When fs/namei.c:getname() is called, we store the pointer in name and
* we don't let putname() free it (instead we free all of the saved
* pointers at syscall exit time).
@@ -100,6 +110,8 @@ struct audit_names {
gid_t gid;
dev_t rdev;
u32 osid;
+ struct audit_cap_data fcap;
+ unsigned int fcap_ver;
};
struct audit_aux_data {
@@ -184,6 +196,20 @@ struct audit_aux_data_pids {
int pid_count;
};
+struct audit_aux_data_bprm_fcaps {
+ struct audit_aux_data d;
+ struct audit_cap_data fcap;
+ unsigned int fcap_ver;
+ struct audit_cap_data old_pcap;
+ struct audit_cap_data new_pcap;
+};
+
+struct audit_aux_data_capset {
+ struct audit_aux_data d;
+ pid_t pid;
+ struct audit_cap_data cap;
+};
+
struct audit_tree_refs {
struct audit_tree_refs *next;
struct audit_chunk *c[31];
@@ -421,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk,
struct audit_names *name,
enum audit_state *state)
{
+ const struct cred *cred = get_task_cred(tsk);
int i, j, need_sid = 1;
u32 sid;
@@ -440,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_UID:
- result = audit_comparator(tsk->uid, f->op, f->val);
+ result = audit_comparator(cred->uid, f->op, f->val);
break;
case AUDIT_EUID:
- result = audit_comparator(tsk->euid, f->op, f->val);
+ result = audit_comparator(cred->euid, f->op, f->val);
break;
case AUDIT_SUID:
- result = audit_comparator(tsk->suid, f->op, f->val);
+ result = audit_comparator(cred->suid, f->op, f->val);
break;
case AUDIT_FSUID:
- result = audit_comparator(tsk->fsuid, f->op, f->val);
+ result = audit_comparator(cred->fsuid, f->op, f->val);
break;
case AUDIT_GID:
- result = audit_comparator(tsk->gid, f->op, f->val);
+ result = audit_comparator(cred->gid, f->op, f->val);
break;
case AUDIT_EGID:
- result = audit_comparator(tsk->egid, f->op, f->val);
+ result = audit_comparator(cred->egid, f->op, f->val);
break;
case AUDIT_SGID:
- result = audit_comparator(tsk->sgid, f->op, f->val);
+ result = audit_comparator(cred->sgid, f->op, f->val);
break;
case AUDIT_FSGID:
- result = audit_comparator(tsk->fsgid, f->op, f->val);
+ result = audit_comparator(cred->fsgid, f->op, f->val);
break;
case AUDIT_PERS:
result = audit_comparator(tsk->personality, f->op, f->val);
@@ -615,8 +642,10 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
}
- if (!result)
+ if (!result) {
+ put_cred(cred);
return 0;
+ }
}
if (rule->filterkey && ctx)
ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
@@ -624,6 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
}
+ put_cred(cred);
return 1;
}
@@ -1171,8 +1201,38 @@ static void audit_log_execve_info(struct audit_context *context,
kfree(buf);
}
+static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+ int i;
+
+ audit_log_format(ab, " %s=", prefix);
+ CAP_FOR_EACH_U32(i) {
+ audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
+ }
+}
+
+static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+ kernel_cap_t *perm = &name->fcap.permitted;
+ kernel_cap_t *inh = &name->fcap.inheritable;
+ int log = 0;
+
+ if (!cap_isclear(*perm)) {
+ audit_log_cap(ab, "cap_fp", perm);
+ log = 1;
+ }
+ if (!cap_isclear(*inh)) {
+ audit_log_cap(ab, "cap_fi", inh);
+ log = 1;
+ }
+
+ if (log)
+ audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
+}
+
static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
{
+ const struct cred *cred;
int i, call_panic = 0;
struct audit_buffer *ab;
struct audit_aux_data *aux;
@@ -1182,14 +1242,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->pid = tsk->pid;
if (!context->ppid)
context->ppid = sys_getppid();
- context->uid = tsk->uid;
- context->gid = tsk->gid;
- context->euid = tsk->euid;
- context->suid = tsk->suid;
- context->fsuid = tsk->fsuid;
- context->egid = tsk->egid;
- context->sgid = tsk->sgid;
- context->fsgid = tsk->fsgid;
+ cred = current_cred();
+ context->uid = cred->uid;
+ context->gid = cred->gid;
+ context->euid = cred->euid;
+ context->suid = cred->suid;
+ context->fsuid = cred->fsuid;
+ context->egid = cred->egid;
+ context->sgid = cred->sgid;
+ context->fsgid = cred->fsgid;
context->personality = tsk->personality;
ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
@@ -1334,6 +1395,28 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
break; }
+ case AUDIT_BPRM_FCAPS: {
+ struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
+ audit_log_format(ab, "fver=%x", axs->fcap_ver);
+ audit_log_cap(ab, "fp", &axs->fcap.permitted);
+ audit_log_cap(ab, "fi", &axs->fcap.inheritable);
+ audit_log_format(ab, " fe=%d", axs->fcap.fE);
+ audit_log_cap(ab, "old_pp", &axs->old_pcap.permitted);
+ audit_log_cap(ab, "old_pi", &axs->old_pcap.inheritable);
+ audit_log_cap(ab, "old_pe", &axs->old_pcap.effective);
+ audit_log_cap(ab, "new_pp", &axs->new_pcap.permitted);
+ audit_log_cap(ab, "new_pi", &axs->new_pcap.inheritable);
+ audit_log_cap(ab, "new_pe", &axs->new_pcap.effective);
+ break; }
+
+ case AUDIT_CAPSET: {
+ struct audit_aux_data_capset *axs = (void *)aux;
+ audit_log_format(ab, "pid=%d", axs->pid);
+ audit_log_cap(ab, "cap_pi", &axs->cap.inheritable);
+ audit_log_cap(ab, "cap_pp", &axs->cap.permitted);
+ audit_log_cap(ab, "cap_pe", &axs->cap.effective);
+ break; }
+
}
audit_log_end(ab);
}
@@ -1421,6 +1504,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
}
}
+ audit_log_fcaps(ab, n);
+
audit_log_end(ab);
}
@@ -1802,8 +1887,36 @@ static int audit_inc_name_count(struct audit_context *context,
return 0;
}
+
+static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
+{
+ struct cpu_vfs_cap_data caps;
+ int rc;
+
+ memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t));
+ memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t));
+ name->fcap.fE = 0;
+ name->fcap_ver = 0;
+
+ if (!dentry)
+ return 0;
+
+ rc = get_vfs_caps_from_disk(dentry, &caps);
+ if (rc)
+ return rc;
+
+ name->fcap.permitted = caps.permitted;
+ name->fcap.inheritable = caps.inheritable;
+ name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+ name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
+
+ return 0;
+}
+
+
/* Copy inode data into an audit_names. */
-static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
+static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+ const struct inode *inode)
{
name->ino = inode->i_ino;
name->dev = inode->i_sb->s_dev;
@@ -1812,6 +1925,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode
name->gid = inode->i_gid;
name->rdev = inode->i_rdev;
security_inode_getsecid(inode, &name->osid);
+ audit_copy_fcaps(name, dentry);
}
/**
@@ -1846,7 +1960,7 @@ void __audit_inode(const char *name, const struct dentry *dentry)
context->names[idx].name = NULL;
}
handle_path(dentry);
- audit_copy_inode(&context->names[idx], inode);
+ audit_copy_inode(&context->names[idx], dentry, inode);
}
/**
@@ -1907,7 +2021,7 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry,
if (!strcmp(dname, n->name) ||
!audit_compare_dname_path(dname, n->name, &dirlen)) {
if (inode)
- audit_copy_inode(n, inode);
+ audit_copy_inode(n, NULL, inode);
else
n->ino = (unsigned long)-1;
found_child = n->name;
@@ -1921,7 +2035,7 @@ add_names:
return;
idx = context->name_count - 1;
context->names[idx].name = NULL;
- audit_copy_inode(&context->names[idx], parent);
+ audit_copy_inode(&context->names[idx], NULL, parent);
}
if (!found_child) {
@@ -1942,7 +2056,7 @@ add_names:
}
if (inode)
- audit_copy_inode(&context->names[idx], inode);
+ audit_copy_inode(&context->names[idx], NULL, inode);
else
context->names[idx].ino = (unsigned long)-1;
}
@@ -1996,7 +2110,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
audit_log_format(ab, "login pid=%d uid=%u "
"old auid=%u new auid=%u"
" old ses=%u new ses=%u",
- task->pid, task->uid,
+ task->pid, task_uid(task),
task->loginuid, loginuid,
task->sessionid, sessionid);
audit_log_end(ab);
@@ -2379,7 +2493,7 @@ void __audit_ptrace(struct task_struct *t)
context->target_pid = t->pid;
context->target_auid = audit_get_loginuid(t);
- context->target_uid = t->uid;
+ context->target_uid = task_uid(t);
context->target_sessionid = audit_get_sessionid(t);
security_task_getsecid(t, &context->target_sid);
memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
@@ -2398,6 +2512,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
struct audit_aux_data_pids *axp;
struct task_struct *tsk = current;
struct audit_context *ctx = tsk->audit_context;
+ uid_t uid = current_uid(), t_uid = task_uid(t);
if (audit_pid && t->tgid == audit_pid) {
if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
@@ -2405,7 +2520,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
if (tsk->loginuid != -1)
audit_sig_uid = tsk->loginuid;
else
- audit_sig_uid = tsk->uid;
+ audit_sig_uid = uid;
security_task_getsecid(tsk, &audit_sig_sid);
}
if (!audit_signals || audit_dummy_context())
@@ -2417,7 +2532,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
if (!ctx->target_pid) {
ctx->target_pid = t->tgid;
ctx->target_auid = audit_get_loginuid(t);
- ctx->target_uid = t->uid;
+ ctx->target_uid = t_uid;
ctx->target_sessionid = audit_get_sessionid(t);
security_task_getsecid(t, &ctx->target_sid);
memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
@@ -2438,7 +2553,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
axp->target_pid[axp->pid_count] = t->tgid;
axp->target_auid[axp->pid_count] = audit_get_loginuid(t);
- axp->target_uid[axp->pid_count] = t->uid;
+ axp->target_uid[axp->pid_count] = t_uid;
axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
security_task_getsecid(t, &axp->target_sid[axp->pid_count]);
memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
@@ -2448,6 +2563,86 @@ int __audit_signal_info(int sig, struct task_struct *t)
}
/**
+ * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps
+ * @bprm: pointer to the bprm being processed
+ * @new: the proposed new credentials
+ * @old: the old credentials
+ *
+ * Simply check if the proc already has the caps given by the file and if not
+ * store the priv escalation info for later auditing at the end of the syscall
+ *
+ * -Eric
+ */
+int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
+ const struct cred *new, const struct cred *old)
+{
+ struct audit_aux_data_bprm_fcaps *ax;
+ struct audit_context *context = current->audit_context;
+ struct cpu_vfs_cap_data vcaps;
+ struct dentry *dentry;
+
+ ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+ if (!ax)
+ return -ENOMEM;
+
+ ax->d.type = AUDIT_BPRM_FCAPS;
+ ax->d.next = context->aux;
+ context->aux = (void *)ax;
+
+ dentry = dget(bprm->file->f_dentry);
+ get_vfs_caps_from_disk(dentry, &vcaps);
+ dput(dentry);
+
+ ax->fcap.permitted = vcaps.permitted;
+ ax->fcap.inheritable = vcaps.inheritable;
+ ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+ ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
+
+ ax->old_pcap.permitted = old->cap_permitted;
+ ax->old_pcap.inheritable = old->cap_inheritable;
+ ax->old_pcap.effective = old->cap_effective;
+
+ ax->new_pcap.permitted = new->cap_permitted;
+ ax->new_pcap.inheritable = new->cap_inheritable;
+ ax->new_pcap.effective = new->cap_effective;
+ return 0;
+}
+
+/**
+ * __audit_log_capset - store information about the arguments to the capset syscall
+ * @pid: target pid of the capset call
+ * @new: the new credentials
+ * @old: the old (current) credentials
+ *
+ * Record the arguments userspace sent to sys_capset for later printing by the
+ * audit system. Returns 0 if recorded or auditing is off, -ENOMEM on failure.
+ */
+int __audit_log_capset(pid_t pid,
+ const struct cred *new, const struct cred *old)
+{
+ struct audit_aux_data_capset *ax;
+ struct audit_context *context = current->audit_context;
+
+ if (likely(!audit_enabled || !context || context->dummy))
+ return 0;
+
+ ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+ if (!ax)
+ return -ENOMEM;
+
+ ax->d.type = AUDIT_CAPSET;
+ ax->d.next = context->aux;
+ context->aux = (void *)ax;
+ /* Each logged set is copied from the matching field of @new. */
+ ax->pid = pid;
+ ax->cap.effective = new->cap_effective;
+ ax->cap.inheritable = new->cap_inheritable;
+ ax->cap.permitted = new->cap_permitted;
+
+ return 0;
+}
+
+/**
* audit_core_dumps - record information about processes that end abnormally
* @signr: signal value
*
@@ -2458,7 +2653,8 @@ void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
u32 sid;
- uid_t auid = audit_get_loginuid(current);
+ uid_t auid = audit_get_loginuid(current), uid;
+ gid_t gid;
unsigned int sessionid = audit_get_sessionid(current);
if (!audit_enabled)
@@ -2468,8 +2664,9 @@ void audit_core_dumps(long signr)
return;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ current_uid_gid(&uid, &gid);
audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
- auid, current->uid, current->gid, sessionid);
+ auid, uid, gid, sessionid);
security_task_getsecid(current, &sid);
if (sid) {
char *ctx = NULL;
diff --git a/kernel/capability.c b/kernel/capability.c
index 33e51e78c2d..36b4b4daebe 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -7,6 +7,7 @@
* 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
*/
+#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -14,12 +15,7 @@
#include <linux/syscalls.h>
#include <linux/pid_namespace.h>
#include <asm/uaccess.h>
-
-/*
- * This lock protects task->cap_* for all tasks including current.
- * Locking rule: acquire this prior to tasklist_lock.
- */
-static DEFINE_SPINLOCK(task_capability_lock);
+#include "cred-internals.h"
/*
* Leveraged for setting/resetting capabilities
@@ -33,6 +29,17 @@ EXPORT_SYMBOL(__cap_empty_set);
EXPORT_SYMBOL(__cap_full_set);
EXPORT_SYMBOL(__cap_init_eff_set);
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+int file_caps_enabled = 1;
+
+static int __init file_caps_disable(char *str)
+{
+ file_caps_enabled = 0;
+ return 1;
+}
+__setup("no_file_caps", file_caps_disable);
+#endif
+
/*
* More recent versions of libcap are available from:
*
@@ -115,167 +122,12 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
return 0;
}
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
-
-/*
- * Without filesystem capability support, we nominally support one process
- * setting the capabilities of another
- */
-static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
- kernel_cap_t *pIp, kernel_cap_t *pPp)
-{
- struct task_struct *target;
- int ret;
-
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- if (pid && pid != task_pid_vnr(current)) {
- target = find_task_by_vpid(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
- }
- } else
- target = current;
-
- ret = security_capget(target, pEp, pIp, pPp);
-
-out:
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
-
- return ret;
-}
-
-/*
- * cap_set_pg - set capabilities for all processes in a given process
- * group. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
- struct pid *pgrp;
-
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- pgrp = find_vpid(pgrp_nr);
- do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
- target = g;
- while_each_thread(g, target) {
- if (!security_capset_check(target, effective,
- inheritable, permitted)) {
- security_capset_set(target, effective,
- inheritable, permitted);
- ret = 0;
- }
- found = 1;
- }
- } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
-
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
-
- if (!found)
- ret = 0;
- return ret;
-}
-
-/*
- * cap_set_all - set capabilities for all processes other than init
- * and self. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_all(kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
-
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- do_each_thread(g, target) {
- if (target == current
- || is_container_init(target->group_leader))
- continue;
- found = 1;
- if (security_capset_check(target, effective, inheritable,
- permitted))
- continue;
- ret = 0;
- security_capset_set(target, effective, inheritable, permitted);
- } while_each_thread(g, target);
-
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
-
- if (!found)
- ret = 0;
-
- return ret;
-}
-
-/*
- * Given the target pid does not refer to the current process we
- * need more elaborate support... (This support is not present when
- * filesystem capabilities are configured.)
- */
-static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *target;
- int ret;
-
- if (!capable(CAP_SETPCAP))
- return -EPERM;
-
- if (pid == -1) /* all procs other than current and init */
- return cap_set_all(effective, inheritable, permitted);
-
- else if (pid < 0) /* all procs in process group */
- return cap_set_pg(-pid, effective, inheritable, permitted);
-
- /* target != current */
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- target = find_task_by_vpid(pid);
- if (!target)
- ret = -ESRCH;
- else {
- ret = security_capset_check(target, effective, inheritable,
- permitted);
-
- /* having verified that the proposed changes are legal,
- we now put them into effect. */
- if (!ret)
- security_capset_set(target, effective, inheritable,
- permitted);
- }
-
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
-
- return ret;
-}
-
-#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
-
/*
- * If we have configured with filesystem capability support, then the
- * only thing that can change the capabilities of the current process
- * is the current process. As such, we can't be in this code at the
- * same time as we are in the process of setting capabilities in this
- * process. The net result is that we can limit our use of locks to
- * when we are reading the caps of another process.
+ * The only thing that can change the capabilities of the current
+ * process is the current process. As such, we can't be in this code
+ * at the same time as we are in the process of setting capabilities
+ * in this process. The net result is that we can limit our use of
+ * locks to when we are reading the caps of another process.
*/
static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
kernel_cap_t *pIp, kernel_cap_t *pPp)
@@ -285,7 +137,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
if (pid && (pid != task_pid_vnr(current))) {
struct task_struct *target;
- spin_lock(&task_capability_lock);
read_lock(&tasklist_lock);
target = find_task_by_vpid(pid);
@@ -295,50 +146,12 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
ret = security_capget(target, pEp, pIp, pPp);
read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
} else
ret = security_capget(current, pEp, pIp, pPp);
return ret;
}
-/*
- * With filesystem capability support configured, the kernel does not
- * permit the changing of capabilities in one process by another
- * process. (CAP_SETPCAP has much less broad semantics when configured
- * this way.)
- */
-static inline int do_sys_capset_other_tasks(pid_t pid,
- kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- return -EPERM;
-}
-
-#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
-
-/*
- * Atomically modify the effective capabilities returning the original
- * value. No permission check is performed here - it is assumed that the
- * caller is permitted to set the desired effective capabilities.
- */
-kernel_cap_t cap_set_effective(const kernel_cap_t pE_new)
-{
- kernel_cap_t pE_old;
-
- spin_lock(&task_capability_lock);
-
- pE_old = current->cap_effective;
- current->cap_effective = pE_new;
-
- spin_unlock(&task_capability_lock);
-
- return pE_old;
-}
-
-EXPORT_SYMBOL(cap_set_effective);
-
/**
* sys_capget - get the capabilities of a given process.
* @header: pointer to struct that contains capability version and
@@ -366,7 +179,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
return -EINVAL;
ret = cap_get_target_pid(pid, &pE, &pI, &pP);
-
if (!ret) {
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i;
@@ -412,16 +224,14 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
* @data: pointer to struct that contains the effective, permitted,
* and inheritable capabilities
*
- * Set capabilities for a given process, all processes, or all
- * processes in a given process group.
+ * Set capabilities for the current process only. The ability to set the
+ * capabilities of any other process(es) has been deprecated and removed.
*
* The restrictions on setting capabilities are specified as:
*
- * [pid is for the 'target' task. 'current' is the calling task.]
- *
- * I: any raised capabilities must be a subset of the (old current) permitted
- * P: any raised capabilities must be a subset of the (old current) permitted
- * E: must be set to a subset of (new target) permitted
+ * I: any raised capabilities must be a subset of the old permitted
+ * P: any raised capabilities must be a subset of the old permitted
+ * E: must be set to a subset of new permitted
*
* Returns 0 on success and < 0 on error.
*/
@@ -430,6 +240,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
kernel_cap_t inheritable, permitted, effective;
+ struct cred *new;
int ret;
pid_t pid;
@@ -440,10 +251,13 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
if (get_user(pid, &header->pid))
return -EFAULT;
- if (copy_from_user(&kdata, data, tocopy
- * sizeof(struct __user_cap_data_struct))) {
+ /* may only affect current now */
+ if (pid != 0 && pid != task_pid_vnr(current))
+ return -EPERM;
+
+ if (copy_from_user(&kdata, data,
+ tocopy * sizeof(struct __user_cap_data_struct)))
return -EFAULT;
- }
for (i = 0; i < tocopy; i++) {
effective.cap[i] = kdata[i].effective;
@@ -457,32 +271,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
i++;
}
- if (pid && (pid != task_pid_vnr(current)))
- ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
- &permitted);
- else {
- /*
- * This lock is required even when filesystem
- * capability support is configured - it protects the
- * sys_capget() call from returning incorrect data in
- * the case that the targeted process is not the
- * current one.
- */
- spin_lock(&task_capability_lock);
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
- ret = security_capset_check(current, &effective, &inheritable,
- &permitted);
- /*
- * Having verified that the proposed changes are
- * legal, we now put them into effect.
- */
- if (!ret)
- security_capset_set(current, &effective, &inheritable,
- &permitted);
- spin_unlock(&task_capability_lock);
- }
+ ret = security_capset(new, current_cred(),
+ &effective, &inheritable, &permitted);
+ if (ret < 0)
+ goto error;
+
+ ret = audit_log_capset(pid, new, current_cred());
+ if (ret < 0)
+ return ret;
+ return commit_creds(new);
+error:
+ abort_creds(new);
return ret;
}
@@ -498,6 +303,11 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
*/
int capable(int cap)
{
+ if (unlikely(!cap_valid(cap))) {
+ printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
+ BUG();
+ }
+
if (has_capability(current, cap)) {
current->flags |= PF_SUPERPRIV;
return 1;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2606d0fb4e5..48348dde6d8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -571,8 +571,8 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
if (inode) {
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
@@ -1280,6 +1280,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
struct task_struct *tsk;
+ const struct cred *cred = current_cred(), *tcred;
int ret;
if (pid) {
@@ -1289,14 +1290,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
rcu_read_unlock();
return -ESRCH;
}
- get_task_struct(tsk);
- rcu_read_unlock();
- if ((current->euid) && (current->euid != tsk->uid)
- && (current->euid != tsk->suid)) {
- put_task_struct(tsk);
+ tcred = __task_cred(tsk);
+ if (cred->euid &&
+ cred->euid != tcred->uid &&
+ cred->euid != tcred->suid) {
+ rcu_read_unlock();
return -EACCES;
}
+ get_task_struct(tsk);
+ rcu_read_unlock();
} else {
tsk = current;
get_task_struct(tsk);
diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h
new file mode 100644
index 00000000000..2dc4fc2d0bf
--- /dev/null
+++ b/kernel/cred-internals.h
@@ -0,0 +1,21 @@
+/* Internal credentials stuff
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * user.c
+ */
+static inline void sched_switch_user(struct task_struct *p)
+{
+#ifdef CONFIG_USER_SCHED
+ sched_move_task(p);
+#endif /* CONFIG_USER_SCHED */
+}
+
diff --git a/kernel/cred.c b/kernel/cred.c
new file mode 100644
index 00000000000..ff7bc071991
--- /dev/null
+++ b/kernel/cred.c
@@ -0,0 +1,588 @@
+/* Task credentials management - see Documentation/credentials.txt
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/cred.h>
+#include <linux/sched.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/init_task.h>
+#include <linux/security.h>
+#include <linux/cn_proc.h>
+#include "cred-internals.h"
+
+static struct kmem_cache *cred_jar;
+
+/*
+ * The common credentials for the initial task's thread group.
+ *
+ * Only built when CONFIG_KEYS is enabled.  init_cred.tgcred points at this
+ * record and prepare_kernel_cred() attaches it to the creds it builds.
+ * No keyring is named in the initialiser, so the session and process keyring
+ * pointers start out as NULL.
+ */
+#ifdef CONFIG_KEYS
+static struct thread_group_cred init_tgcred = {
+ .usage = ATOMIC_INIT(2), /* NOTE(review): why the count starts at 2 is not visible here - confirm */
+ .tgid = 0,
+ .lock = SPIN_LOCK_UNLOCKED,
+};
+#endif
+
+/*
+ * The initial credentials for the initial task.
+ *
+ * Statically initialised with the default securebits, the initial capability
+ * sets, INIT_USER as the owning user and init_groups as the group list.  Any
+ * member not named below is zero-initialised.  With CONFIG_KEYS the
+ * thread-group part points at init_tgcred.
+ *
+ * This record also serves as the template copied by
+ * prepare_usermodehelper_creds() and as the reference creds taken by
+ * prepare_kernel_cred() when no daemon is supplied.
+ */
+struct cred init_cred = {
+ .usage = ATOMIC_INIT(4), /* NOTE(review): the holders of these 4 initial refs are not visible here - confirm */
+ .securebits = SECUREBITS_DEFAULT,
+ .cap_inheritable = CAP_INIT_INH_SET,
+ .cap_permitted = CAP_FULL_SET,
+ .cap_effective = CAP_INIT_EFF_SET,
+ .cap_bset = CAP_INIT_BSET,
+ .user = INIT_USER,
+ .group_info = &init_groups,
+#ifdef CONFIG_KEYS
+ .tgcred = &init_tgcred,
+#endif
+};
+
+/*
+ * RCU callback performing the final teardown of a thread group's shared
+ * credentials.
+ *
+ * Queued by release_tgcred() after the usage count has reached zero; drops the
+ * references on the session and process keyrings and frees the record.
+ */
+#ifdef CONFIG_KEYS
+static void release_tgcred_rcu(struct rcu_head *rcu)
+{
+	struct thread_group_cred *dead;
+
+	dead = container_of(rcu, struct thread_group_cred, rcu);
+
+	/* the count must still be zero when the callback finally runs */
+	BUG_ON(atomic_read(&dead->usage) != 0);
+
+	key_put(dead->session_keyring);
+	key_put(dead->process_keyring);
+	kfree(dead);
+}
+#endif
+
+/*
+ * Drop one reference on the thread group credentials attached to @cred.
+ *
+ * When the last reference goes away the record is handed to RCU for disposal
+ * by release_tgcred_rcu().  Without CONFIG_KEYS this is a no-op.
+ */
+static void release_tgcred(struct cred *cred)
+{
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *shared = cred->tgcred;
+
+	if (!atomic_dec_and_test(&shared->usage))
+		return;
+
+	call_rcu(&shared->rcu, release_tgcred_rcu);
+#endif
+}
+
+/*
+ * RCU callback that carries out the real destruction of a credentials record
+ * after __put_cred() has queued it.
+ */
+static void put_cred_rcu(struct rcu_head *rcu)
+{
+	struct cred *dead = container_of(rcu, struct cred, rcu);
+
+	/* a non-zero count at this point means the record is still in use */
+	if (atomic_read(&dead->usage) != 0)
+		panic("CRED: put_cred_rcu() sees %p with usage %d\n",
+		      dead, atomic_read(&dead->usage));
+
+	/* release the LSM state, then every object the record was pinning */
+	security_cred_free(dead);
+	key_put(dead->thread_keyring);
+	key_put(dead->request_key_auth);
+	release_tgcred(dead);
+	put_group_info(dead->group_info);
+	free_uid(dead->user);
+
+	/* finally give the record itself back to the slab */
+	kmem_cache_free(cred_jar, dead);
+}
+
+/**
+ * __put_cred - Destroy a set of credentials
+ * @cred: The record to release
+ *
+ * Queue a set of credentials, on which no references remain, for destruction
+ * by put_cred_rcu() via call_rcu().  It is a bug to call this while the usage
+ * count is non-zero.
+ */
+void __put_cred(struct cred *cred)
+{
+	struct rcu_head *head = &cred->rcu;
+
+	BUG_ON(atomic_read(&cred->usage) != 0);
+	call_rcu(head, put_cred_rcu);
+}
+EXPORT_SYMBOL(__put_cred);
+
+/**
+ * prepare_creds - Prepare a new set of credentials for modification
+ *
+ * A task's credentials should not generally be altered in place.  Instead
+ * this function hands back a private copy of the current task's ->cred which
+ * the caller edits and then installs with commit_creds().
+ *
+ * The copy starts with a usage count of one and holds its own references on
+ * the group list, the user record and, with CONFIG_KEYS, the thread keyring,
+ * the request_key authorisation key and the thread group credentials.  The
+ * LSM is then asked to set up its part through security_prepare_creds().
+ *
+ * Returns a pointer to the new creds-to-be if successful, NULL otherwise.
+ *
+ * Call commit_creds() or abort_creds() to clean up.
+ */
+struct cred *prepare_creds(void)
+{
+	struct task_struct *self = current;
+	const struct cred *src;
+	struct cred *copy;
+
+	BUG_ON(atomic_read(&self->real_cred->usage) < 1);
+
+	copy = kmem_cache_alloc(cred_jar, GFP_KERNEL);
+	if (copy == NULL)
+		return NULL;
+
+	src = self->cred;
+	memcpy(copy, src, sizeof(*copy));
+
+	/* the copy is a new object: own count, own refs on what it points to */
+	atomic_set(&copy->usage, 1);
+	get_group_info(copy->group_info);
+	get_uid(copy->user);
+
+#ifdef CONFIG_KEYS
+	key_get(copy->thread_keyring);
+	key_get(copy->request_key_auth);
+	atomic_inc(&copy->tgcred->usage);
+#endif
+
+#ifdef CONFIG_SECURITY
+	/* the LSM blob must not be shared with the source creds */
+	copy->security = NULL;
+#endif
+
+	if (security_prepare_creds(copy, src, GFP_KERNEL) < 0) {
+		abort_creds(copy);
+		return NULL;
+	}
+
+	return copy;
+}
+EXPORT_SYMBOL(prepare_creds);
+
+/*
+ * Prepare credentials for current to perform an execve()
+ * - The caller must hold current->cred_exec_mutex
+ *
+ * Built on prepare_creds().  With CONFIG_KEYS the result additionally loses
+ * its thread keyring and gets a thread group record of its own, which
+ * inherits the session keyring but starts with no process keyring.
+ *
+ * Returns the new credentials, or NULL on allocation failure.
+ */
+struct cred *prepare_exec_creds(void)
+{
+	struct thread_group_cred *fresh_tg = NULL;
+	struct cred *creds;
+
+#ifdef CONFIG_KEYS
+	/* allocate up front so that nothing can fail after prepare_creds() */
+	fresh_tg = kmalloc(sizeof(*fresh_tg), GFP_KERNEL);
+	if (fresh_tg == NULL)
+		return NULL;
+#endif
+
+	creds = prepare_creds();
+	if (creds == NULL) {
+		kfree(fresh_tg);
+		return NULL;
+	}
+
+#ifdef CONFIG_KEYS
+	/* newly exec'd tasks don't get a thread keyring */
+	key_put(creds->thread_keyring);
+	creds->thread_keyring = NULL;
+
+	/* clone the per-thread-group creds, then reset count and lock */
+	memcpy(fresh_tg, creds->tgcred, sizeof(*fresh_tg));
+	atomic_set(&fresh_tg->usage, 1);
+	spin_lock_init(&fresh_tg->lock);
+
+	/* inherit the session keyring; new process keyring */
+	key_get(fresh_tg->session_keyring);
+	fresh_tg->process_keyring = NULL;
+
+	/* swap the shared record taken by prepare_creds() for the private one */
+	release_tgcred(creds);
+	creds->tgcred = fresh_tg;
+#endif
+
+	return creds;
+}
+
+/*
+ * prepare new credentials for the usermode helper dispatcher
+ *
+ * The result is a copy of init_cred with a usage count of one, its own
+ * references on the group list and user record and, with CONFIG_KEYS, a
+ * zeroed thread group record of its own and no thread or request_key
+ * keyrings.  All allocations use GFP_ATOMIC.
+ *
+ * Returns the new credentials, or NULL if an allocation or the LSM's
+ * security_prepare_creds() hook fails.
+ */
+struct cred *prepare_usermodehelper_creds(void)
+{
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *tgcred = NULL;
+#endif
+	struct cred *new;
+
+#ifdef CONFIG_KEYS
+	tgcred = kzalloc(sizeof(*tgcred), GFP_ATOMIC);
+	if (!tgcred)
+		return NULL;
+#endif
+
+	new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
+	if (!new)
+		goto free_tgcred;	/* don't leak the record allocated above */
+
+	memcpy(new, &init_cred, sizeof(struct cred));
+
+	atomic_set(&new->usage, 1);
+	get_group_info(new->group_info);
+	get_uid(new->user);
+
+#ifdef CONFIG_KEYS
+	new->thread_keyring = NULL;
+	new->request_key_auth = NULL;
+	new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
+
+	atomic_set(&tgcred->usage, 1);
+	spin_lock_init(&tgcred->lock);
+	new->tgcred = tgcred;
+#endif
+
+#ifdef CONFIG_SECURITY
+	new->security = NULL;
+#endif
+	if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
+		goto error;
+
+	BUG_ON(atomic_read(&new->usage) != 1);
+	return new;
+
+error:
+	/* tgcred is attached to new by now, so put_cred() releases both */
+	put_cred(new);
+	return NULL;
+
+free_tgcred:
+#ifdef CONFIG_KEYS
+	kfree(tgcred);
+#endif
+	return NULL;
+}
+
+/*
+ * Copy credentials for the new process created by fork()
+ *
+ * We share if we can, but under some circumstances we have to generate a new
+ * set.
+ *
+ * The new process gets the current process's subjective credentials as its
+ * objective and subjective credentials
+ *
+ * @p is the task being set up and @clone_flags are the clone flags in force.
+ * Returns 0 on success, -ENOMEM if prepare_creds() or the thread group record
+ * allocation fails, or the error returned by create_user_ns().
+ */
+int copy_creds(struct task_struct *p, unsigned long clone_flags)
+{
+#ifdef CONFIG_KEYS
+ struct thread_group_cred *tgcred;
+#endif
+ struct cred *new;
+ int ret;
+
+ mutex_init(&p->cred_exec_mutex);
+
+ /* sharing case: a CLONE_THREAD child whose ->cred carries no thread
+ * keyring keeps using that same record; one reference is taken for
+ * ->real_cred and another for ->cred */
+ if (
+#ifdef CONFIG_KEYS
+ !p->cred->thread_keyring &&
+#endif
+ clone_flags & CLONE_THREAD
+ ) {
+ p->real_cred = get_cred(p->cred);
+ get_cred(p->cred);
+ atomic_inc(&p->cred->user->processes);
+ return 0;
+ }
+
+ /* otherwise build a private copy (usage count 1) to hang off the child */
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ if (clone_flags & CLONE_NEWUSER) {
+ ret = create_user_ns(new);
+ if (ret < 0)
+ goto error_put;
+ }
+
+#ifdef CONFIG_KEYS
+ /* new threads get their own thread keyrings if their parent already
+ * had one */
+ if (new->thread_keyring) {
+ key_put(new->thread_keyring);
+ new->thread_keyring = NULL;
+ /* NOTE(review): the return value of the install is not checked */
+ if (clone_flags & CLONE_THREAD)
+ install_thread_keyring_to_cred(new);
+ }
+
+ /* we share the process and session keyrings between all the threads in
+ * a process - this is slightly icky as we violate COW credentials a
+ * bit */
+ if (!(clone_flags & CLONE_THREAD)) {
+ tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
+ if (!tgcred) {
+ ret = -ENOMEM;
+ goto error_put;
+ }
+ atomic_set(&tgcred->usage, 1);
+ spin_lock_init(&tgcred->lock);
+ tgcred->process_keyring = NULL;
+ tgcred->session_keyring = key_get(new->tgcred->session_keyring);
+
+ /* drop the ref on the inherited record before switching over */
+ release_tgcred(new);
+ new->tgcred = tgcred;
+ }
+#endif
+
+ atomic_inc(&new->user->processes);
+ /* prepare_creds() supplied one ref, get_cred() adds the second: one each
+ * for ->cred and ->real_cred */
+ p->cred = p->real_cred = get_cred(new);
+ return 0;
+
+error_put:
+ put_cred(new);
+ return ret;
+}
+
+/**
+ * commit_creds - Install new credentials upon the current task
+ * @new: The credentials to be assigned
+ *
+ * Install a new set of credentials to the current task, using RCU to replace
+ * the old set.  Both the objective and the subjective credentials pointers are
+ * updated.  This function may not be called if the subjective credentials are
+ * in an overridden state.
+ *
+ * This function eats the caller's reference to the new credentials.
+ *
+ * The task may have no mm (reparent_to_kthreadd() commits init_cred on behalf
+ * of a kernel thread), in which case the dumpable flag is left alone.
+ *
+ * Always returns 0 thus allowing this function to be tail-called at the end
+ * of, say, sys_setgid().
+ */
+int commit_creds(struct cred *new)
+{
+	struct task_struct *task = current;
+	const struct cred *old;
+
+	/* overridden subjective creds, or an under-referenced set, is a bug */
+	BUG_ON(task->cred != task->real_cred);
+	BUG_ON(atomic_read(&task->real_cred->usage) < 2);
+	BUG_ON(atomic_read(&new->usage) < 1);
+
+	old = task->real_cred;
+	security_commit_creds(new, old);
+
+	get_cred(new); /* we will require a ref for the subj creds too */
+
+	/* dumpability changes */
+	if (old->euid != new->euid ||
+	    old->egid != new->egid ||
+	    old->fsuid != new->fsuid ||
+	    old->fsgid != new->fsgid ||
+	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+		/* there is no dumpable state to update on a task without an mm */
+		if (task->mm)
+			set_dumpable(task->mm, suid_dumpable);
+		task->pdeath_signal = 0;
+		smp_wmb();
+	}
+
+	/* alter the thread keyring */
+	if (new->fsuid != old->fsuid)
+		key_fsuid_changed(task);
+	if (new->fsgid != old->fsgid)
+		key_fsgid_changed(task);
+
+	/* do it
+	 * - What if a process setreuid()'s and this brings the
+	 *   new uid over his NPROC rlimit?  We can check this now
+	 *   cheaply with the new uid cache, so if it matters
+	 *   we should be checking for it.  -DaveM
+	 */
+	if (new->user != old->user)
+		atomic_inc(&new->user->processes);
+	rcu_assign_pointer(task->real_cred, new);
+	rcu_assign_pointer(task->cred, new);
+	if (new->user != old->user)
+		atomic_dec(&old->user->processes);
+
+	sched_switch_user(task);
+
+	/* send notifications */
+	if (new->uid   != old->uid  ||
+	    new->euid  != old->euid ||
+	    new->suid  != old->suid ||
+	    new->fsuid != old->fsuid)
+		proc_id_connector(task, PROC_EVENT_UID);
+
+	if (new->gid   != old->gid  ||
+	    new->egid  != old->egid ||
+	    new->sgid  != old->sgid ||
+	    new->fsgid != old->fsgid)
+		proc_id_connector(task, PROC_EVENT_GID);
+
+	/* release the old obj and subj refs both */
+	put_cred(old);
+	put_cred(old);
+	return 0;
+}
+EXPORT_SYMBOL(commit_creds);
+
+/**
+ * abort_creds - Discard a set of credentials that will not be committed
+ * @new: The credentials that were going to be applied
+ *
+ * Drop the caller's reference on a set of credentials that were under
+ * construction.  @new must hold at least one reference on entry.
+ */
+void abort_creds(struct cred *new)
+{
+	int refs = atomic_read(&new->usage);
+
+	BUG_ON(refs < 1);
+	put_cred(new);
+}
+EXPORT_SYMBOL(abort_creds);
+
+/**
+ * override_creds - Override the current process's subjective credentials
+ * @new: The credentials to be assigned
+ *
+ * Point current->cred at @new, taking an extra reference on @new for the
+ * duration of the override.  The previous pointer is returned, without any
+ * reference being dropped, so that it can later be passed to revert_creds().
+ */
+const struct cred *override_creds(const struct cred *new)
+{
+	const struct cred *previous = current->cred;
+	const struct cred *pinned = get_cred(new);
+
+	rcu_assign_pointer(current->cred, pinned);
+	return previous;
+}
+EXPORT_SYMBOL(override_creds);
+
+/**
+ * revert_creds - Revert a temporary subjective credentials override
+ * @old: The credentials to be restored
+ *
+ * Put @old back as current->cred and drop the reference held on the
+ * credentials that were installed in the meantime.
+ */
+void revert_creds(const struct cred *old)
+{
+	const struct cred *temporary = current->cred;
+
+	rcu_assign_pointer(current->cred, old);
+	put_cred(temporary);
+}
+EXPORT_SYMBOL(revert_creds);
+
+/*
+ * Boot-time setup for the credentials code: create the "cred_jar" slab cache
+ * from which every struct cred is allocated.  SLAB_PANIC is passed, so no
+ * failure handling is needed here.
+ */
+void __init cred_init(void)
+{
+	const unsigned long flags = SLAB_HWCACHE_ALIGN | SLAB_PANIC;
+
+	cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0,
+				     flags, NULL);
+}
+
+/**
+ * prepare_kernel_cred - Prepare a set of credentials for a kernel service
+ * @daemon: A userspace daemon to be used as a reference
+ *
+ * Prepare a set of credentials for a kernel service.  This can then be used to
+ * override a task's own credentials so that work can be done on behalf of that
+ * task that requires a different subjective context.
+ *
+ * @daemon is used to provide a base for the security record, but can be NULL.
+ * If @daemon is supplied, then the new credentials start as a copy of that
+ * task's credentials; otherwise they start as a copy of init_cred.  In both
+ * cases the result uses init_tgcred and carries no thread keyring and no
+ * request_key authorisation key.
+ *
+ * The caller may change these controls afterwards if desired.
+ *
+ * Returns the new credentials (usage count 1), or NULL if out of memory or if
+ * the LSM's security_prepare_creds() hook fails.
+ *
+ * Does not take, and does not return holding current->cred_exec_mutex.
+ */
+struct cred *prepare_kernel_cred(struct task_struct *daemon)
+{
+	const struct cred *old;
+	struct cred *new;
+
+	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	if (daemon)
+		old = get_task_cred(daemon);
+	else
+		old = get_cred(&init_cred);
+
+	/* the slab object is uninitialised: copy the reference creds first,
+	 * and give the copy its own count before anything can put_cred() it */
+	*new = *old;
+	atomic_set(&new->usage, 1);
+	get_uid(new->user);
+	get_group_info(new->group_info);
+
+#ifdef CONFIG_KEYS
+	atomic_inc(&init_tgcred.usage);
+	new->tgcred = &init_tgcred;
+	new->request_key_auth = NULL;
+	new->thread_keyring = NULL;
+	new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+#endif
+
+#ifdef CONFIG_SECURITY
+	/* the LSM blob must not be shared with the reference creds */
+	new->security = NULL;
+#endif
+	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+		goto error;
+
+	put_cred(old);
+	return new;
+
+error:
+	put_cred(new);
+	put_cred(old);	/* drop the ref taken on the reference creds too */
+	return NULL;
+}
+EXPORT_SYMBOL(prepare_kernel_cred);
+
+/**
+ * set_security_override - Set the security ID in a set of credentials
+ * @new: The credentials to alter
+ * @secid: The LSM security ID to set
+ *
+ * Ask the LSM, through security_kernel_act_as(), to make @new act with the
+ * security ID @secid, so that the subjective security is overridden when
+ * these credentials are used.  The LSM hook's return value is passed back.
+ */
+int set_security_override(struct cred *new, u32 secid)
+{
+	int ret = security_kernel_act_as(new, secid);
+
+	return ret;
+}
+EXPORT_SYMBOL(set_security_override);
+
+/**
+ * set_security_override_from_ctx - Set the security ID in a set of credentials
+ * @new: The credentials to alter
+ * @secctx: The LSM security context to generate the security ID from.
+ *
+ * As set_security_override(), except that the security ID is given in string
+ * form as a security context.  The string is first translated with
+ * security_secctx_to_secid(); a negative result from that translation is
+ * returned unchanged, otherwise the outcome of set_security_override() is.
+ */
+int set_security_override_from_ctx(struct cred *new, const char *secctx)
+{
+	u32 secid;
+	int err = security_secctx_to_secid(secctx, strlen(secctx), &secid);
+
+	return err < 0 ? err : set_security_override(new, secid);
+}
+EXPORT_SYMBOL(set_security_override_from_ctx);
+
+/**
+ * set_create_files_as - Set the LSM file create context in a set of credentials
+ * @new: The credentials to alter
+ * @inode: The inode to take the context from
+ *
+ * Copy the owner and group of @inode into the fsuid and fsgid of @new, then
+ * have the LSM, through security_kernel_create_files_as(), change the file
+ * creation context of @new to match @inode.  The LSM hook's return value is
+ * passed back.
+ */
+int set_create_files_as(struct cred *new, struct inode *inode)
+{
+	uid_t owner = inode->i_uid;
+	gid_t group = inode->i_gid;
+
+	new->fsuid = owner;
+	new->fsgid = group;
+	return security_kernel_create_files_as(new, inode);
+}
+EXPORT_SYMBOL(set_create_files_as);
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f..ccb87162ff6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -46,12 +46,14 @@
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <linux/init_task.h>
#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include "cred-internals.h"
static void exit_mm(struct task_struct * tsk);
@@ -164,7 +166,10 @@ void release_task(struct task_struct * p)
int zap_leader;
repeat:
tracehook_prepare_release_task(p);
- atomic_dec(&p->user->processes);
+ /* don't need to get the RCU readlock here - the process is dead and
+ * can't be modifying its own credentials */
+ atomic_dec(&__task_cred(p)->user->processes);
+
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
tracehook_finish_release_task(p);
@@ -339,12 +344,12 @@ static void reparent_to_kthreadd(void)
/* cpus_allowed? */
/* rt_priority? */
/* signals? */
- security_task_reparent_to_init(current);
memcpy(current->signal->rlim, init_task.signal->rlim,
sizeof(current->signal->rlim));
- atomic_inc(&(INIT_USER->__count));
+
+ atomic_inc(&init_cred.usage);
+ commit_creds(&init_cred);
write_unlock_irq(&tasklist_lock);
- switch_uid(INIT_USER);
}
void __set_special_pids(struct pid *pid)
@@ -1078,7 +1083,6 @@ NORET_TYPE void do_exit(long code)
check_stack_usage();
exit_thread();
cgroup_exit(tsk, 1);
- exit_keys(tsk);
if (group_dead && tsk->signal->leader)
disassociate_ctty(1);
@@ -1263,12 +1267,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
unsigned long state;
int retval, status, traced;
pid_t pid = task_pid_vnr(p);
+ uid_t uid = __task_cred(p)->uid;
if (!likely(options & WEXITED))
return 0;
if (unlikely(options & WNOWAIT)) {
- uid_t uid = p->uid;
int exit_code = p->exit_code;
int why, status;
@@ -1389,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
if (!retval && infop)
retval = put_user(pid, &infop->si_pid);
if (!retval && infop)
- retval = put_user(p->uid, &infop->si_uid);
+ retval = put_user(uid, &infop->si_uid);
if (!retval)
retval = pid;
@@ -1454,7 +1458,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
if (!unlikely(options & WNOWAIT))
p->exit_code = 0;
- uid = p->uid;
+ /* don't need the RCU readlock here as we're holding a spinlock */
+ uid = __task_cred(p)->uid;
unlock_sig:
spin_unlock_irq(&p->sighand->siglock);
if (!exit_code)
@@ -1528,10 +1533,10 @@ static int wait_task_continued(struct task_struct *p, int options,
}
if (!unlikely(options & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+ uid = __task_cred(p)->uid;
spin_unlock_irq(&p->sighand->siglock);
pid = task_pid_vnr(p);
- uid = p->uid;
get_task_struct(p);
read_unlock(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index 495da2e9a8b..4e8ca23c0ed 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -147,9 +147,8 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
- security_task_free(tsk);
- free_uid(tsk->user);
- put_group_info(tsk->group_info);
+ put_cred(tsk->real_cred);
+ put_cred(tsk->cred);
delayacct_tsk_free(tsk);
if (!profile_handoff_task(tsk))
@@ -818,12 +817,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
if (!sig)
return -ENOMEM;
- ret = copy_thread_group_keys(tsk);
- if (ret < 0) {
- kmem_cache_free(signal_cachep, sig);
- return ret;
- }
-
atomic_set(&sig->count, 1);
atomic_set(&sig->live, 1);
init_waitqueue_head(&sig->wait_chldexit);
@@ -868,7 +861,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_signal(struct signal_struct *sig)
{
thread_group_cputime_free(sig);
- exit_thread_group_keys(sig);
tty_kref_put(sig->tty);
kmem_cache_free(signal_cachep, sig);
}
@@ -984,16 +976,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
- if (atomic_read(&p->user->processes) >=
+ if (atomic_read(&p->real_cred->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
- p->user != current->nsproxy->user_ns->root_user)
+ p->real_cred->user != INIT_USER)
goto bad_fork_free;
}
- atomic_inc(&p->user->__count);
- atomic_inc(&p->user->processes);
- get_group_info(p->group_info);
+ retval = copy_creds(p, clone_flags);
+ if (retval < 0)
+ goto bad_fork_free;
/*
* If multiple threads are within copy_process(), then this check
@@ -1048,10 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
do_posix_clock_monotonic_gettime(&p->start_time);
p->real_start_time = p->start_time;
monotonic_to_bootbased(&p->real_start_time);
-#ifdef CONFIG_SECURITY
- p->security = NULL;
-#endif
- p->cap_bset = current->cap_bset;
p->io_context = NULL;
p->audit_context = NULL;
cgroup_fork(p);
@@ -1096,10 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
- if ((retval = security_task_alloc(p)))
- goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
- goto bad_fork_cleanup_security;
+ goto bad_fork_cleanup_policy;
/* copy all the process information */
if ((retval = copy_semundo(clone_flags, p)))
goto bad_fork_cleanup_audit;
@@ -1113,10 +1099,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_sighand;
if ((retval = copy_mm(clone_flags, p)))
goto bad_fork_cleanup_signal;
- if ((retval = copy_keys(clone_flags, p)))
- goto bad_fork_cleanup_mm;
if ((retval = copy_namespaces(clone_flags, p)))
- goto bad_fork_cleanup_keys;
+ goto bad_fork_cleanup_mm;
if ((retval = copy_io(clone_flags, p)))
goto bad_fork_cleanup_namespaces;
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
@@ -1281,8 +1265,6 @@ bad_fork_cleanup_io:
put_io_context(p->io_context);
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
-bad_fork_cleanup_keys:
- exit_keys(p);
bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
@@ -1298,8 +1280,6 @@ bad_fork_cleanup_semundo:
exit_sem(p);
bad_fork_cleanup_audit:
audit_free(p);
-bad_fork_cleanup_security:
- security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
@@ -1312,9 +1292,9 @@ bad_fork_cleanup_cgroup:
bad_fork_cleanup_put_domain:
module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
- put_group_info(p->group_info);
- atomic_dec(&p->user->processes);
- free_uid(p->user);
+ atomic_dec(&p->cred->user->processes);
+ put_cred(p->real_cred);
+ put_cred(p->cred);
bad_fork_free:
free_task(p);
fork_out:
@@ -1358,6 +1338,21 @@ long do_fork(unsigned long clone_flags,
long nr;
/*
+ * Do some preliminary argument and permissions checking before we
+ * actually start allocating stuff
+ */
+ if (clone_flags & CLONE_NEWUSER) {
+ if (clone_flags & CLONE_THREAD)
+ return -EINVAL;
+ /* hopefully this check will go away when userns support is
+ * complete
+ */
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
+ !capable(CAP_SETGID))
+ return -EPERM;
+ }
+
+ /*
* We hope to recycle these flags after 2.6.26
*/
if (unlikely(clone_flags & CLONE_STOPPED)) {
@@ -1605,8 +1600,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
- CLONE_NEWNET))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
goto bad_unshare_out;
/*
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514..4fe790e89d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -439,13 +439,20 @@ static void free_pi_state(struct futex_pi_state *pi_state)
static struct task_struct * futex_find_get_task(pid_t pid)
{
struct task_struct *p;
+ const struct cred *cred = current_cred(), *pcred;
rcu_read_lock();
p = find_task_by_vpid(pid);
- if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
+ if (!p) {
p = ERR_PTR(-ESRCH);
- else
- get_task_struct(p);
+ } else {
+ pcred = __task_cred(p);
+ if (cred->euid != pcred->euid &&
+ cred->euid != pcred->uid)
+ p = ERR_PTR(-ESRCH);
+ else
+ get_task_struct(p);
+ }
rcu_read_unlock();
@@ -1829,6 +1836,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
{
struct robust_list_head __user *head;
unsigned long ret;
+ const struct cred *cred = current_cred(), *pcred;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -1844,8 +1852,10 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
if (!p)
goto err_unlock;
ret = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_PTRACE))
+ pcred = __task_cred(p);
+ if (cred->euid != pcred->euid &&
+ cred->euid != pcred->uid &&
+ !capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->robust_list;
rcu_read_unlock();
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 04ac3a9e42c..d607a5b9ee2 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -135,6 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
{
struct compat_robust_list_head __user *head;
unsigned long ret;
+ const struct cred *cred = current_cred(), *pcred;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -150,8 +151,10 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
if (!p)
goto err_unlock;
ret = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_PTRACE))
+ pcred = __task_cred(p);
+ if (cred->euid != pcred->euid &&
+ cred->euid != pcred->uid &&
+ !capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->compat_robust_list;
read_unlock(&tasklist_lock);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 3d3c3ea3a02..b46dbb90866 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -118,10 +118,10 @@ EXPORT_SYMBOL(request_module);
struct subprocess_info {
struct work_struct work;
struct completion *complete;
+ struct cred *cred;
char *path;
char **argv;
char **envp;
- struct key *ring;
enum umh_wait wait;
int retval;
struct file *stdin;
@@ -134,19 +134,20 @@ struct subprocess_info {
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
- struct key *new_session, *old_session;
int retval;
- /* Unblock all signals and set the session keyring. */
- new_session = key_get(sub_info->ring);
+ BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
+ /* Unblock all signals */
spin_lock_irq(&current->sighand->siglock);
- old_session = __install_session_keyring(current, new_session);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- key_put(old_session);
+ /* Install the credentials */
+ commit_creds(sub_info->cred);
+ sub_info->cred = NULL;
/* Install input pipe when needed */
if (sub_info->stdin) {
@@ -185,6 +186,8 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
(*info->cleanup)(info->argv, info->envp);
+ if (info->cred)
+ put_cred(info->cred);
kfree(info);
}
EXPORT_SYMBOL(call_usermodehelper_freeinfo);
@@ -240,6 +243,8 @@ static void __call_usermodehelper(struct work_struct *work)
pid_t pid;
enum umh_wait wait = sub_info->wait;
+ BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
/* CLONE_VFORK: wait until the usermode helper has execve'd
* successfully We need the data structures to stay around
* until that is done. */
@@ -362,6 +367,9 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
sub_info->path = path;
sub_info->argv = argv;
sub_info->envp = envp;
+ sub_info->cred = prepare_usermodehelper_creds();
+ if (!sub_info->cred)
+ return NULL;
out:
return sub_info;
@@ -376,7 +384,13 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
void call_usermodehelper_setkeys(struct subprocess_info *info,
struct key *session_keyring)
{
- info->ring = session_keyring;
+#ifdef CONFIG_KEYS
+ struct thread_group_cred *tgcred = info->cred->tgcred;
+ key_put(tgcred->session_keyring);
+ tgcred->session_keyring = key_get(session_keyring);
+#else
+ BUG();
+#endif
}
EXPORT_SYMBOL(call_usermodehelper_setkeys);
@@ -444,6 +458,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
+ BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
helper_lock();
if (sub_info->path[0] == '\0')
goto out;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 1d3ef29a258..63598dca2d0 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_pid;
}
- new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
- if (IS_ERR(new_nsp->user_ns)) {
- err = PTR_ERR(new_nsp->user_ns);
- goto out_user;
- }
-
new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
if (IS_ERR(new_nsp->net_ns)) {
err = PTR_ERR(new_nsp->net_ns);
@@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
return new_nsp;
out_net:
- if (new_nsp->user_ns)
- put_user_ns(new_nsp->user_ns);
-out_user:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
out_pid:
@@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
+ CLONE_NEWPID | CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
- if (ns->user_ns)
- put_user_ns(ns->user_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
@@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWNET)))
+ CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4c8bcd7dd8e..ca2df68faf7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill)
int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
+ const struct cred *cred = current_cred(), *tcred;
+
/* May we inspect the given task?
* This check is used both for attaching with ptrace
* and for allowing access to sensitive information in /proc.
@@ -127,13 +129,19 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
/* Don't let security modules deny introspection */
if (task == current)
return 0;
- if (((current->uid != task->euid) ||
- (current->uid != task->suid) ||
- (current->uid != task->uid) ||
- (current->gid != task->egid) ||
- (current->gid != task->sgid) ||
- (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if ((cred->uid != tcred->euid ||
+ cred->uid != tcred->suid ||
+ cred->uid != tcred->uid ||
+ cred->gid != tcred->egid ||
+ cred->gid != tcred->sgid ||
+ cred->gid != tcred->gid) &&
+ !capable(CAP_SYS_PTRACE)) {
+ rcu_read_unlock();
return -EPERM;
+ }
+ rcu_read_unlock();
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
@@ -163,6 +171,14 @@ int ptrace_attach(struct task_struct *task)
if (same_thread_group(task, current))
goto out;
+ /* Protect exec's credential calculations against our interference;
+ * SUID, SGID and LSM creds get determined differently under ptrace.
+ */
+ retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+ if (retval < 0)
+ goto out;
+
+ retval = -EPERM;
repeat:
/*
* Nasty, nasty.
@@ -202,6 +218,7 @@ repeat:
bad:
write_unlock_irqrestore(&tasklist_lock, flags);
task_unlock(task);
+ mutex_unlock(&current->cred_exec_mutex);
out:
return retval;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index e4bb1dd7b30..33cf4a1cbcd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -345,7 +345,9 @@ static inline struct task_group *task_group(struct task_struct *p)
struct task_group *tg;
#ifdef CONFIG_USER_SCHED
- tg = p->user->tg;
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
#elif defined(CONFIG_CGROUP_SCHED)
tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
struct task_group, css);
@@ -5134,6 +5136,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
set_load_weight(p);
}
+/*
+ * Does the caller's effective UID match either the effective or the real UID
+ * of @p?  The target's credentials are read under rcu_read_lock().
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+	const struct cred *mine = current_cred();
+	const struct cred *theirs;
+	bool same = true;
+
+	rcu_read_lock();
+	theirs = __task_cred(p);
+	if (mine->euid != theirs->euid && mine->euid != theirs->uid)
+		same = false;
+	rcu_read_unlock();
+
+	return same;
+}
+
static int __sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param, bool user)
{
@@ -5193,8 +5211,7 @@ recheck:
return -EPERM;
/* can't change other user's priorities */
- if ((current->euid != p->euid) &&
- (current->euid != p->uid))
+ if (!check_same_owner(p))
return -EPERM;
}
@@ -5426,8 +5443,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
read_unlock(&tasklist_lock);
retval = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_NICE))
+ if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p, 0, NULL);
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc65445..2a64304ed54 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -177,6 +177,11 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
return sig;
}
+/*
+ * allocate a new signal queue record
+ * - this may be called without locks if and only if t == current, otherwise an
+ * appropriate lock must be held to stop the target task from exiting
+ */
static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
int override_rlimit)
{
@@ -184,11 +189,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
struct user_struct *user;
/*
- * In order to avoid problems with "switch_user()", we want to make
- * sure that the compiler doesn't re-load "t->user"
+ * We won't get problems with the target's UID changing under us
+ * because changing it requires RCU be used, and if t != current, the
+ * caller must be holding the RCU readlock (by way of a spinlock) and
+ * we use RCU protection here
*/
- user = t->user;
- barrier();
+ user = get_uid(__task_cred(t)->user);
atomic_inc(&user->sigpending);
if (override_rlimit ||
atomic_read(&user->sigpending) <=
@@ -196,12 +202,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
q = kmem_cache_alloc(sigqueue_cachep, flags);
if (unlikely(q == NULL)) {
atomic_dec(&user->sigpending);
+ free_uid(user);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
- q->user = get_uid(user);
+ q->user = user;
}
- return(q);
+
+ return q;
}
static void __sigqueue_free(struct sigqueue *q)
@@ -562,10 +570,12 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
/*
* Bad permissions for sending the signal
+ * - the caller must hold at least the RCU read lock
*/
static int check_kill_permission(int sig, struct siginfo *info,
struct task_struct *t)
{
+ const struct cred *cred = current_cred(), *tcred;
struct pid *sid;
int error;
@@ -579,8 +589,11 @@ static int check_kill_permission(int sig, struct siginfo *info,
if (error)
return error;
- if ((current->euid ^ t->suid) && (current->euid ^ t->uid) &&
- (current->uid ^ t->suid) && (current->uid ^ t->uid) &&
+ tcred = __task_cred(t);
+ if ((cred->euid ^ tcred->suid) &&
+ (cred->euid ^ tcred->uid) &&
+ (cred->uid ^ tcred->suid) &&
+ (cred->uid ^ tcred->uid) &&
!capable(CAP_KILL)) {
switch (sig) {
case SIGCONT:
@@ -844,7 +857,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
q->info.si_errno = 0;
q->info.si_code = SI_USER;
q->info.si_pid = task_pid_vnr(current);
- q->info.si_uid = current->uid;
+ q->info.si_uid = current_uid();
break;
case (unsigned long) SEND_SIG_PRIV:
q->info.si_signo = sig;
@@ -1008,6 +1021,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
return sighand;
}
+/*
+ * send signal info to all the members of a group
+ * - the caller must hold the RCU read lock at least
+ */
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
unsigned long flags;
@@ -1029,8 +1046,8 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
/*
* __kill_pgrp_info() sends a signal to a process group: this is what the tty
* control characters do (^C, ^Z etc)
+ * - the caller must hold at least a readlock on tasklist_lock
*/
-
int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
{
struct task_struct *p = NULL;
@@ -1086,6 +1103,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
{
int ret = -EINVAL;
struct task_struct *p;
+ const struct cred *pcred;
if (!valid_signal(sig))
return ret;
@@ -1096,9 +1114,11 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
ret = -ESRCH;
goto out_unlock;
}
- if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
- && (euid != p->suid) && (euid != p->uid)
- && (uid != p->suid) && (uid != p->uid)) {
+ pcred = __task_cred(p);
+ if ((info == SEND_SIG_NOINFO ||
+ (!is_si_special(info) && SI_FROMUSER(info))) &&
+ euid != pcred->suid && euid != pcred->uid &&
+ uid != pcred->suid && uid != pcred->uid) {
ret = -EPERM;
goto out_unlock;
}
@@ -1369,10 +1389,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
*/
rcu_read_lock();
info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+ info.si_uid = __task_cred(tsk)->uid;
rcu_read_unlock();
- info.si_uid = tsk->uid;
-
thread_group_cputime(tsk, &cputime);
info.si_utime = cputime_to_jiffies(cputime.utime);
info.si_stime = cputime_to_jiffies(cputime.stime);
@@ -1440,10 +1459,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
*/
rcu_read_lock();
info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+ info.si_uid = __task_cred(tsk)->uid;
rcu_read_unlock();
- info.si_uid = tsk->uid;
-
info.si_utime = cputime_to_clock_t(tsk->utime);
info.si_stime = cputime_to_clock_t(tsk->stime);
@@ -1598,7 +1616,7 @@ void ptrace_notify(int exit_code)
info.si_signo = SIGTRAP;
info.si_code = exit_code;
info.si_pid = task_pid_vnr(current);
- info.si_uid = current->uid;
+ info.si_uid = current_uid();
/* Let the debugger run. */
spin_lock_irq(&current->sighand->siglock);
@@ -1710,7 +1728,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = task_pid_vnr(current->parent);
- info->si_uid = current->parent->uid;
+ info->si_uid = task_uid(current->parent);
}
/* If the (new) signal is now blocked, requeue it. */
@@ -2211,7 +2229,7 @@ sys_kill(pid_t pid, int sig)
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
- info.si_uid = current->uid;
+ info.si_uid = current_uid();
return kill_something_info(sig, &info, pid);
}
@@ -2228,7 +2246,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = task_tgid_vnr(current);
- info.si_uid = current->uid;
+ info.si_uid = current_uid();
rcu_read_lock();
p = find_task_by_vpid(pid);
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d1..ebe65c2c987 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -112,12 +112,17 @@ EXPORT_SYMBOL(cad_pid);
void (*pm_power_off_prepare)(void);
+/*
+ * set the priority of a task
+ * - the caller must hold the RCU read lock
+ */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
+ const struct cred *cred = current_cred(), *pcred = __task_cred(p);
int no_nice;
- if (p->uid != current->euid &&
- p->euid != current->euid && !capable(CAP_SYS_NICE)) {
+ if (pcred->uid != cred->euid &&
+ pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
error = -EPERM;
goto out;
}
@@ -141,6 +146,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
{
struct task_struct *g, *p;
struct user_struct *user;
+ const struct cred *cred = current_cred();
int error = -EINVAL;
struct pid *pgrp;
@@ -174,18 +180,18 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
- user = current->user;
+ user = (struct user_struct *) cred->user;
if (!who)
- who = current->uid;
- else
- if ((who != current->uid) && !(user = find_user(who)))
- goto out_unlock; /* No processes for this user */
+ who = cred->uid;
+ else if ((who != cred->uid) &&
+ !(user = find_user(who)))
+ goto out_unlock; /* No processes for this user */
do_each_thread(g, p)
- if (p->uid == who)
+ if (__task_cred(p)->uid == who)
error = set_one_prio(p, niceval, error);
while_each_thread(g, p);
- if (who != current->uid)
+ if (who != cred->uid)
free_uid(user); /* For find_user() */
break;
}
@@ -205,6 +211,7 @@ asmlinkage long sys_getpriority(int which, int who)
{
struct task_struct *g, *p;
struct user_struct *user;
+ const struct cred *cred = current_cred();
long niceval, retval = -ESRCH;
struct pid *pgrp;
@@ -236,21 +243,21 @@ asmlinkage long sys_getpriority(int which, int who)
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
- user = current->user;
+ user = (struct user_struct *) cred->user;
if (!who)
- who = current->uid;
- else
- if ((who != current->uid) && !(user = find_user(who)))
- goto out_unlock; /* No processes for this user */
+ who = cred->uid;
+ else if ((who != cred->uid) &&
+ !(user = find_user(who)))
+ goto out_unlock; /* No processes for this user */
do_each_thread(g, p)
- if (p->uid == who) {
+ if (__task_cred(p)->uid == who) {
niceval = 20 - task_nice(p);
if (niceval > retval)
retval = niceval;
}
while_each_thread(g, p);
- if (who != current->uid)
+ if (who != cred->uid)
free_uid(user); /* for find_user() */
break;
}
@@ -472,46 +479,48 @@ void ctrl_alt_del(void)
*/
asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
- int old_rgid = current->gid;
- int old_egid = current->egid;
- int new_rgid = old_rgid;
- int new_egid = old_egid;
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ old = current_cred();
+
retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
if (retval)
- return retval;
+ goto error;
+ retval = -EPERM;
if (rgid != (gid_t) -1) {
- if ((old_rgid == rgid) ||
- (current->egid==rgid) ||
+ if (old->gid == rgid ||
+ old->egid == rgid ||
capable(CAP_SETGID))
- new_rgid = rgid;
+ new->gid = rgid;
else
- return -EPERM;
+ goto error;
}
if (egid != (gid_t) -1) {
- if ((old_rgid == egid) ||
- (current->egid == egid) ||
- (current->sgid == egid) ||
+ if (old->gid == egid ||
+ old->egid == egid ||
+ old->sgid == egid ||
capable(CAP_SETGID))
- new_egid = egid;
+ new->egid = egid;
else
- return -EPERM;
- }
- if (new_egid != old_egid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
+ goto error;
}
+
if (rgid != (gid_t) -1 ||
- (egid != (gid_t) -1 && egid != old_rgid))
- current->sgid = new_egid;
- current->fsgid = new_egid;
- current->egid = new_egid;
- current->gid = new_rgid;
- key_fsgid_changed(current);
- proc_id_connector(current, PROC_EVENT_GID);
- return 0;
+ (egid != (gid_t) -1 && egid != old->gid))
+ new->sgid = new->egid;
+ new->fsgid = new->egid;
+
+ return commit_creds(new);
+
+error:
+ abort_creds(new);
+ return retval;
}
/*
@@ -521,56 +530,54 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
*/
asmlinkage long sys_setgid(gid_t gid)
{
- int old_egid = current->egid;
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ old = current_cred();
+
retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
if (retval)
- return retval;
+ goto error;
- if (capable(CAP_SETGID)) {
- if (old_egid != gid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->gid = current->egid = current->sgid = current->fsgid = gid;
- } else if ((gid == current->gid) || (gid == current->sgid)) {
- if (old_egid != gid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->egid = current->fsgid = gid;
- }
+ retval = -EPERM;
+ if (capable(CAP_SETGID))
+ new->gid = new->egid = new->sgid = new->fsgid = gid;
+ else if (gid == old->gid || gid == old->sgid)
+ new->egid = new->fsgid = gid;
else
- return -EPERM;
+ goto error;
- key_fsgid_changed(current);
- proc_id_connector(current, PROC_EVENT_GID);
- return 0;
+ return commit_creds(new);
+
+error:
+ abort_creds(new);
+ return retval;
}
-static int set_user(uid_t new_ruid, int dumpclear)
+/*
+ * change the user struct in a credentials set to match the new UID
+ */
+static int set_user(struct cred *new)
{
struct user_struct *new_user;
- new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
+ new_user = alloc_uid(current_user_ns(), new->uid);
if (!new_user)
return -EAGAIN;
if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
- new_user != current->nsproxy->user_ns->root_user) {
+ new_user != INIT_USER) {
free_uid(new_user);
return -EAGAIN;
}
- switch_uid(new_user);
-
- if (dumpclear) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->uid = new_ruid;
+ free_uid(new->user);
+ new->user = new_user;
return 0;
}
@@ -591,54 +598,56 @@ static int set_user(uid_t new_ruid, int dumpclear)
*/
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
- int old_ruid, old_euid, old_suid, new_ruid, new_euid;
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ old = current_cred();
+
retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
if (retval)
- return retval;
-
- new_ruid = old_ruid = current->uid;
- new_euid = old_euid = current->euid;
- old_suid = current->suid;
+ goto error;
+ retval = -EPERM;
if (ruid != (uid_t) -1) {
- new_ruid = ruid;
- if ((old_ruid != ruid) &&
- (current->euid != ruid) &&
+ new->uid = ruid;
+ if (old->uid != ruid &&
+ old->euid != ruid &&
!capable(CAP_SETUID))
- return -EPERM;
+ goto error;
}
if (euid != (uid_t) -1) {
- new_euid = euid;
- if ((old_ruid != euid) &&
- (current->euid != euid) &&
- (current->suid != euid) &&
+ new->euid = euid;
+ if (old->uid != euid &&
+ old->euid != euid &&
+ old->suid != euid &&
!capable(CAP_SETUID))
- return -EPERM;
+ goto error;
}
- if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
- return -EAGAIN;
+ retval = -EAGAIN;
+ if (new->uid != old->uid && set_user(new) < 0)
+ goto error;
- if (new_euid != old_euid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->fsuid = current->euid = new_euid;
if (ruid != (uid_t) -1 ||
- (euid != (uid_t) -1 && euid != old_ruid))
- current->suid = current->euid;
- current->fsuid = current->euid;
+ (euid != (uid_t) -1 && euid != old->uid))
+ new->suid = new->euid;
+ new->fsuid = new->euid;
- key_fsuid_changed(current);
- proc_id_connector(current, PROC_EVENT_UID);
-
- return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
-}
+ retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
+ if (retval < 0)
+ goto error;
+ return commit_creds(new);
+error:
+ abort_creds(new);
+ return retval;
+}
/*
* setuid() is implemented like SysV with SAVED_IDS
@@ -653,36 +662,41 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
*/
asmlinkage long sys_setuid(uid_t uid)
{
- int old_euid = current->euid;
- int old_ruid, old_suid, new_suid;
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ old = current_cred();
+
retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
if (retval)
- return retval;
+ goto error;
- old_ruid = current->uid;
- old_suid = current->suid;
- new_suid = old_suid;
-
+ retval = -EPERM;
if (capable(CAP_SETUID)) {
- if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
- return -EAGAIN;
- new_suid = uid;
- } else if ((uid != current->uid) && (uid != new_suid))
- return -EPERM;
-
- if (old_euid != uid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
+ new->suid = new->uid = uid;
+ if (uid != old->uid && set_user(new) < 0) {
+ retval = -EAGAIN;
+ goto error;
+ }
+ } else if (uid != old->uid && uid != new->suid) {
+ goto error;
}
- current->fsuid = current->euid = uid;
- current->suid = new_suid;
- key_fsuid_changed(current);
- proc_id_connector(current, PROC_EVENT_UID);
+ new->fsuid = new->euid = uid;
+
+ retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
+ if (retval < 0)
+ goto error;
- return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
+ return commit_creds(new);
+
+error:
+ abort_creds(new);
+ return retval;
}
@@ -692,54 +706,63 @@ asmlinkage long sys_setuid(uid_t uid)
*/
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
- int old_ruid = current->uid;
- int old_euid = current->euid;
- int old_suid = current->suid;
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
if (retval)
- return retval;
+ goto error;
+ old = current_cred();
+ retval = -EPERM;
if (!capable(CAP_SETUID)) {
- if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
- (ruid != current->euid) && (ruid != current->suid))
- return -EPERM;
- if ((euid != (uid_t) -1) && (euid != current->uid) &&
- (euid != current->euid) && (euid != current->suid))
- return -EPERM;
- if ((suid != (uid_t) -1) && (suid != current->uid) &&
- (suid != current->euid) && (suid != current->suid))
- return -EPERM;
+ if (ruid != (uid_t) -1 && ruid != old->uid &&
+ ruid != old->euid && ruid != old->suid)
+ goto error;
+ if (euid != (uid_t) -1 && euid != old->uid &&
+ euid != old->euid && euid != old->suid)
+ goto error;
+ if (suid != (uid_t) -1 && suid != old->uid &&
+ suid != old->euid && suid != old->suid)
+ goto error;
}
+
+ retval = -EAGAIN;
if (ruid != (uid_t) -1) {
- if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
- return -EAGAIN;
+ new->uid = ruid;
+ if (ruid != old->uid && set_user(new) < 0)
+ goto error;
}
- if (euid != (uid_t) -1) {
- if (euid != current->euid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->euid = euid;
- }
- current->fsuid = current->euid;
+ if (euid != (uid_t) -1)
+ new->euid = euid;
if (suid != (uid_t) -1)
- current->suid = suid;
+ new->suid = suid;
+ new->fsuid = new->euid;
+
+ retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
+ if (retval < 0)
+ goto error;
- key_fsuid_changed(current);
- proc_id_connector(current, PROC_EVENT_UID);
+ return commit_creds(new);
- return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
+error:
+ abort_creds(new);
+ return retval;
}
asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
{
+ const struct cred *cred = current_cred();
int retval;
- if (!(retval = put_user(current->uid, ruid)) &&
- !(retval = put_user(current->euid, euid)))
- retval = put_user(current->suid, suid);
+ if (!(retval = put_user(cred->uid, ruid)) &&
+ !(retval = put_user(cred->euid, euid)))
+ retval = put_user(cred->suid, suid);
return retval;
}
@@ -749,48 +772,55 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us
*/
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
+ const struct cred *old;
+ struct cred *new;
int retval;
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+ old = current_cred();
+
retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
if (retval)
- return retval;
+ goto error;
+ retval = -EPERM;
if (!capable(CAP_SETGID)) {
- if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
- (rgid != current->egid) && (rgid != current->sgid))
- return -EPERM;
- if ((egid != (gid_t) -1) && (egid != current->gid) &&
- (egid != current->egid) && (egid != current->sgid))
- return -EPERM;
- if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
- (sgid != current->egid) && (sgid != current->sgid))
- return -EPERM;
+ if (rgid != (gid_t) -1 && rgid != old->gid &&
+ rgid != old->egid && rgid != old->sgid)
+ goto error;
+ if (egid != (gid_t) -1 && egid != old->gid &&
+ egid != old->egid && egid != old->sgid)
+ goto error;
+ if (sgid != (gid_t) -1 && sgid != old->gid &&
+ sgid != old->egid && sgid != old->sgid)
+ goto error;
}
- if (egid != (gid_t) -1) {
- if (egid != current->egid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
- }
- current->egid = egid;
- }
- current->fsgid = current->egid;
+
if (rgid != (gid_t) -1)
- current->gid = rgid;
+ new->gid = rgid;
+ if (egid != (gid_t) -1)
+ new->egid = egid;
if (sgid != (gid_t) -1)
- current->sgid = sgid;
+ new->sgid = sgid;
+ new->fsgid = new->egid;
- key_fsgid_changed(current);
- proc_id_connector(current, PROC_EVENT_GID);
- return 0;
+ return commit_creds(new);
+
+error:
+ abort_creds(new);
+ return retval;
}
asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
{
+ const struct cred *cred = current_cred();
int retval;
- if (!(retval = put_user(current->gid, rgid)) &&
- !(retval = put_user(current->egid, egid)))
- retval = put_user(current->sgid, sgid);
+ if (!(retval = put_user(cred->gid, rgid)) &&
+ !(retval = put_user(cred->egid, egid)))
+ retval = put_user(cred->sgid, sgid);
return retval;
}
@@ -804,27 +834,35 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us
*/
asmlinkage long sys_setfsuid(uid_t uid)
{
- int old_fsuid;
+ const struct cred *old;
+ struct cred *new;
+ uid_t old_fsuid;
- old_fsuid = current->fsuid;
- if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
- return old_fsuid;
+ new = prepare_creds();
+ if (!new)
+ return current_fsuid();
+ old = current_cred();
+ old_fsuid = old->fsuid;
- if (uid == current->uid || uid == current->euid ||
- uid == current->suid || uid == current->fsuid ||
+ if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
+ goto error;
+
+ if (uid == old->uid || uid == old->euid ||
+ uid == old->suid || uid == old->fsuid ||
capable(CAP_SETUID)) {
if (uid != old_fsuid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
+ new->fsuid = uid;
+ if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
+ goto change_okay;
}
- current->fsuid = uid;
}
- key_fsuid_changed(current);
- proc_id_connector(current, PROC_EVENT_UID);
-
- security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
+error:
+ abort_creds(new);
+ return old_fsuid;
+change_okay:
+ commit_creds(new);
return old_fsuid;
}
@@ -833,23 +871,34 @@ asmlinkage long sys_setfsuid(uid_t uid)
*/
asmlinkage long sys_setfsgid(gid_t gid)
{
- int old_fsgid;
+ const struct cred *old;
+ struct cred *new;
+ gid_t old_fsgid;
+
+ new = prepare_creds();
+ if (!new)
+ return current_fsgid();
+ old = current_cred();
+ old_fsgid = old->fsgid;
- old_fsgid = current->fsgid;
if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
- return old_fsgid;
+ goto error;
- if (gid == current->gid || gid == current->egid ||
- gid == current->sgid || gid == current->fsgid ||
+ if (gid == old->gid || gid == old->egid ||
+ gid == old->sgid || gid == old->fsgid ||
capable(CAP_SETGID)) {
if (gid != old_fsgid) {
- set_dumpable(current->mm, suid_dumpable);
- smp_wmb();
+ new->fsgid = gid;
+ goto change_okay;
}
- current->fsgid = gid;
- key_fsgid_changed(current);
- proc_id_connector(current, PROC_EVENT_GID);
}
+
+error:
+ abort_creds(new);
+ return old_fsgid;
+
+change_okay:
+ commit_creds(new);
return old_fsgid;
}
@@ -1118,7 +1167,7 @@ EXPORT_SYMBOL(groups_free);
/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
- struct group_info *group_info)
+ const struct group_info *group_info)
{
int i;
unsigned int count = group_info->ngroups;
@@ -1186,7 +1235,7 @@ static void groups_sort(struct group_info *group_info)
}
/* a simple bsearch */
-int groups_search(struct group_info *group_info, gid_t grp)
+int groups_search(const struct group_info *group_info, gid_t grp)
{
unsigned int left, right;
@@ -1208,51 +1257,74 @@ int groups_search(struct group_info *group_info, gid_t grp)
return 0;
}
-/* validate and set current->group_info */
-int set_current_groups(struct group_info *group_info)
+/**
+ * set_groups - Change a group subscription in a set of credentials
+ * @new: The newly prepared set of credentials to alter
+ * @group_info: The group list to install
+ *
+ * Validate a group subscription and, if valid, insert it into a set
+ * of credentials.
+ */
+int set_groups(struct cred *new, struct group_info *group_info)
{
int retval;
- struct group_info *old_info;
retval = security_task_setgroups(group_info);
if (retval)
return retval;
+ put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
+ new->group_info = group_info;
+ return 0;
+}
+
+EXPORT_SYMBOL(set_groups);
- task_lock(current);
- old_info = current->group_info;
- current->group_info = group_info;
- task_unlock(current);
+/**
+ * set_current_groups - Change current's group subscription
+ * @group_info: The group list to impose
+ *
+ * Validate a group subscription and, if valid, impose it upon current's task
+ * security record.
+ */
+int set_current_groups(struct group_info *group_info)
+{
+ struct cred *new;
+ int ret;
- put_group_info(old_info);
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
- return 0;
+ ret = set_groups(new, group_info);
+ if (ret < 0) {
+ abort_creds(new);
+ return ret;
+ }
+
+ return commit_creds(new);
}
EXPORT_SYMBOL(set_current_groups);
asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
{
- int i = 0;
-
- /*
- * SMP: Nobody else can change our grouplist. Thus we are
- * safe.
- */
+ const struct cred *cred = current_cred();
+ int i;
if (gidsetsize < 0)
return -EINVAL;
/* no need to grab task_lock here; it cannot change */
- i = current->group_info->ngroups;
+ i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
- if (groups_to_user(grouplist, current->group_info)) {
+ if (groups_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
@@ -1296,9 +1368,11 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
*/
int in_group_p(gid_t grp)
{
+ const struct cred *cred = current_cred();
int retval = 1;
- if (grp != current->fsgid)
- retval = groups_search(current->group_info, grp);
+
+ if (grp != cred->fsgid)
+ retval = groups_search(cred->group_info, grp);
return retval;
}
@@ -1306,9 +1380,11 @@ EXPORT_SYMBOL(in_group_p);
int in_egroup_p(gid_t grp)
{
+ const struct cred *cred = current_cred();
int retval = 1;
- if (grp != current->egid)
- retval = groups_search(current->group_info, grp);
+
+ if (grp != cred->egid)
+ retval = groups_search(cred->group_info, grp);
return retval;
}
@@ -1624,50 +1700,56 @@ asmlinkage long sys_umask(int mask)
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
- long error = 0;
+ struct task_struct *me = current;
+ unsigned char comm[sizeof(me->comm)];
+ long error;
- if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
+ error = security_task_prctl(option, arg2, arg3, arg4, arg5);
+ if (error != -ENOSYS)
return error;
+ error = 0;
switch (option) {
case PR_SET_PDEATHSIG:
if (!valid_signal(arg2)) {
error = -EINVAL;
break;
}
- current->pdeath_signal = arg2;
+ me->pdeath_signal = arg2;
+ error = 0;
break;
case PR_GET_PDEATHSIG:
- error = put_user(current->pdeath_signal, (int __user *)arg2);
+ error = put_user(me->pdeath_signal, (int __user *)arg2);
break;
case PR_GET_DUMPABLE:
- error = get_dumpable(current->mm);
+ error = get_dumpable(me->mm);
break;
case PR_SET_DUMPABLE:
if (arg2 < 0 || arg2 > 1) {
error = -EINVAL;
break;
}
- set_dumpable(current->mm, arg2);
+ set_dumpable(me->mm, arg2);
+ error = 0;
break;
case PR_SET_UNALIGN:
- error = SET_UNALIGN_CTL(current, arg2);
+ error = SET_UNALIGN_CTL(me, arg2);
break;
case PR_GET_UNALIGN:
- error = GET_UNALIGN_CTL(current, arg2);
+ error = GET_UNALIGN_CTL(me, arg2);
break;
case PR_SET_FPEMU:
- error = SET_FPEMU_CTL(current, arg2);
+ error = SET_FPEMU_CTL(me, arg2);
break;
case PR_GET_FPEMU:
- error = GET_FPEMU_CTL(current, arg2);
+ error = GET_FPEMU_CTL(me, arg2);
break;
case PR_SET_FPEXC:
- error = SET_FPEXC_CTL(current, arg2);
+ error = SET_FPEXC_CTL(me, arg2);
break;
case PR_GET_FPEXC:
- error = GET_FPEXC_CTL(current, arg2);
+ error = GET_FPEXC_CTL(me, arg2);
break;
case PR_GET_TIMING:
error = PR_TIMING_STATISTICAL;
@@ -1675,33 +1757,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
case PR_SET_TIMING:
if (arg2 != PR_TIMING_STATISTICAL)
error = -EINVAL;
+ else
+ error = 0;
break;
- case PR_SET_NAME: {
- struct task_struct *me = current;
- unsigned char ncomm[sizeof(me->comm)];
-
- ncomm[sizeof(me->comm)-1] = 0;
- if (strncpy_from_user(ncomm, (char __user *)arg2,
- sizeof(me->comm)-1) < 0)
+ case PR_SET_NAME:
+ comm[sizeof(me->comm)-1] = 0;
+ if (strncpy_from_user(comm, (char __user *)arg2,
+ sizeof(me->comm) - 1) < 0)
return -EFAULT;
- set_task_comm(me, ncomm);
+ set_task_comm(me, comm);
return 0;
- }
- case PR_GET_NAME: {
- struct task_struct *me = current;
- unsigned char tcomm[sizeof(me->comm)];
-
- get_task_comm(tcomm, me);
- if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
+ case PR_GET_NAME:
+ get_task_comm(comm, me);
+ if (copy_to_user((char __user *)arg2, comm,
+ sizeof(comm)))
return -EFAULT;
return 0;
- }
case PR_GET_ENDIAN:
- error = GET_ENDIAN(current, arg2);
+ error = GET_ENDIAN(me, arg2);
break;
case PR_SET_ENDIAN:
- error = SET_ENDIAN(current, arg2);
+ error = SET_ENDIAN(me, arg2);
break;
case PR_GET_SECCOMP:
@@ -1725,6 +1802,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
current->default_timer_slack_ns;
else
current->timer_slack_ns = arg2;
+ error = 0;
break;
default:
error = -EINVAL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d56fe7570d..9d52b57310a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1651,7 +1651,7 @@ out:
static int test_perm(int mode, int op)
{
- if (!current->euid)
+ if (!current_euid())
mode >>= 6;
else if (in_egroup_p(0))
mode >>= 3;
diff --git a/kernel/timer.c b/kernel/timer.c
index dbd50fabe4c..566257d1dc1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1192,25 +1192,25 @@ asmlinkage long sys_getppid(void)
asmlinkage long sys_getuid(void)
{
/* Only we change this so SMP safe */
- return current->uid;
+ return current_uid();
}
asmlinkage long sys_geteuid(void)
{
/* Only we change this so SMP safe */
- return current->euid;
+ return current_euid();
}
asmlinkage long sys_getgid(void)
{
/* Only we change this so SMP safe */
- return current->gid;
+ return current_gid();
}
asmlinkage long sys_getegid(void)
{
/* Only we change this so SMP safe */
- return current->egid;
+ return current_egid();
}
#endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f30..1ee9e4e454a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
data->pid = tsk->pid;
- data->uid = tsk->uid;
+ data->uid = task_uid(tsk);
data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
data->policy = tsk->policy;
data->rt_priority = tsk->rt_priority;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 8ebcd8532df..2dc06ab3571 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -27,6 +27,7 @@
*/
void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
{
+ const struct cred *tcred;
struct timespec uptime, ts;
u64 ac_etime;
@@ -53,10 +54,11 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
stats->ac_flag |= AXSIG;
stats->ac_nice = task_nice(tsk);
stats->ac_sched = tsk->policy;
- stats->ac_uid = tsk->uid;
- stats->ac_gid = tsk->gid;
stats->ac_pid = tsk->pid;
rcu_read_lock();
+ tcred = __task_cred(tsk);
+ stats->ac_uid = tcred->uid;
+ stats->ac_gid = tcred->gid;
stats->ac_ppid = pid_alive(tsk) ?
rcu_dereference(tsk->real_parent)->tgid : 0;
rcu_read_unlock();
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 3e41c1673e2..2460c3199b5 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -84,11 +84,12 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
{
+ const struct cred *cred = current_cred();
int retval;
- if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
- !(retval = put_user(high2lowuid(current->euid), euid)))
- retval = put_user(high2lowuid(current->suid), suid);
+ if (!(retval = put_user(high2lowuid(cred->uid), ruid)) &&
+ !(retval = put_user(high2lowuid(cred->euid), euid)))
+ retval = put_user(high2lowuid(cred->suid), suid);
return retval;
}
@@ -104,11 +105,12 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
{
+ const struct cred *cred = current_cred();
int retval;
- if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
- !(retval = put_user(high2lowgid(current->egid), egid)))
- retval = put_user(high2lowgid(current->sgid), sgid);
+ if (!(retval = put_user(high2lowgid(cred->gid), rgid)) &&
+ !(retval = put_user(high2lowgid(cred->egid), egid)))
+ retval = put_user(high2lowgid(cred->sgid), sgid);
return retval;
}
@@ -161,25 +163,24 @@ static int groups16_from_user(struct group_info *group_info,
asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
{
- int i = 0;
+ const struct cred *cred = current_cred();
+ int i;
if (gidsetsize < 0)
return -EINVAL;
- get_group_info(current->group_info);
- i = current->group_info->ngroups;
+ i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
- if (groups16_to_user(grouplist, current->group_info)) {
+ if (groups16_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
- put_group_info(current->group_info);
return i;
}
@@ -210,20 +211,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
asmlinkage long sys_getuid16(void)
{
- return high2lowuid(current->uid);
+ return high2lowuid(current_uid());
}
asmlinkage long sys_geteuid16(void)
{
- return high2lowuid(current->euid);
+ return high2lowuid(current_euid());
}
asmlinkage long sys_getgid16(void)
{
- return high2lowgid(current->gid);
+ return high2lowgid(current_gid());
}
asmlinkage long sys_getegid16(void)
{
- return high2lowgid(current->egid);
+ return high2lowgid(current_egid());
}
diff --git a/kernel/user.c b/kernel/user.c
index 39d6159fae4..6608a3d8ca6 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,12 +16,13 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/user_namespace.h>
+#include "cred-internals.h"
struct user_namespace init_user_ns = {
.kref = {
- .refcount = ATOMIC_INIT(2),
+ .refcount = ATOMIC_INIT(1),
},
- .root_user = &root_user,
+ .creator = &root_user,
};
EXPORT_SYMBOL_GPL(init_user_ns);
@@ -47,12 +48,14 @@ static struct kmem_cache *uid_cachep;
*/
static DEFINE_SPINLOCK(uidhash_lock);
+/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = ATOMIC_INIT(2),
.processes = ATOMIC_INIT(1),
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
+ .user_ns = &init_user_ns,
#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
@@ -104,16 +107,10 @@ static int sched_create_user(struct user_struct *up)
return rc;
}
-static void sched_switch_user(struct task_struct *p)
-{
- sched_move_task(p);
-}
-
#else /* CONFIG_USER_SCHED */
static void sched_destroy_user(struct user_struct *up) { }
static int sched_create_user(struct user_struct *up) { return 0; }
-static void sched_switch_user(struct task_struct *p) { }
#endif /* CONFIG_USER_SCHED */
@@ -242,13 +239,21 @@ static struct kobj_type uids_ktype = {
.release = uids_release,
};
-/* create /sys/kernel/uids/<uid>/cpu_share file for this user */
+/*
+ * Create /sys/kernel/uids/<uid>/cpu_share file for this user
+ * We do not create this file for users in a user namespace (until
+ * sysfs tagging is implemented).
+ *
+ * See Documentation/scheduler/sched-design-CFS.txt for ramifications.
+ */
static int uids_user_create(struct user_struct *up)
{
struct kobject *kobj = &up->kobj;
int error;
memset(kobj, 0, sizeof(struct kobject));
+ if (up->user_ns != &init_user_ns)
+ return 0;
kobj->kset = uids_kset;
error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
if (error) {
@@ -284,6 +289,8 @@ static void remove_user_sysfs_dir(struct work_struct *w)
unsigned long flags;
int remove_user = 0;
+ if (up->user_ns != &init_user_ns)
+ return;
/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
* atomic.
*/
@@ -319,12 +326,13 @@ done:
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
*/
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
{
/* restore back the count */
atomic_inc(&up->__count);
spin_unlock_irqrestore(&uidhash_lock, flags);
+ put_user_ns(up->user_ns);
INIT_WORK(&up->work, remove_user_sysfs_dir);
schedule_work(&up->work);
}
@@ -340,13 +348,14 @@ static inline void uids_mutex_unlock(void) { }
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
*/
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
{
uid_hash_remove(up);
spin_unlock_irqrestore(&uidhash_lock, flags);
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
+ put_user_ns(up->user_ns);
kmem_cache_free(uid_cachep, up);
}
@@ -362,7 +371,7 @@ struct user_struct *find_user(uid_t uid)
{
struct user_struct *ret;
unsigned long flags;
- struct user_namespace *ns = current->nsproxy->user_ns;
+ struct user_namespace *ns = current_user_ns();
spin_lock_irqsave(&uidhash_lock, flags);
ret = uid_hash_find(uid, uidhashentry(ns, uid));
@@ -409,6 +418,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
if (sched_create_user(new) < 0)
goto out_free_user;
+ new->user_ns = get_user_ns(ns);
+
if (uids_user_create(new))
goto out_destoy_sched;
@@ -432,7 +443,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
up = new;
}
spin_unlock_irq(&uidhash_lock);
-
}
uids_mutex_unlock();
@@ -441,6 +451,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
out_destoy_sched:
sched_destroy_user(new);
+ put_user_ns(new->user_ns);
out_free_user:
kmem_cache_free(uid_cachep, new);
out_unlock:
@@ -448,63 +459,6 @@ out_unlock:
return NULL;
}
-void switch_uid(struct user_struct *new_user)
-{
- struct user_struct *old_user;
-
- /* What if a process setreuid()'s and this brings the
- * new uid over his NPROC rlimit? We can check this now
- * cheaply with the new uid cache, so if it matters
- * we should be checking for it. -DaveM
- */
- old_user = current->user;
- atomic_inc(&new_user->processes);
- atomic_dec(&old_user->processes);
- switch_uid_keyring(new_user);
- current->user = new_user;
- sched_switch_user(current);
-
- /*
- * We need to synchronize with __sigqueue_alloc()
- * doing a get_uid(p->user).. If that saw the old
- * user value, we need to wait until it has exited
- * its critical region before we can free the old
- * structure.
- */
- smp_mb();
- spin_unlock_wait(&current->sighand->siglock);
-
- free_uid(old_user);
- suid_keys(current);
-}
-
-#ifdef CONFIG_USER_NS
-void release_uids(struct user_namespace *ns)
-{
- int i;
- unsigned long flags;
- struct hlist_head *head;
- struct hlist_node *nd;
-
- spin_lock_irqsave(&uidhash_lock, flags);
- /*
- * collapse the chains so that the user_struct-s will
- * be still alive, but not in hashes. subsequent free_uid()
- * will free them.
- */
- for (i = 0; i < UIDHASH_SZ; i++) {
- head = ns->uidhash_table + i;
- while (!hlist_empty(head)) {
- nd = head->first;
- hlist_del_init(nd);
- }
- }
- spin_unlock_irqrestore(&uidhash_lock, flags);
-
- free_uid(ns->root_user);
-}
-#endif
-
static int __init uid_cache_init(void)
{
int n;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 532858fa5b8..79084311ee5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,60 +9,55 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/cred.h>
/*
- * Clone a new ns copying an original user ns, setting refcount to 1
- * @old_ns: namespace to clone
- * Return NULL on error (failure to kmalloc), new ns otherwise
+ * Create a new user namespace, deriving the creator from the user in the
+ * passed credentials, and replacing that user with the new root user for the
+ * new namespace.
+ *
+ * This is called by copy_creds(), which will finish setting the target task's
+ * credentials.
*/
-static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+int create_user_ns(struct cred *new)
{
struct user_namespace *ns;
- struct user_struct *new_user;
+ struct user_struct *root_user;
int n;
ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
if (!ns)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
kref_init(&ns->kref);
for (n = 0; n < UIDHASH_SZ; ++n)
INIT_HLIST_HEAD(ns->uidhash_table + n);
- /* Insert new root user. */
- ns->root_user = alloc_uid(ns, 0);
- if (!ns->root_user) {
+ /* Alloc new root user. */
+ root_user = alloc_uid(ns, 0);
+ if (!root_user) {
kfree(ns);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
- /* Reset current->user with a new one */
- new_user = alloc_uid(ns, current->uid);
- if (!new_user) {
- free_uid(ns->root_user);
- kfree(ns);
- return ERR_PTR(-ENOMEM);
- }
-
- switch_uid(new_user);
- return ns;
-}
-
-struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
-{
- struct user_namespace *new_ns;
-
- BUG_ON(!old_ns);
- get_user_ns(old_ns);
-
- if (!(flags & CLONE_NEWUSER))
- return old_ns;
+ /* set the new root user in the credentials under preparation */
+ ns->creator = new->user;
+ new->user = root_user;
+ new->uid = new->euid = new->suid = new->fsuid = 0;
+ new->gid = new->egid = new->sgid = new->fsgid = 0;
+ put_group_info(new->group_info);
+ new->group_info = get_group_info(&init_groups);
+#ifdef CONFIG_KEYS
+ key_put(new->request_key_auth);
+ new->request_key_auth = NULL;
+#endif
+ /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
- new_ns = clone_user_ns(old_ns);
+ /* alloc_uid() incremented the userns refcount. Just set it to 1 */
+ kref_set(&ns->kref, 1);
- put_user_ns(old_ns);
- return new_ns;
+ return 0;
}
void free_user_ns(struct kref *kref)
@@ -70,7 +65,7 @@ void free_user_ns(struct kref *kref)
struct user_namespace *ns;
ns = container_of(kref, struct user_namespace, kref);
- release_uids(ns);
+ free_uid(ns->creator);
kfree(ns);
}
EXPORT_SYMBOL(free_user_ns);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d4dc69ddebd..4952322cba4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -84,21 +84,21 @@ static cpumask_t cpu_singlethread_map __read_mostly;
static cpumask_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
-static inline int is_single_threaded(struct workqueue_struct *wq)
+static inline int is_wq_single_threaded(struct workqueue_struct *wq)
{
return wq->singlethread;
}
static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
{
- return is_single_threaded(wq)
+ return is_wq_single_threaded(wq)
? &cpu_singlethread_map : &cpu_populated_map;
}
static
struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
- if (unlikely(is_single_threaded(wq)))
+ if (unlikely(is_wq_single_threaded(wq)))
cpu = singlethread_cpu;
return per_cpu_ptr(wq->cpu_wq, cpu);
}
@@ -769,7 +769,7 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct workqueue_struct *wq = cwq->wq;
- const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
+ const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
struct task_struct *p;
p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);