diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 11 | ||||
-rw-r--r-- | kernel/kexec.c | 7 | ||||
-rw-r--r-- | kernel/module.c | 26 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 3 | ||||
-rw-r--r-- | kernel/power/Makefile | 2 | ||||
-rw-r--r-- | kernel/power/console.c | 6 | ||||
-rw-r--r-- | kernel/power/disk.c | 22 | ||||
-rw-r--r-- | kernel/power/main.c | 8 | ||||
-rw-r--r-- | kernel/power/swap.c | 5 | ||||
-rw-r--r-- | kernel/power/user.c | 8 | ||||
-rw-r--r-- | kernel/printk.c | 15 | ||||
-rw-r--r-- | kernel/rcuclassic.c | 4 | ||||
-rw-r--r-- | kernel/rcupdate.c | 12 | ||||
-rw-r--r-- | kernel/rcupreempt.c | 3 | ||||
-rw-r--r-- | kernel/rcutree.c | 4 | ||||
-rw-r--r-- | kernel/sched.c | 15 | ||||
-rw-r--r-- | kernel/seccomp.c | 7 | ||||
-rw-r--r-- | kernel/signal.c | 8 | ||||
-rw-r--r-- | kernel/softirq.c | 1 | ||||
-rw-r--r-- | kernel/sys.c | 31 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 20 | ||||
-rw-r--r-- | kernel/time/ntp.c | 444 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 19 | ||||
-rw-r--r-- | kernel/tsacct.c | 6 | ||||
-rw-r--r-- | kernel/user.c | 32 | ||||
-rw-r--r-- | kernel/user_namespace.c | 21 |
30 files changed, 503 insertions, 248 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 170a9213c1b..e4791b3ba55 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ +obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e14db9c089b..9edb5c4b79b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_mutex); - kfree(root); kill_litter_super(sb); + kfree(root); } static struct file_system_type cgroup_fs_type = { diff --git a/kernel/fork.c b/kernel/fork.c index a66fbde2071..4854c2c4a82 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1179,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif clear_all_latency_tracing(p); - /* Our parent execution domain becomes current domain - These must match for thread signalling to apply */ - p->parent_exec_id = p->self_exec_id; - /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; @@ -1220,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ - if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; - else + p->parent_exec_id = current->parent_exec_id; + } else { p->real_parent = current; + p->parent_exec_id = current->self_exec_id; + } spin_lock(¤t->sighand->siglock); diff --git a/kernel/kexec.c b/kernel/kexec.c index 8a6d7b08864..48389957825 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1465,6 +1465,11 @@ int kernel_kexec(void) error = device_power_down(PMSG_FREEZE); if (error) goto Enable_irqs; + + /* Suspend system devices */ + error = sysdev_suspend(PMSG_FREEZE); + if (error) + goto Power_up_devices; } else #endif { @@ -1477,6 +1482,8 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + sysdev_resume(); + Power_up_devices: device_power_up(PMSG_RESTORE); Enable_irqs: local_irq_enable(); diff --git a/kernel/module.c b/kernel/module.c index ba22484a987..1196f5d1170 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2015,14 +2015,6 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto free_mod; -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); - if (!mod->refptr) { - err = -ENOMEM; - goto free_mod; - } -#endif if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, @@ -2030,7 +2022,7 @@ static noinline struct module *load_module(void __user *umod, mod->name); if (!percpu) { err = -ENOMEM; - goto free_percpu; + goto free_mod; } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; @@ -2082,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod, /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), + mod->name); + if (!mod->refptr) { + err = -ENOMEM; + goto free_init; + } +#endif /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); @@ -2288,15 +2288,17 @@ static noinline struct module *load_module(void __user *umod, ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); + free_init: +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod->refptr); +#endif module_free(mod, mod->module_init); free_core: module_free(mod, mod->module_core); + /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: if (percpu) percpu_modfree(percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); -#endif free_mod: kfree(args); free_hdr: diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index e976e505648..8e5d9a68b02 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1370,7 +1370,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk) if (task_cputime_expired(&group_sample, &sig->cputime_expires)) return 1; } - return 0; + + return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; } /* diff --git a/kernel/power/Makefile b/kernel/power/Makefile index d7a10167a25..720ea4f781b 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o +obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o diff --git a/kernel/power/console.c b/kernel/power/console.c index b8628be2a46..a3961b205de 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -78,6 +78,12 @@ void pm_restore_console(void) } set_console(orig_fgconsole); release_console_sem(); + + if (vt_waitactive(orig_fgconsole)) { + pr_debug("Resume: Can't switch VCs."); + return; + } + kmsg_redirect = orig_kmsg; } #endif diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 432ee575c9e..4a4a206b197 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -227,6 +227,12 @@ static int create_image(int platform_mode) "aborting hibernation\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + goto Power_up_devices; + } if (hibernation_test(TEST_CORE)) goto Power_up; @@ -242,9 +248,11 @@ static int create_image(int platform_mode) if (!in_suspend) platform_leave(platform_mode); Power_up: + sysdev_resume(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ + Power_up_devices: device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: @@ -335,6 +343,7 @@ static int resume_target_kernel(void) "aborting resume\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_QUIESCE); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); error = restore_highmem(); @@ -357,6 +366,7 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); + sysdev_resume(); device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); @@ -440,6 +450,7 @@ int hibernation_platform_enter(void) local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { + sysdev_suspend(PMSG_HIBERNATE); hibernation_ops->enter(); /* We should never get here */ while (1); @@ -595,6 +606,12 @@ static int software_resume(void) unsigned int flags; /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + + /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate * trigger path is via sysfs which takes a buffer mutex before @@ -610,6 +627,11 @@ static int software_resume(void) mutex_unlock(&pm_mutex); return -ENOENT; } + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + wait_for_device_probe(); swsusp_resume_device = name_to_dev_t(resume_file); pr_debug("PM: Resume from partition %s\n", resume_file); } else { diff --git a/kernel/power/main.c b/kernel/power/main.c index b4d219016b6..c9632f841f6 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state) goto Done; } - if (!suspend_test(TEST_CORE)) - error = suspend_ops->enter(state); + error = sysdev_suspend(PMSG_SUSPEND); + if (!error) { + if (!suspend_test(TEST_CORE)) + error = suspend_ops->enter(state); + sysdev_resume(); + } device_power_up(PMSG_RESUME); Done: diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6da14358537..505f319e489 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -60,6 +60,7 @@ static struct block_device *resume_bdev; static int submit(int rw, pgoff_t page_off, struct page *page, struct bio **bio_chain) { + const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); @@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, bio_get(bio); if (bio_chain == NULL) { - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); wait_on_page_locked(page); if (rw == READ) bio_set_pages_dirty(bio); @@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, get_page(page); /* These pages are freed later */ bio->bi_private = *bio_chain; *bio_chain = bio; - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); } return 0; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 005b93d839b..6c85359364f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); if (error) - pm_notifier_call_chain(PM_POST_RESTORE); + pm_notifier_call_chain(PM_POST_HIBERNATION); } else { data->swap = -1; data->mode = O_WRONLY; - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - pm_notifier_call_chain(PM_POST_HIBERNATION); + pm_notifier_call_chain(PM_POST_RESTORE); } if (error) atomic_inc(&snapshot_device_available); diff --git a/kernel/printk.c b/kernel/printk.c index 69188f226a9..e3602d0755b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress); * driver system. */ static DECLARE_MUTEX(console_sem); -static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); @@ -891,12 +890,14 @@ void suspend_console(void) printk("Suspending console(s) (use no_console_suspend to debug)\n"); acquire_console_sem(); console_suspended = 1; + up(&console_sem); } void resume_console(void) { if (!console_suspend_enabled) return; + down(&console_sem); console_suspended = 0; release_console_sem(); } @@ -912,11 +913,9 @@ void resume_console(void) void acquire_console_sem(void) { BUG_ON(in_interrupt()); - if (console_suspended) { - down(&secondary_console_sem); - return; - } down(&console_sem); + if (console_suspended) + return; console_locked = 1; console_may_schedule = 1; } @@ -926,6 +925,10 @@ int try_acquire_console_sem(void) { if (down_trylock(&console_sem)) return -1; + if (console_suspended) { + up(&console_sem); + return -1; + } console_locked = 1; console_may_schedule = 0; return 0; @@ -979,7 +982,7 @@ void release_console_sem(void) unsigned wake_klogd = 0; if (console_suspended) { - up(&secondary_console_sem); + up(&console_sem); return; } diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index bd5a9003497..654c640a6b9 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index d92a76a881a..cae8a059cf4 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,6 +44,7 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/module.h> +#include <linux/kernel_stat.h> enum rcu_barrier { RCU_BARRIER_STD, @@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; +int rcu_scheduler_active __read_mostly; /* * Awaken the corresponding synchronize_rcu() instance now that a @@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head) void synchronize_rcu(void) { struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); @@ -175,3 +181,9 @@ void __init rcu_init(void) __rcu_init(); } +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 33cfc50781f..5d59e850fb7 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1181,6 +1181,9 @@ void __synchronize_sched(void) { struct rcu_synchronize rcu; + if (num_online_cpus() == 1) + return; /* blocking is gp if only one CPU! */ + init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fd602a6f6..97ce31579ec 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/kernel/sched.c b/kernel/sched.c index 410eec40413..8e2558c2ba6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -9224,6 +9224,16 @@ static int sched_rt_global_constraints(void) return ret; } + +int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +{ + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) + return 0; + + return 1; +} + #else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { @@ -9317,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) + if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) return -EINVAL; #else /* We don't support RT-tasks being in separate groups */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ad64fcb731f..57d4b13b631 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -8,6 +8,7 @@ #include <linux/seccomp.h> #include <linux/sched.h> +#include <linux/compat.h> /* #define SECCOMP_DEBUG 1 */ #define NR_SECCOMP_MODES 1 @@ -22,7 +23,7 @@ static int mode1_syscalls[] = { 0, /* null terminated */ }; -#ifdef TIF_32BIT +#ifdef CONFIG_COMPAT static int mode1_syscalls_32[] = { __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 0, /* null terminated */ @@ -37,8 +38,8 @@ void __secure_computing(int this_syscall) switch (mode) { case 1: syscall = mode1_syscalls; -#ifdef TIF_32BIT - if (test_thread_flag(TIF_32BIT)) +#ifdef CONFIG_COMPAT + if (is_compat_task()) syscall = mode1_syscalls_32; #endif do { diff --git a/kernel/signal.c b/kernel/signal.c index 2a74fe87c0d..1c8814481a1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1575,7 +1575,15 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) read_lock(&tasklist_lock); if (may_ptrace_stop()) { do_notify_parent_cldstop(current, CLD_TRAPPED); + /* + * Don't want to allow preemption here, because + * sys_ptrace() needs this task to be inactive. + * + * XXX: implement read_unlock_no_resched(). + */ + preempt_disable(); read_unlock(&tasklist_lock); + preempt_enable_no_resched(); schedule(); } else { /* diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8..9041ea7948f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu) preempt_enable_no_resched(); cond_resched(); preempt_disable(); + rcu_qsctr_inc((long)__bind_cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/sys.c b/kernel/sys.c index f145c415bc1..37f458e6882 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -559,7 +559,7 @@ error: abort_creds(new); return retval; } - + /* * change the user struct in a credentials set to match the new UID */ @@ -571,6 +571,11 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + if (!task_can_switch_user(new_user, current)) { + free_uid(new_user); + return -EINVAL; + } + if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && new_user != INIT_USER) { @@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) goto error; } - retval = -EAGAIN; - if (new->uid != old->uid && set_user(new) < 0) - goto error; - + if (new->uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old->uid)) new->suid = new->euid; @@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) retval = -EPERM; if (capable(CAP_SETUID)) { new->suid = new->uid = uid; - if (uid != old->uid && set_user(new) < 0) { - retval = -EAGAIN; - goto error; + if (uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; } } else if (uid != old->uid && uid != new->suid) { goto error; @@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) goto error; } - retval = -EAGAIN; if (ruid != (uid_t) -1) { new->uid = ruid; - if (ruid != old->uid && set_user(new) < 0) - goto error; + if (ruid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } } if (euid != (uid_t) -1) new->euid = euid; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index ea2f48af83c..d13be216a79 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev, if (dev->mode != mode) { dev->set_mode(mode, dev); dev->mode = mode; + + /* + * A nsec2cyc multiplicator of 0 is invalid and we'd crash + * on it, so fix it up and emit a warning: + */ + if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (unlikely(!dev->mult)) { + dev->mult = 1; + WARN_ON(1); + } + } } } @@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); - /* - * A nsec2cyc multiplicator of 0 is invalid and we'd crash - * on it, so fix it up and emit a warning: - */ - if (unlikely(!dev->mult)) { - dev->mult = 1; - WARN_ON(1); - } - spin_lock(&clockevents_lock); list_add(&dev->list, &clockevent_devices); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index f5f793d9241..7fc64375ff4 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -1,71 +1,129 @@ /* - * linux/kernel/time/ntp.c - * * NTP state machine interfaces and logic. * * This code was mainly moved from kernel/timer.c and kernel/time.c * Please see those files for relevant copyright info and historical * changelogs. */ - -#include <linux/mm.h> -#include <linux/time.h> -#include <linux/timex.h> -#include <linux/jiffies.h> -#include <linux/hrtimer.h> #include <linux/capability.h> -#include <linux/math64.h> #include <linux/clocksource.h> #include <linux/workqueue.h> -#include <asm/timex.h> +#include <linux/hrtimer.h> +#include <linux/jiffies.h> +#include <linux/math64.h> +#include <linux/timex.h> +#include <linux/time.h> +#include <linux/mm.h> /* - * Timekeeping variables + * NTP timekeeping variables: */ -unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ -unsigned long tick_nsec; /* ACTHZ period (nsec) */ -u64 tick_length; -static u64 tick_length_base; -static struct hrtimer leap_timer; +/* USER_HZ period (usecs): */ +unsigned long tick_usec = TICK_USEC; -#define MAX_TICKADJ 500 /* microsecs */ -#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) +/* ACTHZ period (nsecs): */ +unsigned long tick_nsec; + +u64 tick_length; +static u64 tick_length_base; + +static struct hrtimer leap_timer; + +#define MAX_TICKADJ 500LL /* usecs */ +#define MAX_TICKADJ_SCALED \ + (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -static int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -static long time_tai; /* TAI offset (s) */ -static s64 time_offset; /* time adjustment (ns) */ -static long time_constant = 2; /* pll time constant */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -static s64 time_freq; /* frequency offset (scaled ns/s)*/ -static long time_reftime; /* time at last adjustment (s) */ -long time_adjust; -static long ntp_tick_adj; +/* + * clock synchronization status + * + * (TIME_ERROR prevents overwriting the CMOS clock) + */ +static int time_state = TIME_OK; + +/* clock status bits: */ +int time_status = STA_UNSYNC; + +/* TAI offset (secs): */ +static long time_tai; + +/* time adjustment (nsecs): */ +static s64 time_offset; + +/* pll time constant: */ +static long time_constant = 2; + +/* maximum error (usecs): */ +long time_maxerror = NTP_PHASE_LIMIT; + +/* estimated error (usecs): */ +long time_esterror = NTP_PHASE_LIMIT; + +/* frequency offset (scaled nsecs/secs): */ +static s64 time_freq; + +/* time at last adjustment (secs): */ +static long time_reftime; + +long time_adjust; + +/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ +static s64 ntp_tick_adj; + +/* + * NTP methods: + */ + +/* + * Update (tick_length, tick_length_base, tick_nsec), based + * on (tick_usec, ntp_tick_adj, time_freq): + */ static void ntp_update_frequency(void) { - u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) - << NTP_SCALE_SHIFT; - second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; - second_length += time_freq; + u64 second_length; + u64 new_base; + + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) + << NTP_SCALE_SHIFT; + + second_length += ntp_tick_adj; + second_length += time_freq; - tick_length_base = second_length; + tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; + new_base = div_u64(second_length, NTP_INTERVAL_FREQ); - tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; - tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); + /* + * Don't wait for the next second_overflow, apply + * the change to the tick length immediately: + */ + tick_length += new_base - tick_length_base; + tick_length_base = new_base; +} + +static inline s64 ntp_update_offset_fll(s64 offset64, long secs) +{ + time_status &= ~STA_MODE; + + if (secs < MINSEC) + return 0; + + if (!(time_status & STA_FLL) && (secs <= MAXSEC)) + return 0; + + time_status |= STA_MODE; + + return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); } static void ntp_update_offset(long offset) { - long mtemp; s64 freq_adj; + s64 offset64; + long secs; if (!(time_status & STA_PLL)) return; @@ -84,24 +142,23 @@ static void ntp_update_offset(long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; + secs = xtime.tv_sec - time_reftime; + if (unlikely(time_status & STA_FREQHOLD)) + secs = 0; + time_reftime = xtime.tv_sec; - freq_adj = (s64)offset * mtemp; - freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant); - time_status &= ~STA_MODE; - if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL), - mtemp); - time_status |= STA_MODE; - } - freq_adj += time_freq; - freq_adj = min(freq_adj, MAXFREQ_SCALED); - time_freq = max(freq_adj, -MAXFREQ_SCALED); + offset64 = offset; + freq_adj = (offset64 * secs) << + (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); - time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); + freq_adj += ntp_update_offset_fll(offset64, secs); + + freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); + + time_freq = max(freq_adj, -MAXFREQ_SCALED); + + time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); } /** @@ -111,15 +168,15 @@ static void ntp_update_offset(long offset) */ void ntp_clear(void) { - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; ntp_update_frequency(); - tick_length = tick_length_base; - time_offset = 0; + tick_length = tick_length_base; + time_offset = 0; } /* @@ -140,8 +197,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) xtime.tv_sec--; wall_to_monotonic.tv_sec++; time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: " - "inserting leap second 23:59:60 UTC\n"); + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); res = HRTIMER_RESTART; break; @@ -150,8 +207,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_tai--; wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: " - "deleting leap second 23:59:59 UTC\n"); + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); break; case TIME_OOP: time_tai++; @@ -179,7 +236,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) */ void second_overflow(void) { - s64 time_adj; + s64 delta; /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; @@ -192,24 +249,30 @@ void second_overflow(void) * Compute the phase adjustment for the next second. The offset is * reduced by a fixed factor times the time constant. */ - tick_length = tick_length_base; - time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); - time_offset -= time_adj; - tick_length += time_adj; - - if (unlikely(time_adjust)) { - if (time_adjust > MAX_TICKADJ) { - time_adjust -= MAX_TICKADJ; - tick_length += MAX_TICKADJ_SCALED; - } else if (time_adjust < -MAX_TICKADJ) { - time_adjust += MAX_TICKADJ; - tick_length -= MAX_TICKADJ_SCALED; - } else { - tick_length += (s64)(time_adjust * NSEC_PER_USEC / - NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; - time_adjust = 0; - } + tick_length = tick_length_base; + + delta = shift_right(time_offset, SHIFT_PLL + time_constant); + time_offset -= delta; + tick_length += delta; + + if (!time_adjust) + return; + + if (time_adjust > MAX_TICKADJ) { + time_adjust -= MAX_TICKADJ; + tick_length += MAX_TICKADJ_SCALED; + return; } + + if (time_adjust < -MAX_TICKADJ) { + time_adjust += MAX_TICKADJ; + tick_length -= MAX_TICKADJ_SCALED; + return; + } + + tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) + << NTP_SCALE_SHIFT; + time_adjust = 0; } #ifdef CONFIG_GENERIC_CMOS_UPDATE @@ -233,12 +296,13 @@ static void sync_cmos_clock(struct work_struct *work) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) + if (!ntp_synced()) { /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). */ return; + } getnstimeofday(&now); if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) @@ -270,7 +334,116 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif -/* adjtimex mainly allows reading (and writing, if superuser) of +/* + * Start the leap seconds timer: + */ +static inline void ntp_start_leap_timer(struct timespec *ts) +{ + long now = ts->tv_sec; + + if (time_status & STA_INS) { + time_state = TIME_INS; + now += 86400 - now % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + + return; + } + + if (time_status & STA_DEL) { + time_state = TIME_DEL; + now += 86400 - (now + 1) % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + } +} + +/* + * Propagate a new txc->status value into the NTP state: + */ +static inline void process_adj_status(struct timex *txc, struct timespec *ts) +{ + if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { + time_state = TIME_OK; + time_status = STA_UNSYNC; + } + + /* + * If we turn on PLL adjustments then reset the + * reference time to current time. + */ + if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) + time_reftime = xtime.tv_sec; + + /* only set allowed bits */ + time_status &= STA_RONLY; + time_status |= txc->status & ~STA_RONLY; + + switch (time_state) { + case TIME_OK: + ntp_start_leap_timer(ts); + break; + case TIME_INS: + case TIME_DEL: + time_state = TIME_OK; + ntp_start_leap_timer(ts); + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + break; + case TIME_OOP: + hrtimer_restart(&leap_timer); + break; + } +} +/* + * Called with the xtime lock held, so we can access and modify + * all the global NTP state: + */ +static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) +{ + if (txc->modes & ADJ_STATUS) + process_adj_status(txc, ts); + + if (txc->modes & ADJ_NANO) + time_status |= STA_NANO; + + if (txc->modes & ADJ_MICRO) + time_status &= ~STA_NANO; + + if (txc->modes & ADJ_FREQUENCY) { + time_freq = txc->freq * PPM_SCALE; + time_freq = min(time_freq, MAXFREQ_SCALED); + time_freq = max(time_freq, -MAXFREQ_SCALED); + } + + if (txc->modes & ADJ_MAXERROR) + time_maxerror = txc->maxerror; + + if (txc->modes & ADJ_ESTERROR) + time_esterror = txc->esterror; + + if (txc->modes & ADJ_TIMECONST) { + time_constant = txc->constant; + if (!(time_status & STA_NANO)) + time_constant += 4; + time_constant = min(time_constant, (long)MAXTC); + time_constant = max(time_constant, 0l); + } + + if (txc->modes & ADJ_TAI && txc->constant > 0) + time_tai = txc->constant; + + if (txc->modes & ADJ_OFFSET) + ntp_update_offset(txc->offset); + + if (txc->modes & ADJ_TICK) + tick_usec = txc->tick; + + if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) + ntp_update_frequency(); +} + +/* + * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ int do_adjtimex(struct timex *txc) @@ -291,11 +464,14 @@ int do_adjtimex(struct timex *txc) if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; - /* if the quartz is off by more than 10% something is VERY wrong! */ + /* + * if the quartz is off by more than 10% then + * something is VERY wrong! + */ if (txc->modes & ADJ_TICK && (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) - return -EINVAL; + return -EINVAL; if (txc->modes & ADJ_STATUS && time_state != TIME_OK) hrtimer_cancel(&leap_timer); @@ -305,7 +481,6 @@ int do_adjtimex(struct timex *txc) write_seqlock_irq(&xtime_lock); - /* If there are input parameters, then process them */ if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; @@ -315,98 +490,24 @@ int do_adjtimex(struct timex *txc) ntp_update_frequency(); } txc->offset = save_adjust; - goto adj_done; - } - if (txc->modes) { - long sec; - - if (txc->modes & ADJ_STATUS) { - if ((time_status & STA_PLL) && - !(txc->status & STA_PLL)) { - time_state = TIME_OK; - time_status = STA_UNSYNC; - } - /* only set allowed bits */ - time_status &= STA_RONLY; - time_status |= txc->status & ~STA_RONLY; - - switch (time_state) { - case TIME_OK: - start_timer: - sec = ts.tv_sec; - if (time_status & STA_INS) { - time_state = TIME_INS; - sec += 86400 - sec % 86400; - hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); - } else if (time_status & STA_DEL) { - time_state = TIME_DEL; - sec += 86400 - (sec + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); - } - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - goto start_timer; - break; - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } - } - - if (txc->modes & ADJ_NANO) - time_status |= STA_NANO; - if (txc->modes & ADJ_MICRO) - time_status &= ~STA_NANO; - - if (txc->modes & ADJ_FREQUENCY) { - time_freq = (s64)txc->freq * PPM_SCALE; - time_freq = min(time_freq, MAXFREQ_SCALED); - time_freq = max(time_freq, -MAXFREQ_SCALED); - } - - if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; - if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; - - if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; - if (!(time_status & STA_NANO)) - time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); - } - - if (txc->modes & ADJ_TAI && txc->constant > 0) - time_tai = txc->constant; - - if (txc->modes & ADJ_OFFSET) - ntp_update_offset(txc->offset); - if (txc->modes & ADJ_TICK) - tick_usec = txc->tick; + } else { - if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) - ntp_update_frequency(); - } + /* If there are input parameters, then process them: */ + if (txc->modes) + process_adjtimex_modes(txc, &ts); - txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, + txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); - if (!(time_status & STA_NANO)) - txc->offset /= NSEC_PER_USEC; + if (!(time_status & STA_NANO)) + txc->offset /= NSEC_PER_USEC; + } -adj_done: result = time_state; /* mostly `TIME_OK' */ if (time_status & (STA_UNSYNC|STA_CLOCKERR)) result = TIME_ERROR; txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * - (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); + PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; @@ -425,6 +526,7 @@ adj_done: txc->calcnt = 0; txc->errcnt = 0; txc->stbcnt = 0; + write_sequnlock_irq(&xtime_lock); txc->time.tv_sec = ts.tv_sec; @@ -440,6 +542,8 @@ adj_done: static int __init ntp_tick_adj_setup(char *str) { ntp_tick_adj = simple_strtol(str, NULL, 0); + ntp_tick_adj <<= NTP_SCALE_SHIFT; + return 1; } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 58a93fbd68a..34e707e5ab8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,7 @@ config FUNCTION_TRACER depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL select FRAME_POINTER + select KALLSYMS select TRACING select CONTEXT_SWITCH_TRACER help @@ -238,6 +239,7 @@ config STACK_TRACER depends on DEBUG_KERNEL select FUNCTION_TRACER select STACKTRACE + select KALLSYMS help This special tracer records the maximum stack footprint of the kernel and displays it in debugfs/tracing/stack_trace. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9a236ffe2aa..fdf913dfc7e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2033,7 +2033,7 @@ free: static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; - int ret; + int ret, cpu; ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * sizeof(struct ftrace_ret_stack *), @@ -2042,6 +2042,10 @@ static int start_graph_tracing(void) if (!ret_stack_list) return -ENOMEM; + /* The cpu_boot init_task->ret_stack will never be freed */ + for_each_online_cpu(cpu) + ftrace_graph_init_task(idle_task(cpu)); + do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 88c8eb70f54..bc8e80a86bc 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; + unsigned int loops = 0; while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { entry = ring_buffer_event_data(event); + /* + * The ring buffer is a size of trace_buf_size, if + * we loop more than the size, there's something wrong + * with the ring buffer. + */ + if (loops++ > trace_buf_size) { + printk(KERN_CONT ".. bad ring buffer "); + goto failed; + } if (!trace_valid_entry(entry)) { printk(KERN_CONT ".. invalid entry %d ", entry->type); @@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) cnt = ring_buffer_entries(tr->buffer); + /* + * The trace_test_buffer_cpu runs a while loop to consume all data. + * If the calling tracer is broken, and is constantly filling + * the buffer, this will run forever, and hard lock the box. + * We disable the ring buffer while we do this test to prevent + * a hard lock up. + */ + tracing_off(); for_each_possible_cpu(cpu) { ret = trace_test_buffer_cpu(tr, cpu); if (ret) break; } + tracing_on(); __raw_spin_unlock(&ftrace_max_lock); local_irq_restore(flags); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 43f891b05a4..00d59d048ed 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk) if (likely(tsk->mm)) { cputime_t time, dtime; struct timeval value; + unsigned long flags; u64 delta; + local_irq_save(flags); time = tsk->stime + tsk->utime; dtime = cputime_sub(time, tsk->acct_timexpd); jiffies_to_timeval(cputime_to_jiffies(dtime), &value); @@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk) delta = delta * USEC_PER_SEC + value.tv_usec; if (delta == 0) - return; + goto out; tsk->acct_timexpd = time; tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + out: + local_irq_restore(flags); } } diff --git a/kernel/user.c b/kernel/user.c index 3551ac74239..fbb300e6191 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -286,14 +286,12 @@ int __init uids_sysfs_init(void) /* work function to remove sysfs directory for a user and free up * corresponding structures. */ -static void remove_user_sysfs_dir(struct work_struct *w) +static void cleanup_user_struct(struct work_struct *w) { struct user_struct *up = container_of(w, struct user_struct, work); unsigned long flags; int remove_user = 0; - if (up->user_ns != &init_user_ns) - return; /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() * atomic. */ @@ -312,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w) if (!remove_user) goto done; - kobject_uevent(&up->kobj, KOBJ_REMOVE); - kobject_del(&up->kobj); - kobject_put(&up->kobj); + if (up->user_ns == &init_user_ns) { + kobject_uevent(&up->kobj, KOBJ_REMOVE); + kobject_del(&up->kobj); + kobject_put(&up->kobj); + } sched_destroy_user(up); key_put(up->uid_keyring); @@ -335,7 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags) atomic_inc(&up->__count); spin_unlock_irqrestore(&uidhash_lock, flags); - INIT_WORK(&up->work, remove_user_sysfs_dir); + INIT_WORK(&up->work, cleanup_user_struct); schedule_work(&up->work); } @@ -362,6 +362,24 @@ static void free_user(struct user_struct *up, unsigned long flags) #endif +#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED) +/* + * We need to check if a setuid can take place. This function should be called + * before successfully completing the setuid. + */ +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + + return sched_rt_can_attach(up->tg, tsk); + +} +#else +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + return 1; +} +#endif + /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid(). diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 79084311ee5..076c7c8215b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -60,12 +60,25 @@ int create_user_ns(struct cred *new) return 0; } -void free_user_ns(struct kref *kref) +/* + * Deferred destructor for a user namespace. This is required because + * free_user_ns() may be called with uidhash_lock held, but we need to call + * back to free_uid() which will want to take the lock again. + */ +static void free_user_ns_work(struct work_struct *work) { - struct user_namespace *ns; - - ns = container_of(kref, struct user_namespace, kref); + struct user_namespace *ns = + container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); kfree(ns); } + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); +} EXPORT_SYMBOL(free_user_ns); |