Diffstat (limited to 'kernel')
-rw-r--r--   kernel/fork.c                   11
-rw-r--r--   kernel/kexec.c                   7
-rw-r--r--   kernel/module.c                 26
-rw-r--r--   kernel/posix-cpu-timers.c        3
-rw-r--r--   kernel/power/console.c           6
-rw-r--r--   kernel/power/disk.c             22
-rw-r--r--   kernel/power/main.c              8
-rw-r--r--   kernel/power/user.c              8
-rw-r--r--   kernel/printk.c                 15
-rw-r--r--   kernel/rcuclassic.c              4
-rw-r--r--   kernel/rcupdate.c               12
-rw-r--r--   kernel/rcupreempt.c              3
-rw-r--r--   kernel/rcutree.c                 4
-rw-r--r--   kernel/relay.c                   2
-rw-r--r--   kernel/sched.c                  15
-rw-r--r--   kernel/seccomp.c                 7
-rw-r--r--   kernel/signal.c                  8
-rw-r--r--   kernel/softirq.c                 1
-rw-r--r--   kernel/sys.c                    31
-rw-r--r--   kernel/time/clockevents.c       20
-rw-r--r--   kernel/timer.c                 110
-rw-r--r--   kernel/trace/Kconfig             2
-rw-r--r--   kernel/trace/ftrace.c            6
-rw-r--r--   kernel/trace/trace_selftest.c   19
-rw-r--r--   kernel/tsacct.c                  6
-rw-r--r--   kernel/user.c                   32
-rw-r--r--   kernel/user_namespace.c         21
27 files changed, 297 insertions, 112 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index a66fbde2071..4854c2c4a82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1179,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	clear_all_latency_tracing(p);
 
-	/* Our parent execution domain becomes current domain
-	   These must match for thread signalling to apply */
-	p->parent_exec_id = p->self_exec_id;
-
 	/* ok, now we should be set up.. */
 	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
 	p->pdeath_signal = 0;
@@ -1220,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		set_task_cpu(p, smp_processor_id());
 
 	/* CLONE_PARENT re-uses the old parent */
-	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
-	else
+		p->parent_exec_id = current->parent_exec_id;
+	} else {
 		p->real_parent = current;
+		p->parent_exec_id = current->self_exec_id;
+	}
 
 	spin_lock(&current->sighand->siglock);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8a6d7b08864..48389957825 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1465,6 +1465,11 @@ int kernel_kexec(void)
 		error = device_power_down(PMSG_FREEZE);
 		if (error)
 			goto Enable_irqs;
+
+		/* Suspend system devices */
+		error = sysdev_suspend(PMSG_FREEZE);
+		if (error)
+			goto Power_up_devices;
 	} else
 #endif
 	{
@@ -1477,6 +1482,8 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
+		sysdev_resume();
+ Power_up_devices:
 		device_power_up(PMSG_RESTORE);
  Enable_irqs:
 		local_irq_enable();
diff --git a/kernel/module.c b/kernel/module.c
index ba22484a987..1196f5d1170 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2015,14 +2015,6 @@ static noinline struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto free_mod;
 
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
-				      mod->name);
-	if (!mod->refptr) {
-		err = -ENOMEM;
-		goto free_mod;
-	}
-#endif
 	if (pcpuindex) {
 		/* We have a special allocation for this section. */
 		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2030,7 +2022,7 @@ static noinline struct module *load_module(void __user *umod,
 					 mod->name);
 		if (!percpu) {
 			err = -ENOMEM;
-			goto free_percpu;
+			goto free_mod;
 		}
 		sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 		mod->percpu = percpu;
@@ -2082,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod,
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
+				      mod->name);
+	if (!mod->refptr) {
+		err = -ENOMEM;
+		goto free_init;
+	}
+#endif
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
@@ -2288,15 +2288,17 @@ static noinline struct module *load_module(void __user *umod,
 	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
+ free_init:
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	percpu_modfree(mod->refptr);
+#endif
 	module_free(mod, mod->module_init);
  free_core:
 	module_free(mod, mod->module_core);
+	/* mod will be freed with core. Don't access it beyond this line! */
  free_percpu:
 	if (percpu)
 		percpu_modfree(percpu);
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	percpu_modfree(mod->refptr);
-#endif
 free_mod:
 	kfree(args);
 free_hdr:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index e976e505648..8e5d9a68b02 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1370,7 +1370,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
 			return 1;
 	}
-	return 0;
+
+	return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
 }
 
 /*
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b8628be2a46..a3961b205de 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -78,6 +78,12 @@ void pm_restore_console(void)
 	}
 	set_console(orig_fgconsole);
 	release_console_sem();
+
+	if (vt_waitactive(orig_fgconsole)) {
+		pr_debug("Resume: Can't switch VCs.");
+		return;
+	}
+
 	kmsg_redirect = orig_kmsg;
 }
 #endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 432ee575c9e..4a4a206b197 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -227,6 +227,12 @@ static int create_image(int platform_mode)
 			"aborting hibernation\n");
 		goto Enable_irqs;
 	}
+	sysdev_suspend(PMSG_FREEZE);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down, "
+			"aborting hibernation\n");
+		goto Power_up_devices;
+	}
 
 	if (hibernation_test(TEST_CORE))
 		goto Power_up;
@@ -242,9 +248,11 @@ static int create_image(int platform_mode)
 	if (!in_suspend)
 		platform_leave(platform_mode);
  Power_up:
+	sysdev_resume();
 	/* NOTE:  device_power_up() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
+ Power_up_devices:
 	device_power_up(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
  Enable_irqs:
@@ -335,6 +343,7 @@ static int resume_target_kernel(void)
 			"aborting resume\n");
 		goto Enable_irqs;
 	}
+	sysdev_suspend(PMSG_QUIESCE);
 	/* We'll ignore saved state, but this gets preempt count (etc) right */
 	save_processor_state();
 	error = restore_highmem();
@@ -357,6 +366,7 @@ static int resume_target_kernel(void)
 	swsusp_free();
 	restore_processor_state();
 	touch_softlockup_watchdog();
+	sysdev_resume();
 	device_power_up(PMSG_RECOVER);
  Enable_irqs:
 	local_irq_enable();
@@ -440,6 +450,7 @@ int hibernation_platform_enter(void)
 	local_irq_disable();
 	error = device_power_down(PMSG_HIBERNATE);
 	if (!error) {
+		sysdev_suspend(PMSG_HIBERNATE);
 		hibernation_ops->enter();
 		/* We should never get here */
 		while (1);
@@ -595,6 +606,12 @@ static int software_resume(void)
 	unsigned int flags;
 
 	/*
+	 * If the user said "noresume".. bail out early.
+	 */
+	if (noresume)
+		return 0;
+
+	/*
 	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
 	 * is configured into the kernel. Since the regular hibernate
 	 * trigger path is via sysfs which takes a buffer mutex before
@@ -610,6 +627,11 @@ static int software_resume(void)
 		mutex_unlock(&pm_mutex);
 		return -ENOENT;
 	}
+	/*
+	 * Some device discovery might still be in progress; we need
+	 * to wait for this to finish.
+	 */
+	wait_for_device_probe();
 	swsusp_resume_device = name_to_dev_t(resume_file);
 	pr_debug("PM: Resume from partition %s\n", resume_file);
 } else {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b4d219016b6..c9632f841f6 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state)
 		goto Done;
 	}
 
-	if (!suspend_test(TEST_CORE))
-		error = suspend_ops->enter(state);
+	error = sysdev_suspend(PMSG_SUSPEND);
+	if (!error) {
+		if (!suspend_test(TEST_CORE))
+			error = suspend_ops->enter(state);
+		sysdev_resume();
+	}
 
 	device_power_up(PMSG_RESUME);
  Done:
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 005b93d839b..6c85359364f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		data->swap = swsusp_resume_device ?
 			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
-		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
 		if (error)
-			pm_notifier_call_chain(PM_POST_RESTORE);
+			pm_notifier_call_chain(PM_POST_HIBERNATION);
 	} else {
 		data->swap = -1;
 		data->mode = O_WRONLY;
-		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
 		if (error)
-			pm_notifier_call_chain(PM_POST_HIBERNATION);
+			pm_notifier_call_chain(PM_POST_RESTORE);
 	}
 	if (error)
 		atomic_inc(&snapshot_device_available);
diff --git a/kernel/printk.c b/kernel/printk.c
index 69188f226a9..e3602d0755b 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress);
  * driver system.
  */
 static DECLARE_MUTEX(console_sem);
-static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
 EXPORT_SYMBOL_GPL(console_drivers);
 
@@ -891,12 +890,14 @@ void suspend_console(void)
 	printk("Suspending console(s) (use no_console_suspend to debug)\n");
 	acquire_console_sem();
 	console_suspended = 1;
+	up(&console_sem);
 }
 
 void resume_console(void)
 {
 	if (!console_suspend_enabled)
 		return;
+	down(&console_sem);
 	console_suspended = 0;
 	release_console_sem();
 }
@@ -912,11 +913,9 @@ void resume_console(void)
 void acquire_console_sem(void)
 {
 	BUG_ON(in_interrupt());
-	if (console_suspended) {
-		down(&secondary_console_sem);
-		return;
-	}
 	down(&console_sem);
+	if (console_suspended)
+		return;
 	console_locked = 1;
 	console_may_schedule = 1;
 }
@@ -926,6 +925,10 @@ int try_acquire_console_sem(void)
 {
 	if (down_trylock(&console_sem))
 		return -1;
+	if (console_suspended) {
+		up(&console_sem);
+		return -1;
+	}
 	console_locked = 1;
 	console_may_schedule = 0;
 	return 0;
@@ -979,7 +982,7 @@ void release_console_sem(void)
 	unsigned wake_klogd = 0;
 
 	if (console_suspended) {
-		up(&secondary_console_sem);
+		up(&console_sem);
 		return;
 	}
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index bd5a9003497..654c640a6b9 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu)
 void rcu_check_callbacks(int cpu, int user)
 {
 	if (user ||
-	    (idle_cpu(cpu) && !in_softirq() &&
-				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+	    (idle_cpu(cpu) && rcu_scheduler_active &&
+	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 
 		/*
 		 * Get here if this CPU took its interrupt from user
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index d92a76a881a..cae8a059cf4 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,6 +44,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
+#include <linux/kernel_stat.h>
 
 enum rcu_barrier {
 	RCU_BARRIER_STD,
@@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
+int rcu_scheduler_active __read_mostly;
 
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
@@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head)
 void synchronize_rcu(void)
 {
 	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu(&rcu.head, wakeme_after_rcu);
@@ -175,3 +181,9 @@ void __init rcu_init(void)
 	__rcu_init();
 }
 
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 33cfc50781f..5d59e850fb7 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1181,6 +1181,9 @@ void __synchronize_sched(void)
 {
 	struct rcu_synchronize rcu;
 
+	if (num_online_cpus() == 1)
+		return;  /* blocking is gp if only one CPU! */
+
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu_sched(&rcu.head, wakeme_after_rcu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b2fd602a6f6..97ce31579ec 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
 void rcu_check_callbacks(int cpu, int user)
 {
 	if (user ||
-	    (idle_cpu(cpu) && !in_softirq() &&
-				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+	    (idle_cpu(cpu) && rcu_scheduler_active &&
+	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 
 		/*
 		 * Get here if this CPU took its interrupt from user
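[Annotation] The four RCU hunks above are one fix: during early boot there is a single CPU and no context switch has happened yet, so a grace period is trivially complete, and treating an idle CPU as quiescent before the scheduler runs is wrong. synchronize_rcu() now short-circuits via rcu_blocking_is_gp(), and rcu_check_callbacks() only counts idle CPUs as quiescent once rcu_scheduler_starting() has set rcu_scheduler_active. A minimal sketch of the fast-path helper, assuming the definition carried by the matching rcuclassic/rcutree headers in this series (not shown in these hunks):

/* Sketch, assumed from the headers in this series: with one online
 * CPU, no reader can be running concurrently on another CPU, so a
 * blocking grace-period wait may return immediately. */
static inline int rcu_blocking_is_gp(void)
{
	return num_online_cpus() == 1;
}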
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa..8f2179c8056 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 			 * from the scheduler (trying to re-grab
 			 * rq->lock), so defer it.
 			 */
-			__mod_timer(&buf->timer, jiffies + 1);
+			mod_timer(&buf->timer, jiffies + 1);
 		}
 
 		old = buf->data;
diff --git a/kernel/sched.c b/kernel/sched.c
index 410eec40413..8e2558c2ba6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -9224,6 +9224,16 @@ static int sched_rt_global_constraints(void)
 
 	return ret;
 }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
 #else /* !CONFIG_RT_GROUP_SCHED */
 static int sched_rt_global_constraints(void)
 {
@@ -9317,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
 		return -EINVAL;
 #else
 	/* We don't support RT-tasks being in separate groups */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ad64fcb731f..57d4b13b631 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -8,6 +8,7 @@
 
 #include <linux/seccomp.h>
 #include <linux/sched.h>
+#include <linux/compat.h>
 
 /* #define SECCOMP_DEBUG 1 */
 #define NR_SECCOMP_MODES 1
@@ -22,7 +23,7 @@ static int mode1_syscalls[] = {
 	0, /* null terminated */
 };
 
-#ifdef TIF_32BIT
+#ifdef CONFIG_COMPAT
 static int mode1_syscalls_32[] = {
 	__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32,
 	__NR_seccomp_sigreturn_32, 0, /* null terminated */
@@ -37,8 +38,8 @@ void __secure_computing(int this_syscall)
 	switch (mode) {
 	case 1:
 		syscall = mode1_syscalls;
-#ifdef TIF_32BIT
-		if (test_thread_flag(TIF_32BIT))
+#ifdef CONFIG_COMPAT
+		if (is_compat_task())
 			syscall = mode1_syscalls_32;
 #endif
 		do {
diff --git a/kernel/signal.c b/kernel/signal.c
index 2a74fe87c0d..1c8814481a1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1575,7 +1575,15 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 	read_lock(&tasklist_lock);
 	if (may_ptrace_stop()) {
 		do_notify_parent_cldstop(current, CLD_TRAPPED);
+		/*
+		 * Don't want to allow preemption here, because
+		 * sys_ptrace() needs this task to be inactive.
+		 *
+		 * XXX: implement read_unlock_no_resched().
+		 */
+		preempt_disable();
 		read_unlock(&tasklist_lock);
+		preempt_enable_no_resched();
 		schedule();
 	} else {
 		/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8..9041ea7948f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu)
 			preempt_enable_no_resched();
 			cond_resched();
 			preempt_disable();
+			rcu_qsctr_inc((long)__bind_cpu);
 		}
 		preempt_enable();
 		set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/sys.c b/kernel/sys.c
index f145c415bc1..37f458e6882 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -559,7 +559,7 @@ error:
 	abort_creds(new);
 	return retval;
 }
- 
+
 /*
  * change the user struct in a credentials set to match the new UID
  */
@@ -571,6 +571,11 @@ static int set_user(struct cred *new)
 	if (!new_user)
 		return -EAGAIN;
 
+	if (!task_can_switch_user(new_user, current)) {
+		free_uid(new_user);
+		return -EINVAL;
+	}
+
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
 			new_user != INIT_USER) {
@@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 			goto error;
 	}
 
-	retval = -EAGAIN;
-	if (new->uid != old->uid && set_user(new) < 0)
-		goto error;
-
+	if (new->uid != old->uid) {
+		retval = set_user(new);
+		if (retval < 0)
+			goto error;
+	}
 	if (ruid != (uid_t) -1 ||
 	    (euid != (uid_t) -1 && euid != old->uid))
 		new->suid = new->euid;
@@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
 	retval = -EPERM;
 	if (capable(CAP_SETUID)) {
 		new->suid = new->uid = uid;
-		if (uid != old->uid && set_user(new) < 0) {
-			retval = -EAGAIN;
-			goto error;
+		if (uid != old->uid) {
+			retval = set_user(new);
+			if (retval < 0)
+				goto error;
 		}
 	} else if (uid != old->uid && uid != new->suid) {
 		goto error;
@@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 			goto error;
 	}
 
-	retval = -EAGAIN;
 	if (ruid != (uid_t) -1) {
 		new->uid = ruid;
-		if (ruid != old->uid && set_user(new) < 0)
-			goto error;
+		if (ruid != old->uid) {
+			retval = set_user(new);
+			if (retval < 0)
+				goto error;
+		}
 	}
 	if (euid != (uid_t) -1)
 		new->euid = euid;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index ea2f48af83c..d13be216a79 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev,
 	if (dev->mode != mode) {
 		dev->set_mode(mode, dev);
 		dev->mode = mode;
+
+		/*
+		 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
+		 * on it, so fix it up and emit a warning:
+		 */
+		if (mode == CLOCK_EVT_MODE_ONESHOT) {
+			if (unlikely(!dev->mult)) {
+				dev->mult = 1;
+				WARN_ON(1);
+			}
+		}
 	}
 }
 
@@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev)
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
 	BUG_ON(!dev->cpumask);
 
-	/*
-	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
-	 * on it, so fix it up and emit a warning:
-	 */
-	if (unlikely(!dev->mult)) {
-		dev->mult = 1;
-		WARN_ON(1);
-	}
-
 	spin_lock(&clockevents_lock);
 
 	list_add(&dev->list, &clockevent_devices);
diff --git a/kernel/timer.c b/kernel/timer.c
index 13dd64fe143..9b77fc9a9ac 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 	}
 }
 
-int __mod_timer(struct timer_list *timer, unsigned long expires)
+static inline int
+__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret = 0;
+	int ret;
+
+	ret = 0;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer)) {
 		detach_timer(timer, 0);
 		ret = 1;
+	} else {
+		if (pending_only)
+			goto out_unlock;
 	}
 
 	debug_timer_activate(timer);
@@ -629,42 +635,28 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 
 	timer->expires = expires;
 	internal_add_timer(base, timer);
+
+out_unlock:
 	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
 }
 
-EXPORT_SYMBOL(__mod_timer);
-
 /**
- * add_timer_on - start a timer on a particular CPU
- * @timer: the timer to be added
- * @cpu: the CPU to start it on
+ * mod_timer_pending - modify a pending timer's timeout
+ * @timer: the pending timer to be modified
+ * @expires: new timeout in jiffies
  *
- * This is not very scalable on SMP. Double adds are not possible.
+ * mod_timer_pending() is the same for pending timers as mod_timer(),
+ * but will not re-activate and modify already deleted timers.
+ *
+ * It is useful for unserialized use of timers.
  */
-void add_timer_on(struct timer_list *timer, int cpu)
+int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	struct tvec_base *base = per_cpu(tvec_bases, cpu);
-	unsigned long flags;
-
-	timer_stats_timer_set_start_info(timer);
-	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->lock, flags);
-	timer_set_base(timer, base);
-	debug_timer_activate(timer);
-	internal_add_timer(base, timer);
-	/*
-	 * Check whether the other CPU is idle and needs to be
-	 * triggered to reevaluate the timer wheel when nohz is
-	 * active. We are protected against the other CPU fiddling
-	 * with the timer by holding the timer base lock. This also
-	 * makes sure that a CPU on the way to idle can not evaluate
-	 * the timer wheel.
-	 */
-	wake_up_idle_cpu(cpu);
-	spin_unlock_irqrestore(&base->lock, flags);
+	return __mod_timer(timer, expires, true);
 }
+EXPORT_SYMBOL(mod_timer_pending);
 
 /**
  * mod_timer - modify a timer's timeout
@@ -688,9 +680,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	BUG_ON(!timer->function);
-
-	timer_stats_timer_set_start_info(timer);
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
@@ -699,12 +688,62 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	return __mod_timer(timer, expires);
+	return __mod_timer(timer, expires, false);
 }
-
 EXPORT_SYMBOL(mod_timer);
 
 /**
+ * add_timer - start a timer
+ * @timer: the timer to be added
+ *
+ * The kernel will do a ->function(->data) callback from the
+ * timer interrupt at the ->expires point in the future. The
+ * current time is 'jiffies'.
+ *
+ * The timer's ->expires, ->function (and if the handler uses it, ->data)
+ * fields must be set prior calling this function.
+ *
+ * Timers with an ->expires field in the past will be executed in the next
+ * timer tick.
+ */
+void add_timer(struct timer_list *timer)
+{
+	BUG_ON(timer_pending(timer));
+	mod_timer(timer, timer->expires);
+}
+EXPORT_SYMBOL(add_timer);
+
+/**
+ * add_timer_on - start a timer on a particular CPU
+ * @timer: the timer to be added
+ * @cpu: the CPU to start it on
+ *
+ * This is not very scalable on SMP. Double adds are not possible.
+ */
+void add_timer_on(struct timer_list *timer, int cpu)
+{
+	struct tvec_base *base = per_cpu(tvec_bases, cpu);
+	unsigned long flags;
+
+	timer_stats_timer_set_start_info(timer);
+	BUG_ON(timer_pending(timer) || !timer->function);
+	spin_lock_irqsave(&base->lock, flags);
+	timer_set_base(timer, base);
+	debug_timer_activate(timer);
+	internal_add_timer(base, timer);
+	/*
+	 * Check whether the other CPU is idle and needs to be
+	 * triggered to reevaluate the timer wheel when nohz is
+	 * active. We are protected against the other CPU fiddling
+	 * with the timer by holding the timer base lock. This also
+	 * makes sure that a CPU on the way to idle can not evaluate
+	 * the timer wheel.
+	 */
+	wake_up_idle_cpu(cpu);
+	spin_unlock_irqrestore(&base->lock, flags);
+}
+
+/**
  * del_timer - deactive a timer.
  * @timer: the timer to be deactivated
  *
@@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(del_timer);
 
 #ifdef CONFIG_SMP
@@ -767,7 +805,6 @@ out:
 
 	return ret;
 }
-
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 /**
@@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer)
 		cpu_relax();
 	}
 }
-
 EXPORT_SYMBOL(del_timer_sync);
 
 #endif
@@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire);
+	__mod_timer(&timer, expire, false);
 	schedule();
 	del_singleshot_timer_sync(&timer);
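[Annotation] The timer.c rework funnels add_timer(), mod_timer() and the new mod_timer_pending() through a single __mod_timer() core; the pending_only flag selects the mod_timer_pending() behavior of re-arming a timer only if it has not already been deleted. A minimal usage sketch (the watchdog names below are hypothetical, not from this patch):

/* Hypothetical driver code illustrating mod_timer_pending(): a hot
 * path can keep pushing the timeout back, but once the teardown path
 * has del_timer()'d the timer, this call becomes a no-op instead of
 * resurrecting the timer, so no extra serialization is needed. */
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list wd_timer;	/* armed elsewhere via setup_timer()/add_timer() */

static void wd_touch(void)
{
	mod_timer_pending(&wd_timer, jiffies + HZ);
}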
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 58a93fbd68a..34e707e5ab8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
 	depends on HAVE_FUNCTION_TRACER
 	depends on DEBUG_KERNEL
 	select FRAME_POINTER
+	select KALLSYMS
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	help
@@ -238,6 +239,7 @@ config STACK_TRACER
 	depends on DEBUG_KERNEL
 	select FUNCTION_TRACER
 	select STACKTRACE
+	select KALLSYMS
 	help
 	  This special tracer records the maximum stack footprint of the
 	  kernel and displays it in debugfs/tracing/stack_trace.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9a236ffe2aa..fdf913dfc7e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2033,7 +2033,7 @@ free:
 static int start_graph_tracing(void)
 {
 	struct ftrace_ret_stack **ret_stack_list;
-	int ret;
+	int ret, cpu;
 
 	ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
 				sizeof(struct ftrace_ret_stack *),
@@ -2042,6 +2042,10 @@ static int start_graph_tracing(void)
 	if (!ret_stack_list)
 		return -ENOMEM;
 
+	/* The cpu_boot init_task->ret_stack will never be freed */
+	for_each_online_cpu(cpu)
+		ftrace_graph_init_task(idle_task(cpu));
+
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
 	} while (ret == -EAGAIN);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54..bc8e80a86bc 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer_event *event;
 	struct trace_entry *entry;
+	unsigned int loops = 0;
 
 	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
 		entry = ring_buffer_event_data(event);
 
+		/*
+		 * The ring buffer is a size of trace_buf_size, if
+		 * we loop more than the size, there's something wrong
+		 * with the ring buffer.
+		 */
+		if (loops++ > trace_buf_size) {
+			printk(KERN_CONT ".. bad ring buffer ");
+			goto failed;
+		}
 		if (!trace_valid_entry(entry)) {
 			printk(KERN_CONT ".. invalid entry %d ",
 				entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 
 	cnt = ring_buffer_entries(tr->buffer);
 
+	/*
+	 * The trace_test_buffer_cpu runs a while loop to consume all data.
+	 * If the calling tracer is broken, and is constantly filling
+	 * the buffer, this will run forever, and hard lock the box.
+	 * We disable the ring buffer while we do this test to prevent
+	 * a hard lock up.
+	 */
+	tracing_off();
 	for_each_possible_cpu(cpu) {
 		ret = trace_test_buffer_cpu(tr, cpu);
 		if (ret)
 			break;
 	}
+	tracing_on();
 	__raw_spin_unlock(&ftrace_max_lock);
 	local_irq_restore(flags);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 43f891b05a4..00d59d048ed 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk)
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
 		struct timeval value;
+		unsigned long flags;
 		u64 delta;
 
+		local_irq_save(flags);
 		time = tsk->stime + tsk->utime;
 		dtime = cputime_sub(time, tsk->acct_timexpd);
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
@@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk)
 		delta = delta * USEC_PER_SEC + value.tv_usec;
 
 		if (delta == 0)
-			return;
+			goto out;
 		tsk->acct_timexpd = time;
 		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
 		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
+	out:
+		local_irq_restore(flags);
 	}
 }
diff --git a/kernel/user.c b/kernel/user.c
index 3551ac74239..fbb300e6191 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -286,14 +286,12 @@ int __init uids_sysfs_init(void)
 /* work function to remove sysfs directory for a user and free up
  * corresponding structures.
  */
-static void remove_user_sysfs_dir(struct work_struct *w)
+static void cleanup_user_struct(struct work_struct *w)
 {
 	struct user_struct *up = container_of(w, struct user_struct, work);
 	unsigned long flags;
 	int remove_user = 0;
 
-	if (up->user_ns != &init_user_ns)
-		return;
 	/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
 	 * atomic.
 	 */
@@ -312,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w)
 	if (!remove_user)
 		goto done;
 
-	kobject_uevent(&up->kobj, KOBJ_REMOVE);
-	kobject_del(&up->kobj);
-	kobject_put(&up->kobj);
+	if (up->user_ns == &init_user_ns) {
+		kobject_uevent(&up->kobj, KOBJ_REMOVE);
+		kobject_del(&up->kobj);
+		kobject_put(&up->kobj);
+	}
 
 	sched_destroy_user(up);
 	key_put(up->uid_keyring);
@@ -335,7 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
 	atomic_inc(&up->__count);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 
-	INIT_WORK(&up->work, remove_user_sysfs_dir);
+	INIT_WORK(&up->work, cleanup_user_struct);
 	schedule_work(&up->work);
 }
 
@@ -362,6 +362,24 @@ static void free_user(struct user_struct *up, unsigned long flags)
 
 #endif
 
+#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
+/*
+ * We need to check if a setuid can take place. This function should be called
+ * before successfully completing the setuid.
+ */
+int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
+{
+
+	return sched_rt_can_attach(up->tg, tsk);
+
+}
+#else
+int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
+{
+	return 1;
+}
+#endif
+
 /*
  * Locate the user_struct for the passed UID. If found, take a ref on it. The
  * caller must undo that ref with free_uid().
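[Annotation] With task_can_switch_user() wired into set_user() (see the kernel/sys.c hunks above), a setuid call can now fail with EINVAL, rather than EAGAIN, when USER_SCHED group scheduling gives the target user zero realtime runtime and the caller is a realtime task. A userspace view, as a sketch (the uid is an arbitrary example, not from the patch):

/* Sketch: observing the new error path from userspace, assuming this
 * process runs as a realtime task and uid 1000's task group has
 * rt_runtime == 0. */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (setuid(1000) == -1 && errno == EINVAL)
		fprintf(stderr, "setuid rejected: target user has no RT runtime\n");
	return 0;
}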
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 79084311ee5..076c7c8215b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
 	return 0;
 }
 
-void free_user_ns(struct kref *kref)
+/*
+ * Deferred destructor for a user namespace. This is required because
+ * free_user_ns() may be called with uidhash_lock held, but we need to call
+ * back to free_uid() which will want to take the lock again.
+ */
+static void free_user_ns_work(struct work_struct *work)
 {
-	struct user_namespace *ns;
-
-	ns = container_of(kref, struct user_namespace, kref);
+	struct user_namespace *ns =
+		container_of(work, struct user_namespace, destroyer);
 	free_uid(ns->creator);
 	kfree(ns);
 }
+
+void free_user_ns(struct kref *kref)
+{
+	struct user_namespace *ns =
+		container_of(kref, struct user_namespace, kref);
+
+	INIT_WORK(&ns->destroyer, free_user_ns_work);
+	schedule_work(&ns->destroyer);
+}
 EXPORT_SYMBOL(free_user_ns);
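[Annotation] The user_namespace.c change is an instance of a common kernel pattern: when a kref release function may run with a spinlock held that the destructor itself needs, punt the actual destruction to process context through a workqueue. A condensed sketch of the pattern (the names are generic, not from the patch):

/* Generic deferred-destruction sketch. */
#include <linux/kref.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct obj {
	struct kref kref;
	struct work_struct destroyer;
};

static void obj_free_work(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, destroyer);

	/* Runs later in process context, outside any caller's critical
	 * section, so taking locks here cannot deadlock the caller. */
	kfree(o);
}

/* May be invoked via kref_put() while the caller holds a lock the
 * destructor needs, so only schedule the work here. */
static void obj_release(struct kref *kref)
{
	struct obj *o = container_of(kref, struct obj, kref);

	INIT_WORK(&o->destroyer, obj_free_work);
	schedule_work(&o->destroyer);
}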