path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/async.c                    |  12
-rw-r--r--  kernel/cgroup.c                   |  28
-rw-r--r--  kernel/cpuset.c                   |  13
-rw-r--r--  kernel/dma-coherent.c             |  47
-rw-r--r--  kernel/exit.c                     |   3
-rw-r--r--  kernel/fork.c                     |  20
-rw-r--r--  kernel/hrtimer.c                  |   4
-rw-r--r--  kernel/irq/chip.c                 |   2
-rw-r--r--  kernel/irq/handle.c               |  16
-rw-r--r--  kernel/irq/manage.c               |  10
-rw-r--r--  kernel/irq/numa_migrate.c         |   7
-rw-r--r--  kernel/itimer.c                   |   4
-rw-r--r--  kernel/kallsyms.c                 |  16
-rw-r--r--  kernel/module.c                   |  35
-rw-r--r--  kernel/posix-cpu-timers.c         | 165
-rw-r--r--  kernel/power/disk.c               |  10
-rw-r--r--  kernel/rcuclassic.c               |   2
-rw-r--r--  kernel/rcutree.c                  |   2
-rw-r--r--  kernel/relay.c                    |   4
-rw-r--r--  kernel/sched.c                    |  31
-rw-r--r--  kernel/sched_fair.c               |  32
-rw-r--r--  kernel/sched_rt.c                 |   4
-rw-r--r--  kernel/sched_stats.h              |  48
-rw-r--r--  kernel/signal.c                   |  10
-rw-r--r--  kernel/smp.c                      |  36
-rw-r--r--  kernel/softlockup.c               |   9
-rw-r--r--  kernel/sys.c                      |  16
-rw-r--r--  kernel/sysctl.c                   |   2
-rw-r--r--  kernel/time/clockevents.c         |  20
-rw-r--r--  kernel/time/tick-sched.c          |   2
-rw-r--r--  kernel/trace/ftrace.c             |  32
-rw-r--r--  kernel/trace/ring_buffer.c        |  15
-rw-r--r--  kernel/trace/trace.c              |   5
-rw-r--r--  kernel/trace/trace_irqsoff.c      |   1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c |   1
-rw-r--r--  kernel/wait.c                     |  59
-rw-r--r--  kernel/workqueue.c                |  20
37 files changed, 491 insertions(+), 252 deletions(-)
diff --git a/kernel/async.c b/kernel/async.c
index 608b32b4281..67a2be71f51 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -138,15 +138,18 @@ static void run_one_entry(void)
/* 3) run it (and print duration)*/
if (initcall_debug && system_state == SYSTEM_BOOTING) {
- printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+ printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+ entry->func, task_pid_nr(current));
calltime = ktime_get();
}
entry->func(entry->data, entry->cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
- printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
- entry->func, ktime_to_ns(delta) >> 10);
+ printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+ (long long)entry->cookie,
+ entry->func,
+ (long long)ktime_to_ns(delta) >> 10);
}
/* 4) remove it from the running queue */
@@ -247,7 +250,8 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
delta = ktime_sub(endtime, starttime);
printk("async_continuing @ %i after %lli usec\n",
- task_pid_nr(current), ktime_to_ns(delta) >> 10);
+ task_pid_nr(current),
+ (long long)ktime_to_ns(delta) >> 10);
}
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c29831076e7..5a54ff42874 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) {
}
write_unlock(&css_set_lock);
- list_del(&root->root_list);
- root_count--;
+ if (!list_empty(&root->root_list)) {
+ list_del(&root->root_list);
+ root_count--;
+ }
mutex_unlock(&cgroup_mutex);
@@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
err_remove:
+ cgroup_lock_hierarchy(root);
list_del(&cgrp->sibling);
+ cgroup_unlock_hierarchy(root);
root->number_of_cgroups--;
err_destroy:
@@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
for_each_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
int refcnt;
- do {
+ while (1) {
/* We can only remove a CSS with a refcnt==1 */
refcnt = atomic_read(&css->refcnt);
if (refcnt > 1) {
@@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
* css_tryget() to spin until we set the
* CSS_REMOVED bits or abort
*/
- } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+ if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
+ break;
+ cpu_relax();
+ }
}
done:
for_each_subsys(cgrp->root, ss) {
@@ -2991,20 +2998,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&cgroup_mutex);
return 0;
}
- task_lock(tsk);
- cg = tsk->cgroups;
- parent = task_cgroup(tsk, subsys->subsys_id);
/* Pin the hierarchy */
- if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+ if (!atomic_inc_not_zero(&root->sb->s_active)) {
/* We race with the final deactivate_super() */
mutex_unlock(&cgroup_mutex);
return 0;
}
/* Keep the cgroup alive */
+ task_lock(tsk);
+ parent = task_cgroup(tsk, subsys->subsys_id);
+ cg = tsk->cgroups;
get_css_set(cg);
task_unlock(tsk);
+
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
@@ -3043,7 +3051,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&inode->i_mutex);
put_css_set(cg);
- deactivate_super(parent->root->sb);
+ deactivate_super(root->sb);
/* The cgroup is still accessible in the VFS, but
* we're not going to try to rmdir() it at this
* point. */
@@ -3069,7 +3077,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_lock(&cgroup_mutex);
put_css_set(cg);
mutex_unlock(&cgroup_mutex);
- deactivate_super(parent->root->sb);
+ deactivate_super(root->sb);
return ret;
}
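
[Editor's note] The cgroup_clear_css_refs() hunk above rewrites a do/while cmpxchg loop as an explicit retry loop with cpu_relax() between attempts. Below is a minimal user-space sketch of that compare-and-swap retry pattern, using C11 atomics and a hypothetical try_freeze_ref() helper in place of the kernel's atomic_t and css machinery; it illustrates the technique only, not the cgroup code itself.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for css->refcnt: freeze it (set it to 0) only if the
 * observed count is exactly 1, retrying when a concurrent get/put races us. */
static bool try_freeze_ref(atomic_int *refcnt)
{
	for (;;) {
		int observed = atomic_load(refcnt);

		if (observed > 1)
			return false;	/* still in use, caller must back off */

		/* Attempt the 1 -> 0 transition; on failure we simply loop and
		 * re-read, mirroring the cpu_relax()-and-retry in the patch. */
		if (atomic_compare_exchange_weak(refcnt, &observed, 0))
			return true;
	}
}

int main(void)
{
	atomic_int ref = 1;

	printf("frozen: %d\n", try_freeze_ref(&ref));	/* frozen: 1 */
	return 0;
}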
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a85678865c5..f76db9dcaa0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
#include <linux/cgroup.h>
/*
+ * Workqueue for cpuset related tasks.
+ *
+ * Using kevent workqueue may cause deadlock when memory_migrate
+ * is set. So we create a separate workqueue thread for cpuset.
+ */
+static struct workqueue_struct *cpuset_wq;
+
+/*
* Tracks how many cpusets are currently defined in system.
* When there is only one cpuset (the root cpuset) we can
* short circuit some hooks.
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
*/
static void async_rebuild_sched_domains(void)
{
- schedule_work(&rebuild_sched_domains_work);
+ queue_work(cpuset_wq, &rebuild_sched_domains_work);
}
/*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
hotcpu_notifier(cpuset_track_online_cpus, 0);
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+ cpuset_wq = create_singlethread_workqueue("cpuset");
+ BUG_ON(!cpuset_wq);
}
/**
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index 038707404b7..962a3b574f2 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
* @size: size of requested memory area
* @dma_handle: This will be filled with the correct dma handle
* @ret: This pointer will be filled with the virtual address
- * to allocated area.
+ * to allocated area.
*
* This function should be only called from per-arch dma_alloc_coherent()
* to support allocation from per-device coherent memory pools.
@@ -118,31 +118,32 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
mem = dev->dma_mem;
if (!mem)
return 0;
- if (unlikely(size > mem->size))
- return 0;
+
+ *ret = NULL;
+
+ if (unlikely(size > (mem->size << PAGE_SHIFT)))
+ goto err;
pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
- if (pageno >= 0) {
- /*
- * Memory was found in the per-device arena.
- */
- *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
- *ret = mem->virt_base + (pageno << PAGE_SHIFT);
- memset(*ret, 0, size);
- } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
- /*
- * The per-device arena is exhausted and we are not
- * permitted to fall back to generic memory.
- */
- *ret = NULL;
- } else {
- /*
- * The per-device arena is exhausted and we are
- * permitted to fall back to generic memory.
- */
- return 0;
- }
+ if (unlikely(pageno < 0))
+ goto err;
+
+ /*
+ * Memory was found in the per-device area.
+ */
+ *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+ *ret = mem->virt_base + (pageno << PAGE_SHIFT);
+ memset(*ret, 0, size);
+
return 1;
+
+err:
+ /*
+ * In the case where the allocation can not be satisfied from the
+ * per-device area, try to fall back to generic memory if the
+ * constraints allow it.
+ */
+ return mem->flags & DMA_MEMORY_EXCLUSIVE;
}
EXPORT_SYMBOL(dma_alloc_from_coherent);
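
[Editor's note] The important fix in dma_alloc_from_coherent() is a unit mismatch: mem->size is a page count while the requested size is in bytes, so the old "size > mem->size" test compared incompatible quantities; the patch shifts the page count by PAGE_SHIFT before comparing. A small illustration of that conversion follows; PAGE_SHIFT is assumed to be 12 (4 KiB pages) and request_fits() is an invented helper, not a kernel API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

/* 'pool_pages' is a page count, 'size' is in bytes, so the pool size must be
 * converted to bytes before the two can be compared. */
static bool request_fits(size_t pool_pages, size_t size)
{
	return size <= (pool_pages << PAGE_SHIFT);
}

int main(void)
{
	/* A 16-page (64 KiB) pool: 8 KiB fits, 128 KiB does not. */
	printf("%d %d\n", request_fits(16, 8 * 1024),
			  request_fits(16, 128 * 1024));
	return 0;
}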
diff --git a/kernel/exit.c b/kernel/exit.c
index f80dec3f187..efd30ccf385 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
+ sig->utime = cputime_add(sig->utime, task_utime(tsk));
+ sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
diff --git a/kernel/fork.c b/kernel/fork.c
index bf0cef8bbdf..bb5fe56a7a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -817,17 +817,17 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
struct signal_struct *sig;
- int ret;
if (clone_flags & CLONE_THREAD) {
- ret = thread_group_cputime_clone_thread(current);
- if (likely(!ret)) {
- atomic_inc(&current->signal->count);
- atomic_inc(&current->signal->live);
- }
- return ret;
+ atomic_inc(&current->signal->count);
+ atomic_inc(&current->signal->live);
+ return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+ if (sig)
+ posix_cpu_timers_init_group(sig);
+
tsk->signal = sig;
if (!sig)
return -ENOMEM;
@@ -851,21 +851,20 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->tty_old_pgrp = NULL;
sig->tty = NULL;
- sig->cutime = sig->cstime = cputime_zero;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
sig->cgtime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
task_io_accounting_init(&sig->ioac);
+ sig->sum_sched_runtime = 0;
taskstats_tgid_init(sig);
task_lock(current->group_leader);
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
task_unlock(current->group_leader);
- posix_cpu_timers_init_group(sig);
-
acct_init_pacct(&sig->pacct);
tty_audit_fork(sig);
@@ -1007,6 +1006,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* triggers too late. This doesn't hurt, the check is only there
* to stop root fork bombs.
*/
+ retval = -EAGAIN;
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d71cef25954..f394d2a42ca 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -621,7 +621,9 @@ void clock_was_set(void)
*/
void hres_timers_resume(void)
{
- /* Retrigger the CPU local events: */
+ WARN_ONCE(!irqs_disabled(),
+ KERN_INFO "hres_timers_resume() called with IRQs enabled!");
+
retrigger_next_event(NULL);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f63c706d25e..7de11bd64df 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
out_unlock:
spin_unlock(&desc->lock);
}
+EXPORT_SYMBOL_GPL(handle_level_irq);
/**
* handle_fasteoi_irq - irq handler for transparent controllers
@@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
}
spin_unlock_irqrestore(&desc->lock, flags);
}
+EXPORT_SYMBOL_GPL(__set_irq_handler);
void
set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c20db0be917..3aba8d12f32 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -39,6 +39,18 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
ack_bad_irq(irq);
}
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+static void __init init_irq_default_affinity(void)
+{
+ alloc_bootmem_cpumask_var(&irq_default_affinity);
+ cpumask_setall(irq_default_affinity);
+}
+#else
+static void __init init_irq_default_affinity(void)
+{
+}
+#endif
+
/*
* Linux has a controller-independent interrupt architecture.
* Every controller has a 'controller-template', that is used
@@ -134,6 +146,8 @@ int __init early_irq_init(void)
int legacy_count;
int i;
+ init_irq_default_affinity();
+
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
@@ -219,6 +233,8 @@ int __init early_irq_init(void)
int count;
int i;
+ init_irq_default_affinity();
+
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index cd0cd8dcb34..291f0366455 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -15,17 +15,9 @@
#include "internals.h"
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
cpumask_var_t irq_default_affinity;
-static int init_irq_default_affinity(void)
-{
- alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
- cpumask_setall(irq_default_affinity);
- return 0;
-}
-core_initcall(init_irq_default_affinity);
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ecf765c6a77..acd88356ac7 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
desc = irq_desc_ptrs[irq];
if (desc && old_desc != desc)
- goto out_unlock;
+ goto out_unlock;
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
irq_desc_ptrs[irq] = desc;
+ spin_unlock_irqrestore(&sparse_irq_lock, flags);
/* free the old one */
free_one_irq_desc(old_desc, desc);
+ spin_unlock(&old_desc->lock);
kfree(old_desc);
+ spin_lock(&desc->lock);
+
+ return desc;
out_unlock:
spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8b..58762f7077e 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime cputime;
cputime_t utime;
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
if (cputime_le(cval, utime)) { /* about to fire */
cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime times;
cputime_t ptime;
- thread_group_cputime(tsk, &times);
+ thread_group_cputimer(tsk, &times);
ptime = cputime_add(times.utime, times.stime);
if (cputime_le(cval, ptime)) { /* about to fire */
cval = jiffies_to_cputime(1);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index e694afa0eb8..7b8b0f21a5b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -30,19 +30,20 @@
#define all_var 0
#endif
-extern const unsigned long kallsyms_addresses[];
-extern const u8 kallsyms_names[];
+/* These will be re-linked against their real values during the second link stage */
+extern const unsigned long kallsyms_addresses[] __attribute__((weak));
+extern const u8 kallsyms_names[] __attribute__((weak));
/* tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV)
*/
extern const unsigned long kallsyms_num_syms
- __attribute__((__section__(".rodata")));
+__attribute__((weak, section(".rodata")));
-extern const u8 kallsyms_token_table[];
-extern const u16 kallsyms_token_index[];
+extern const u8 kallsyms_token_table[] __attribute__((weak));
+extern const u16 kallsyms_token_index[] __attribute__((weak));
-extern const unsigned long kallsyms_markers[];
+extern const unsigned long kallsyms_markers[] __attribute__((weak));
static inline int is_kernel_inittext(unsigned long addr)
{
@@ -167,6 +168,9 @@ static unsigned long get_symbol_pos(unsigned long addr,
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
+ /* This kernel should never had been booted. */
+ BUG_ON(!kallsyms_addresses);
+
/* do a binary search on the sorted kallsyms_addresses array */
low = 0;
high = kallsyms_num_syms;
diff --git a/kernel/module.c b/kernel/module.c
index e8b51d41dd7..ba22484a987 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
- unsigned int i;
+ int cpu;
INIT_LIST_HEAD(&mod->modules_which_use_me);
- for (i = 0; i < NR_CPUS; i++)
- local_set(&mod->ref[i].count, 0);
+ for_each_possible_cpu(cpu)
+ local_set(__module_ref_addr(mod, cpu), 0);
/* Hold reference count during initialization. */
- local_set(&mod->ref[raw_smp_processor_id()].count, 1);
+ local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
unsigned int module_refcount(struct module *mod)
{
- unsigned int i, total = 0;
+ unsigned int total = 0;
+ int cpu;
- for (i = 0; i < NR_CPUS; i++)
- total += local_read(&mod->ref[i].count);
+ for_each_possible_cpu(cpu)
+ total += local_read(__module_ref_addr(mod, cpu));
return total;
}
EXPORT_SYMBOL(module_refcount);
@@ -894,7 +895,7 @@ void module_put(struct module *module)
{
if (module) {
unsigned int cpu = get_cpu();
- local_dec(&module->ref[cpu].count);
+ local_dec(__module_ref_addr(module, cpu));
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
@@ -1464,7 +1465,10 @@ static void free_module(struct module *mod)
kfree(mod->args);
if (mod->percpu)
percpu_modfree(mod->percpu);
-
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ if (mod->refptr)
+ percpu_modfree(mod->refptr);
+#endif
/* Free lock-classes: */
lockdep_free_key_range(mod->module_core, mod->core_size);
@@ -2011,6 +2015,14 @@ static noinline struct module *load_module(void __user *umod,
if (err < 0)
goto free_mod;
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
+ mod->name);
+ if (!mod->refptr) {
+ err = -ENOMEM;
+ goto free_mod;
+ }
+#endif
if (pcpuindex) {
/* We have a special allocation for this section. */
percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2018,7 +2030,7 @@ static noinline struct module *load_module(void __user *umod,
mod->name);
if (!percpu) {
err = -ENOMEM;
- goto free_mod;
+ goto free_percpu;
}
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
mod->percpu = percpu;
@@ -2282,6 +2294,9 @@ static noinline struct module *load_module(void __user *umod,
free_percpu:
if (percpu)
percpu_modfree(percpu);
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ percpu_modfree(mod->refptr);
+#endif
free_mod:
kfree(args);
free_hdr:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 157de3a4783..db107c9bbc0 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -10,76 +10,6 @@
#include <linux/kernel_stat.h>
/*
- * Allocate the thread_group_cputime structure appropriately and fill in the
- * current values of the fields. Called from copy_signal() via
- * thread_group_cputime_clone_thread() when adding a second or subsequent
- * thread to a thread group. Assumes interrupts are enabled when called.
- */
-int thread_group_cputime_alloc(struct task_struct *tsk)
-{
- struct signal_struct *sig = tsk->signal;
- struct task_cputime *cputime;
-
- /*
- * If we have multiple threads and we don't already have a
- * per-CPU task_cputime struct (checked in the caller), allocate
- * one and fill it in with the times accumulated so far. We may
- * race with another thread so recheck after we pick up the sighand
- * lock.
- */
- cputime = alloc_percpu(struct task_cputime);
- if (cputime == NULL)
- return -ENOMEM;
- spin_lock_irq(&tsk->sighand->siglock);
- if (sig->cputime.totals) {
- spin_unlock_irq(&tsk->sighand->siglock);
- free_percpu(cputime);
- return 0;
- }
- sig->cputime.totals = cputime;
- cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
- cputime->utime = tsk->utime;
- cputime->stime = tsk->stime;
- cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
- spin_unlock_irq(&tsk->sighand->siglock);
- return 0;
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * @tsk: The task we use to identify the thread group.
- * @times: task_cputime structure in which we return the summed fields.
- *
- * Walk the list of CPUs to sum the per-CPU time fields in the thread group
- * time structure.
- */
-void thread_group_cputime(
- struct task_struct *tsk,
- struct task_cputime *times)
-{
- struct task_cputime *totals, *tot;
- int i;
-
- totals = tsk->signal->cputime.totals;
- if (!totals) {
- times->utime = tsk->utime;
- times->stime = tsk->stime;
- times->sum_exec_runtime = tsk->se.sum_exec_runtime;
- return;
- }
-
- times->stime = times->utime = cputime_zero;
- times->sum_exec_runtime = 0;
- for_each_possible_cpu(i) {
- tot = per_cpu_ptr(totals, i);
- times->utime = cputime_add(times->utime, tot->utime);
- times->stime = cputime_add(times->stime, tot->stime);
- times->sum_exec_runtime += tot->sum_exec_runtime;
- }
-}
-
-/*
* Called after updating RLIMIT_CPU to set timer expiration if necessary.
*/
void update_rlimit_cpu(unsigned long rlim_new)
@@ -300,6 +230,37 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
return 0;
}
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct sighand_struct *sighand;
+ struct signal_struct *sig;
+ struct task_struct *t;
+
+ *times = INIT_CPUTIME;
+
+ rcu_read_lock();
+ sighand = rcu_dereference(tsk->sighand);
+ if (!sighand)
+ goto out;
+
+ sig = tsk->signal;
+
+ t = tsk;
+ do {
+ times->utime = cputime_add(times->utime, t->utime);
+ times->stime = cputime_add(times->stime, t->stime);
+ times->sum_exec_runtime += t->se.sum_exec_runtime;
+
+ t = next_thread(t);
+ } while (t != tsk);
+
+ times->utime = cputime_add(times->utime, sig->utime);
+ times->stime = cputime_add(times->stime, sig->stime);
+ times->sum_exec_runtime += sig->sum_sched_runtime;
+out:
+ rcu_read_unlock();
+}
+
/*
* Sample a process (thread group) clock for the given group_leader task.
* Must be called with tasklist_lock held for reading.
@@ -546,6 +507,29 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
}
/*
+ * Enable the process wide cpu timer accounting.
+ *
+ * serialized using ->sighand->siglock
+ */
+static void start_process_timers(struct task_struct *tsk)
+{
+ tsk->signal->cputimer.running = 1;
+ barrier();
+}
+
+/*
+ * Release the process wide timer accounting -- timer stops ticking when
+ * nobody cares about it.
+ *
+ * serialized using ->sighand->siglock
+ */
+static void stop_process_timers(struct task_struct *tsk)
+{
+ tsk->signal->cputimer.running = 0;
+ barrier();
+}
+
+/*
* Insert the timer on the appropriate list before any timers that
* expire later. This must be called with the tasklist_lock held
* for reading, and interrupts disabled.
@@ -565,6 +549,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
BUG_ON(!irqs_disabled());
spin_lock(&p->sighand->siglock);
+ if (!CPUCLOCK_PERTHREAD(timer->it_clock))
+ start_process_timers(p);
+
listpos = head;
if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
list_for_each_entry(next, head, entry) {
@@ -1057,13 +1044,15 @@ static void check_process_timers(struct task_struct *tsk,
sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it_virt_expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED]))
+ list_empty(&timers[CPUCLOCK_SCHED])) {
+ stop_process_timers(tsk);
return;
+ }
/*
* Collect the current process totals.
*/
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
ptime = cputime_add(utime, cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1329,7 +1318,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;
- thread_group_cputime(tsk, &group_sample);
+ thread_group_cputimer(tsk, &group_sample);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
@@ -1399,6 +1388,33 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+ struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputimer(p, &cputime);
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ break;
+ case CPUCLOCK_VIRT:
+ cpu->cpu = cputime.utime;
+ break;
+ case CPUCLOCK_SCHED:
+ cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ break;
+ }
+ return 0;
+}
+
+/*
* Set one of the process-wide special case CPU timers.
* The tsk->sighand->siglock must be held by the caller.
* The *newval argument is relative and we update it to be absolute, *oldval
@@ -1411,7 +1427,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
- cpu_clock_sample_group(clock_idx, tsk, &now);
+ start_process_timers(tsk);
+ cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 45e8541ab7e..432ee575c9e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,6 +71,14 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
mutex_unlock(&pm_mutex);
}
+static bool entering_platform_hibernation;
+
+bool system_entering_hibernation(void)
+{
+ return entering_platform_hibernation;
+}
+EXPORT_SYMBOL(system_entering_hibernation);
+
#ifdef CONFIG_PM_DEBUG
static void hibernation_debug_sleep(void)
{
@@ -411,6 +419,7 @@ int hibernation_platform_enter(void)
if (error)
goto Close;
+ entering_platform_hibernation = true;
suspend_console();
error = device_suspend(PMSG_HIBERNATE);
if (error) {
@@ -445,6 +454,7 @@ int hibernation_platform_enter(void)
Finish:
hibernation_ops->finish();
Resume_devices:
+ entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
Close:
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 490934fc7ac..bd5a9003497 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -716,7 +716,7 @@ void rcu_check_callbacks(int cpu, int user)
raise_rcu_softirq();
}
-static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
+static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
unsigned long flags;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f2d8638e6c6..b2fd602a6f6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1314,7 +1314,7 @@ int rcu_needs_cpu(int cpu)
* access due to the fact that this CPU cannot possibly have any RCU
* callbacks in flight yet.
*/
-static void
+static void __cpuinit
rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
diff --git a/kernel/relay.c b/kernel/relay.c
index 09ac2008f77..9d79b7854fa 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -663,8 +663,10 @@ int relay_late_setup_files(struct rchan *chan,
mutex_lock(&relay_channels_mutex);
/* Is chan already set up? */
- if (unlikely(chan->has_base_filename))
+ if (unlikely(chan->has_base_filename)) {
+ mutex_unlock(&relay_channels_mutex);
return -EEXIST;
+ }
chan->has_base_filename = 1;
chan->parent = parent;
curr_cpu = get_cpu();
diff --git a/kernel/sched.c b/kernel/sched.c
index 52bbf1c842a..e72485033c4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2266,6 +2266,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
+ if (!sync) {
+ if (current->se.avg_overlap < sysctl_sched_migration_cost &&
+ p->se.avg_overlap < sysctl_sched_migration_cost)
+ sync = 1;
+ } else {
+ if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
+ p->se.avg_overlap >= sysctl_sched_migration_cost)
+ sync = 0;
+ }
+
#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
@@ -3880,19 +3890,24 @@ int select_nohz_load_balancer(int stop_tick)
int cpu = smp_processor_id();
if (stop_tick) {
- cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
- /*
- * If we are going offline and still the leader, give up!
- */
- if (!cpu_active(cpu) &&
- atomic_read(&nohz.load_balancer) == cpu) {
+ if (!cpu_active(cpu)) {
+ if (atomic_read(&nohz.load_balancer) != cpu)
+ return 0;
+
+ /*
+ * If we are going offline and still the leader,
+ * give up!
+ */
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
+
return 0;
}
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
+
/* time for ilb owner also to sleep */
if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4687,8 +4702,8 @@ EXPORT_SYMBOL(default_wake_function);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5cc1c162044..a7e50ba185a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -719,7 +719,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
__enqueue_entity(cfs_rq, se);
}
-static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->last == se)
cfs_rq->last = NULL;
@@ -728,6 +728,12 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
cfs_rq->next = NULL;
}
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ for_each_sched_entity(se)
+ __clear_buddies(cfs_rq_of(se), se);
+}
+
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
@@ -768,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
- if (delta_exec > ideal_runtime)
+ if (delta_exec > ideal_runtime) {
resched_task(rq_of(cfs_rq)->curr);
+ /*
+ * The current task ran long enough, ensure it doesn't get
+ * re-elected due to buddy favours.
+ */
+ clear_buddies(cfs_rq, curr);
+ }
}
static void
@@ -1179,20 +1191,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
int idx, unsigned long load, unsigned long this_load,
unsigned int imbalance)
{
- struct task_struct *curr = this_rq->curr;
- struct task_group *tg;
unsigned long tl = this_load;
unsigned long tl_per_task;
+ struct task_group *tg;
unsigned long weight;
int balanced;
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;
- if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
- p->se.avg_overlap > sysctl_sched_migration_cost))
- sync = 0;
-
/*
* If sync wakeup then subtract the (maximum possible)
* effect of the currently running task from the load
@@ -1419,9 +1426,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
if (!sched_feat(WAKEUP_PREEMPT))
return;
- if (sched_feat(WAKEUP_OVERLAP) && (sync ||
- (se->avg_overlap < sysctl_sched_migration_cost &&
- pse->avg_overlap < sysctl_sched_migration_cost))) {
+ if (sched_feat(WAKEUP_OVERLAP) && sync) {
resched_task(curr);
return;
}
@@ -1452,6 +1457,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
do {
se = pick_next_entity(cfs_rq);
+ /*
+ * If se was a buddy, clear it so that it will have to earn
+ * the favour again.
+ */
+ __clear_buddies(cfs_rq, se);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 954e1a81b79..bac1061cea2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -968,8 +968,8 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
return this_cpu;
- first = first_cpu(*mask);
- if (first != NR_CPUS)
+ first = cpumask_first(mask);
+ if (first < nr_cpu_ids)
return first;
return -1;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index f2773b5d122..a8f93dd374e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,20 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
static inline void account_group_user_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ cputimer = &tsk->signal->cputimer;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->utime = cputime_add(times->utime, cputime);
- put_cpu_no_resched();
- }
+ if (!cputimer->running)
+ return;
+
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.utime =
+ cputime_add(cputimer->cputime.utime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -325,20 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
static inline void account_group_system_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ cputimer = &tsk->signal->cputimer;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->stime = cputime_add(times->stime, cputime);
- put_cpu_no_resched();
- }
+ if (!cputimer->running)
+ return;
+
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.stime =
+ cputime_add(cputimer->cputime.stime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -354,6 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
static inline void account_group_exec_runtime(struct task_struct *tsk,
unsigned long long ns)
{
+ struct thread_group_cputimer *cputimer;
struct signal_struct *sig;
sig = tsk->signal;
@@ -362,11 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
if (unlikely(!sig))
return;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ cputimer = &sig->cputimer;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->sum_exec_runtime += ns;
- put_cpu_no_resched();
- }
+ if (!cputimer->running)
+ return;
+
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.sum_exec_runtime += ns;
+ spin_unlock(&cputimer->lock);
}
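
[Editor's note] The accounting helpers above move from lockless per-CPU totals to a single shared thread_group_cputimer protected by a spinlock, and they return early while cputimer->running is clear so the lock is only taken when a process-wide timer is armed. A rough user-space analogue of that "accumulate only while armed" pattern is sketched below; struct group_timer and account_runtime() are invented names, and pthread_spinlock_t stands in for the kernel spinlock (build with -lpthread).

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Shared accumulator, loosely modelled on struct thread_group_cputimer. */
struct group_timer {
	bool running;			/* armed by the timer code, cleared when idle */
	unsigned long long total_ns;
	pthread_spinlock_t lock;
};

static void account_runtime(struct group_timer *gt, unsigned long long ns)
{
	if (!gt->running)		/* fast path: nothing armed, skip the lock */
		return;

	pthread_spin_lock(&gt->lock);
	gt->total_ns += ns;
	pthread_spin_unlock(&gt->lock);
}

int main(void)
{
	struct group_timer gt = { .running = false, .total_ns = 0 };

	pthread_spin_init(&gt.lock, PTHREAD_PROCESS_PRIVATE);
	account_runtime(&gt, 1000);	/* dropped: timer not armed */
	gt.running = true;
	account_runtime(&gt, 2500);	/* accumulated under the lock */
	printf("total: %llu ns\n", gt.total_ns);
	pthread_spin_destroy(&gt.lock);
	return 0;
}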
diff --git a/kernel/signal.c b/kernel/signal.c
index e73759783dc..2a74fe87c0d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
}
#endif
printk("\n");
+ preempt_disable();
show_regs(regs);
+ preempt_enable();
}
static int __init setup_print_fatal_signals(char *str)
@@ -1365,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
struct siginfo info;
unsigned long flags;
struct sighand_struct *psig;
- struct task_cputime cputime;
int ret = sig;
BUG_ON(sig == -1);
@@ -1395,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
info.si_uid = __task_cred(tsk)->uid;
rcu_read_unlock();
- thread_group_cputime(tsk, &cputime);
- info.si_utime = cputime_to_jiffies(cputime.utime);
- info.si_stime = cputime_to_jiffies(cputime.stime);
+ info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
+ tsk->signal->utime));
+ info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
+ tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
diff --git a/kernel/smp.c b/kernel/smp.c
index 5cfa0e5e3e8..bbedbb7efe3 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
enum {
CSD_FLAG_WAIT = 0x01,
CSD_FLAG_ALLOC = 0x02,
+ CSD_FLAG_LOCK = 0x04,
};
struct call_function_data {
@@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void)
if (data_flags & CSD_FLAG_WAIT) {
smp_wmb();
data->flags &= ~CSD_FLAG_WAIT;
+ } else if (data_flags & CSD_FLAG_LOCK) {
+ smp_wmb();
+ data->flags &= ~CSD_FLAG_LOCK;
} else if (data_flags & CSD_FLAG_ALLOC)
kfree(data);
}
@@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void)
}
}
+static DEFINE_PER_CPU(struct call_single_data, csd_data);
+
/*
* smp_call_function_single - Run a function on a specific CPU
* @func: The function to run. This must be fast and non-blocking.
@@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
func(info);
local_irq_restore(flags);
} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
- struct call_single_data *data = NULL;
+ struct call_single_data *data;
if (!wait) {
+ /*
+ * We are calling a function on a single CPU
+ * and we are not going to wait for it to finish.
+ * We first try to allocate the data, but if we
+ * fail, we fall back to use a per cpu data to pass
+ * the information to that CPU. Since all callers
+ * of this code will use the same data, we must
+ * synchronize the callers to prevent a new caller
+ * from corrupting the data before the callee
+ * can access it.
+ *
+ * The CSD_FLAG_LOCK is used to let us know when
+ * the IPI handler is done with the data.
+ * The first caller will set it, and the callee
+ * will clear it. The next caller must wait for
+ * it to clear before we set it again. This
+ * will make sure the callee is done with the
+ * data before a new caller will use it.
+ */
data = kmalloc(sizeof(*data), GFP_ATOMIC);
if (data)
data->flags = CSD_FLAG_ALLOC;
- }
- if (!data) {
+ else {
+ data = &per_cpu(csd_data, me);
+ while (data->flags & CSD_FLAG_LOCK)
+ cpu_relax();
+ data->flags = CSD_FLAG_LOCK;
+ }
+ } else {
data = &d;
data->flags = CSD_FLAG_WAIT;
}
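
[Editor's note] The long comment added in smp_call_function_single() describes the fallback path: when kmalloc() fails, callers share one per-cpu call_single_data slot and use CSD_FLAG_LOCK as a busy marker, spinning until the IPI handler clears it before the slot is reused. The stripped-down user-space sketch below shows only that flag handoff; the kernel additionally relies on the slot being per-cpu, and slot_acquire()/slot_consume() are invented names using C11 atomics rather than kernel primitives.

#include <stdatomic.h>
#include <stdio.h>

#define SLOT_FLAG_LOCK 0x04	/* mirrors CSD_FLAG_LOCK in the patch */

/* One shared slot, loosely modelled on the per-cpu csd_data fallback. */
struct call_slot {
	atomic_int flags;
	void (*func)(void *);
	void *info;
};

/* Caller side: wait until the previous request has been consumed, then claim
 * the slot by setting the busy flag. */
static void slot_acquire(struct call_slot *slot)
{
	while (atomic_load(&slot->flags) & SLOT_FLAG_LOCK)
		;				/* spin, as cpu_relax() would */
	atomic_store(&slot->flags, SLOT_FLAG_LOCK);
}

/* Consumer side (the IPI handler in the patch): run the call, then clear the
 * flag so the next caller may reuse the slot. */
static void slot_consume(struct call_slot *slot)
{
	slot->func(slot->info);
	atomic_fetch_and(&slot->flags, ~SLOT_FLAG_LOCK);
}

static void hello(void *arg) { printf("called with %s\n", (char *)arg); }

int main(void)
{
	struct call_slot slot = { .flags = 0 };

	slot_acquire(&slot);
	slot.func = hello;
	slot.info = "slot data";
	slot_consume(&slot);	/* in the kernel this runs on the target CPU */
	return 0;
}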
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d9188c66278..85d5a245510 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -16,6 +16,7 @@
#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
+#include <linux/sysctl.h>
#include <asm/irq_regs.h>
@@ -88,6 +89,14 @@ void touch_all_softlockup_watchdogs(void)
}
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
+int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ touch_all_softlockup_watchdogs();
+ return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+}
+
/*
* This callback runs from the timer interrupt, and checks
* whether the watchdog thread has hung or not:
diff --git a/kernel/sys.c b/kernel/sys.c
index e7dc0e10a48..f145c415bc1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1525,22 +1525,14 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
return -EINVAL;
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
+ if (new_rlim.rlim_cur > new_rlim.rlim_max)
+ return -EINVAL;
old_rlim = current->signal->rlim + resource;
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
-
- if (resource == RLIMIT_NOFILE) {
- if (new_rlim.rlim_max == RLIM_INFINITY)
- new_rlim.rlim_max = sysctl_nr_open;
- if (new_rlim.rlim_cur == RLIM_INFINITY)
- new_rlim.rlim_cur = sysctl_nr_open;
- if (new_rlim.rlim_max > sysctl_nr_open)
- return -EPERM;
- }
-
- if (new_rlim.rlim_cur > new_rlim.rlim_max)
- return -EINVAL;
+ if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+ return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
if (retval)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 368d1638ee7..790f9d78566 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -809,7 +809,7 @@ static struct ctl_table kern_table[] = {
.data = &softlockup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = &proc_dosoftlockup_thresh,
.strategy = &sysctl_intvec,
.extra1 = &neg_one,
.extra2 = &sixty,
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index ea2f48af83c..d13be216a79 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev,
if (dev->mode != mode) {
dev->set_mode(mode, dev);
dev->mode = mode;
+
+ /*
+ * A nsec2cyc multiplicator of 0 is invalid and we'd crash
+ * on it, so fix it up and emit a warning:
+ */
+ if (mode == CLOCK_EVT_MODE_ONESHOT) {
+ if (unlikely(!dev->mult)) {
+ dev->mult = 1;
+ WARN_ON(1);
+ }
+ }
}
}
@@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
- /*
- * A nsec2cyc multiplicator of 0 is invalid and we'd crash
- * on it, so fix it up and emit a warning:
- */
- if (unlikely(!dev->mult)) {
- dev->mult = 1;
- WARN_ON(1);
- }
-
spin_lock(&clockevents_lock);
list_add(&dev->list, &clockevent_devices);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1b6c05bd0d0..d3f1ef4d5cb 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,7 +134,7 @@ __setup("nohz=", setup_tick_nohz);
* value. We do this unconditionally on any cpu, as we don't know whether the
* cpu, which has the update task assigned is in a long sleep.
*/
-void tick_nohz_update_jiffies(void)
+static void tick_nohz_update_jiffies(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09d..9a236ffe2aa 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/suspend.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
@@ -1736,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
{
struct task_struct *p;
+ rcu_read_lock();
do_each_pid_task(pid, PIDTYPE_PID, p) {
clear_tsk_trace_trace(p);
} while_each_pid_task(pid, PIDTYPE_PID, p);
+ rcu_read_unlock();
+
put_pid(pid);
}
@@ -1746,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
{
struct task_struct *p;
+ rcu_read_lock();
do_each_pid_task(pid, PIDTYPE_PID, p) {
set_tsk_trace_trace(p);
} while_each_pid_task(pid, PIDTYPE_PID, p);
+ rcu_read_unlock();
}
static void clear_ftrace_pid_task(struct pid **pid)
@@ -1965,6 +1971,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
@@ -2043,6 +2050,27 @@ static int start_graph_tracing(void)
return ret;
}
+/*
+ * Hibernation protection.
+ * The state of the current task is too much unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+ void *unused)
+{
+ switch (state) {
+ case PM_HIBERNATION_PREPARE:
+ pause_graph_tracing();
+ break;
+
+ case PM_POST_HIBERNATION:
+ unpause_graph_tracing();
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
@@ -2050,6 +2078,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
mutex_lock(&ftrace_sysctl_lock);
+ ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+ register_pm_notifier(&ftrace_suspend_notifier);
+
atomic_inc(&ftrace_graph_active);
ret = start_graph_tracing();
if (ret) {
@@ -2075,6 +2106,7 @@ void unregister_ftrace_graph(void)
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+ unregister_pm_notifier(&ftrace_suspend_notifier);
mutex_unlock(&ftrace_sysctl_lock);
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662e..bd38c5cfd8a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
return 0;
}
-#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
/*
* head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
}
if (next_page == head_page) {
- if (!(buffer->flags & RB_FL_OVERWRITE)) {
- /* reset write */
- if (tail <= BUF_PAGE_SIZE)
- local_set(&tail_page->write, tail);
+ if (!(buffer->flags & RB_FL_OVERWRITE))
goto out_unlock;
- }
/* tail_page has not moved yet? */
if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
out_unlock:
+ /* reset write */
+ if (tail <= BUF_PAGE_SIZE)
+ local_set(&tail_page->write, tail);
+
__raw_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->overrun = 0;
cpu_buffer->entries = 0;
+
+ cpu_buffer->write_stamp = 0;
+ cpu_buffer->read_stamp = 0;
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add9..17bb88d86ac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
-unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly tracing_max_latency;
unsigned long __read_mostly tracing_thresh;
/*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
* it if we decide to change what log level the ftrace dump
* should be at.
*/
-#define KERN_TRACE KERN_INFO
+#define KERN_TRACE KERN_EMERG
static void
trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
dump_ran = 1;
/* No turning back! */
+ tracing_off();
ftrace_kill();
for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8..62a78d94353 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
static void __irqsoff_tracer_init(struct trace_array *tr)
{
+ tracing_max_latency = 0;
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e3..42ae1e77b6b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
static int wakeup_tracer_init(struct trace_array *tr)
{
+ tracing_max_latency = 0;
wakeup_trace = tr;
start_wakeup_tracer(tr);
return 0;
diff --git a/kernel/wait.c b/kernel/wait.c
index cd87131f2fc..42a2dbc181c 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+/*
+ * finish_wait - clean up after waiting in a queue
+ * @q: waitqueue waited on
+ * @wait: wait descriptor
+ *
+ * Sets current thread back to running state and removes
+ * the wait descriptor from the given waitqueue if still
+ * queued.
+ */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
}
EXPORT_SYMBOL(finish_wait);
+/*
+ * abort_exclusive_wait - abort exclusive waiting in a queue
+ * @q: waitqueue waited on
+ * @wait: wait descriptor
+ * @state: runstate of the waiter to be woken
+ * @key: key to identify a wait bit queue or %NULL
+ *
+ * Sets current thread back to running state and removes
+ * the wait descriptor from the given waitqueue if still
+ * queued.
+ *
+ * Wakes up the next waiter if the caller is concurrently
+ * woken up through the queue.
+ *
+ * This prevents waiter starvation where an exclusive waiter
+ * aborts and is woken up concurrently and noone wakes up
+ * the next waiter.
+ */
+void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
+ unsigned int mode, void *key)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ spin_lock_irqsave(&q->lock, flags);
+ if (!list_empty(&wait->task_list))
+ list_del_init(&wait->task_list);
+ else if (waitqueue_active(q))
+ __wake_up_common(q, mode, 1, 0, key);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(abort_exclusive_wait);
+
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);
@@ -177,17 +219,20 @@ int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
int (*action)(void *), unsigned mode)
{
- int ret = 0;
-
do {
+ int ret;
+
prepare_to_wait_exclusive(wq, &q->wait, mode);
- if (test_bit(q->key.bit_nr, q->key.flags)) {
- if ((ret = (*action)(q->key.flags)))
- break;
- }
+ if (!test_bit(q->key.bit_nr, q->key.flags))
+ continue;
+ ret = action(q->key.flags);
+ if (!ret)
+ continue;
+ abort_exclusive_wait(wq, &q->wait, mode, &q->key);
+ return ret;
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
finish_wait(wq, &q->wait);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(__wait_on_bit_lock);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f445833ae3..1f0c509b40d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -971,6 +971,8 @@ undo:
}
#ifdef CONFIG_SMP
+static struct workqueue_struct *work_on_cpu_wq __read_mostly;
+
struct work_for_cpu {
struct work_struct work;
long (*fn)(void *);
@@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w)
* @fn: the function to run
* @arg: the function arg
*
- * This will return -EINVAL in the cpu is not online, or the return value
- * of @fn otherwise.
+ * This will return the value @fn returns.
+ * It is up to the caller to ensure that the cpu doesn't go offline.
*/
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
@@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
INIT_WORK(&wfc.work, do_work_for_cpu);
wfc.fn = fn;
wfc.arg = arg;
- get_online_cpus();
- if (unlikely(!cpu_online(cpu)))
- wfc.ret = -EINVAL;
- else {
- schedule_work_on(cpu, &wfc.work);
- flush_work(&wfc.work);
- }
- put_online_cpus();
+ queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
+ flush_work(&wfc.work);
return wfc.ret;
}
@@ -1025,4 +1021,8 @@ void __init init_workqueues(void)
hotcpu_notifier(workqueue_cpu_callback, 0);
keventd_wq = create_workqueue("events");
BUG_ON(!keventd_wq);
+#ifdef CONFIG_SMP
+ work_on_cpu_wq = create_workqueue("work_on_cpu");
+ BUG_ON(!work_on_cpu_wq);
+#endif
}