aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/freezer.c7
-rw-r--r--kernel/futex.c1
-rw-r--r--kernel/hrtimer.c110
-rw-r--r--kernel/irq/internals.h3
-rw-r--r--kernel/irq/manage.c50
-rw-r--r--kernel/irq/migration.c2
-rw-r--r--kernel/kmod.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c2
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/rcutree.c3
-rw-r--r--kernel/sched.c61
-rw-r--r--kernel/sched_fair.c13
-rw-r--r--kernel/sched_rt.c18
-rw-r--r--kernel/softirq.c64
-rw-r--r--kernel/time/clockevents.c11
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/blktrace.c1
-rw-r--r--kernel/trace/ftrace.c9
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_event_types.h3
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/trace/trace_stack.c4
31 files changed, 278 insertions, 127 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f0dd5..869dc221733 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
-#include <linux/mnt_namespace.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 4812d60b29f..9b42695f0d1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
-#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 2f4936cf708..bd1d42b17cb 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,12 +44,19 @@ void refrigerator(void)
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
+ /* prevent accounting of that task to load */
+ current->flags |= PF_FREEZING;
+
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!frozen(current))
break;
schedule();
}
+
+ /* Remove the accounting blocker */
+ current->flags &= ~PF_FREEZING;
+
pr_debug("%s left refrigerator\n", current->comm);
__set_current_state(save);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862125f..0672ff88f15 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
if (err < 0)
return err;
+ page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e..49da79ab848 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ return preferred_cpu;
+ }
+#endif
+ return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires;
+
+ if (!new_base->cpu_base->hres_active)
+ return 0;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+ return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+ return 0;
+#endif
+}
+
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
- int cpu, preferred_cpu = -1;
-
- cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- preferred_cpu = get_nohz_load_balancer();
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
-#endif
+ int this_cpu = smp_processor_id();
+ int cpu = hrtimer_get_target(this_cpu, pinned);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
if (base != new_base) {
/*
- * We are trying to schedule the timer on the local CPU.
+ * We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
- /* Optimized away for NOHZ=n SMP=n */
- if (cpu == preferred_cpu) {
- /* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
- ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
- new_base->offset);
-#else
- ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
- /*
- * Get the next event on target cpu from the
- * clock events layer.
- * This covers the highres=off nohz=on case as well.
- */
- ktime_t next = clockevents_get_next_event(cpu);
-
- ktime_t delta = ktime_sub(expires, next);
-
- /*
- * We do not migrate the timer when it is expiring
- * before the next event on the target cpu because
- * we cannot reprogram the target cpu hardware and
- * we would cause it to fire late.
- */
- if (delta.tv64 < 0) {
- cpu = smp_processor_id();
- spin_unlock(&new_base->cpu_base->lock);
- spin_lock(&base->cpu_base->lock);
- timer->base = base;
- goto again;
- }
+ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+ cpu = this_cpu;
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
}
timer->base = new_base;
}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX;
+ spin_lock(&cpu_base->lock);
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
- spin_lock(&cpu_base->lock);
-
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer);
}
- spin_unlock(&cpu_base->lock);
base++;
}
+ /*
+ * Store the new expiry value so the migration code can verify
+ * against it.
+ */
cpu_base->expires_next = expires_next;
+ spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 73468253143..e70ed5592eb 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -42,8 +42,7 @@ static inline void unregister_handler_proc(unsigned int irq,
extern int irq_select_affinity_usr(unsigned int irq);
-extern void
-irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
+extern void irq_set_thread_affinity(struct irq_desc *desc);
/*
* Debugging printout:
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 50da6767290..f0de36f13a4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,14 +80,22 @@ int irq_can_set_affinity(unsigned int irq)
return 1;
}
-void
-irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+/**
+ * irq_set_thread_affinity - Notify irq threads to adjust affinity
+ * @desc: irq descriptor which has affitnity changed
+ *
+ * We just set IRQTF_AFFINITY and delegate the affinity setting
+ * to the interrupt thread itself. We can not call
+ * set_cpus_allowed_ptr() here as we hold desc->lock and this
+ * code can be called from hard interrupt context.
+ */
+void irq_set_thread_affinity(struct irq_desc *desc)
{
struct irqaction *action = desc->action;
while (action) {
if (action->thread)
- set_cpus_allowed_ptr(action->thread, cpumask);
+ set_bit(IRQTF_AFFINITY, &action->thread_flags);
action = action->next;
}
}
@@ -112,7 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
if (desc->status & IRQ_MOVE_PCNTXT) {
if (!desc->chip->set_affinity(irq, cpumask)) {
cpumask_copy(desc->affinity, cpumask);
- irq_set_thread_affinity(desc, cpumask);
+ irq_set_thread_affinity(desc);
}
}
else {
@@ -122,7 +130,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
#else
if (!desc->chip->set_affinity(irq, cpumask)) {
cpumask_copy(desc->affinity, cpumask);
- irq_set_thread_affinity(desc, cpumask);
+ irq_set_thread_affinity(desc);
}
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -176,7 +184,7 @@ int irq_select_affinity_usr(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
if (!ret)
- irq_set_thread_affinity(desc, desc->affinity);
+ irq_set_thread_affinity(desc);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
@@ -444,6 +452,34 @@ static int irq_wait_for_interrupt(struct irqaction *action)
}
/*
+ * Check whether we need to change the affinity of the interrupt thread.
+ */
+static void
+irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
+{
+ cpumask_var_t mask;
+
+ if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
+ return;
+
+ /*
+ * In case we are out of memory we set IRQTF_AFFINITY again and
+ * try again next time
+ */
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ set_bit(IRQTF_AFFINITY, &action->thread_flags);
+ return;
+ }
+
+ spin_lock_irq(&desc->lock);
+ cpumask_copy(mask, desc->affinity);
+ spin_unlock_irq(&desc->lock);
+
+ set_cpus_allowed_ptr(current, mask);
+ free_cpumask_var(mask);
+}
+
+/*
* Interrupt handler thread
*/
static int irq_thread(void *data)
@@ -458,6 +494,8 @@ static int irq_thread(void *data)
while (!irq_wait_for_interrupt(action)) {
+ irq_thread_check_affinity(desc, action);
+
atomic_inc(&desc->threads_active);
spin_lock_irq(&desc->lock);
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index cfe767ca154..fcb6c96f262 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -45,7 +45,7 @@ void move_masked_irq(int irq)
< nr_cpu_ids))
if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
cpumask_copy(desc->affinity, desc->pending_mask);
- irq_set_thread_affinity(desc, desc->pending_mask);
+ irq_set_thread_affinity(desc);
}
cpumask_clear(desc->pending_mask);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bedb2bf..385c31a1bdb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/slab.h>
-#include <linux/mnt_namespace.h>
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d..16b5739c516 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
- int safety;
/* Ensure no-one is preepmted on the garbages */
- mutex_unlock(&kprobe_insn_mutex);
- safety = check_safety();
- mutex_lock(&kprobe_insn_mutex);
- if (safety != 0)
+ if (check_safety())
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2..0a049837008 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return ret;
}
if (ret > 0) {
- printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
- "it should follow 0/-E convention\n"
- KERN_WARNING "%s: loading module anyway...\n",
+ printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 787d4daef18..95093104195 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2030,7 +2030,7 @@ fail:
static void perf_mmap_free_page(unsigned long addr)
{
- struct page *page = virt_to_page(addr);
+ struct page *page = virt_to_page((void *)addr);
page->mapping = NULL;
__free_page(page);
diff --git a/kernel/pid.c b/kernel/pid.c
index 5fa1db48d8b..31310b5d3f5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,7 +36,6 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
-#include <linux/kmemleak.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
if (!pid_hash)
panic("Could not alloc pidhash!\n");
- /*
- * pid_hash contains references to allocated struct pid objects and it
- * must be scanned by kmemleak to avoid false positives.
- */
- kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
- GFP_KERNEL);
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375daae..bf0014d6a5f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
-#include <linux/smp_lock.h>
#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61c78b2c07b..082c320e4db 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
* interference; SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
- retval = mutex_lock_interruptible(&task->cred_guard_mutex);
- if (retval < 0)
+ retval = -ERESTARTNOINTR;
+ if (mutex_lock_interruptible(&task->cred_guard_mutex))
goto out;
task_lock(task);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d2..7717b95c202 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
int j;
struct rcu_node *rnp;
- printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+ printk(KERN_INFO "Hierarchical RCU implementation.\n");
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
- printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
}
module_param(blimit, int, 0);
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e..1b59e265273 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
#endif
#ifdef CONFIG_SMP
unsigned long rt_nr_migratory;
+ unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
#endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
#ifdef CONFIG_SCHEDSTATS
- p->se.wait_start = 0;
- p->se.sum_sleep_runtime = 0;
- p->se.sleep_start = 0;
- p->se.block_start = 0;
- p->se.sleep_max = 0;
- p->se.block_max = 0;
- p->se.exec_max = 0;
- p->se.slice_max = 0;
- p->se.wait_max = 0;
+ p->se.wait_start = 0;
+ p->se.wait_max = 0;
+ p->se.wait_count = 0;
+ p->se.wait_sum = 0;
+
+ p->se.sleep_start = 0;
+ p->se.sleep_max = 0;
+ p->se.sum_sleep_runtime = 0;
+
+ p->se.block_start = 0;
+ p->se.block_max = 0;
+ p->se.exec_max = 0;
+ p->se.slice_max = 0;
+
+ p->se.nr_migrations_cold = 0;
+ p->se.nr_failed_migrations_affine = 0;
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+ p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+ p->se.nr_wakeups_local = 0;
+ p->se.nr_wakeups_remote = 0;
+ p->se.nr_wakeups_affine = 0;
+ p->se.nr_wakeups_affine_attempts = 0;
+ p->se.nr_wakeups_passive = 0;
+ p->se.nr_wakeups_idle = 0;
+
#endif
INIT_LIST_HEAD(&p->rt.run_list);
@@ -6541,6 +6564,11 @@ SYSCALL_DEFINE0(sched_yield)
return 0;
}
+static inline int should_resched(void)
+{
+ return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
+}
+
static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6588,7 @@ static void __cond_resched(void)
int __sched _cond_resched(void)
{
- if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
- system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
__cond_resched();
return 1;
}
@@ -6579,12 +6606,12 @@ EXPORT_SYMBOL(_cond_resched);
*/
int cond_resched_lock(spinlock_t *lock)
{
- int resched = need_resched() && system_state == SYSTEM_RUNNING;
+ int resched = should_resched();
int ret = 0;
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched && need_resched())
+ if (resched)
__cond_resched();
else
cpu_relax();
@@ -6599,7 +6626,7 @@ int __sched cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
- if (need_resched() && system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
local_bh_enable();
__cond_resched();
local_bh_disable();
@@ -7262,6 +7289,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
static void calc_global_load_remove(struct rq *rq)
{
atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+ rq->calc_load_active = 0;
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -7488,6 +7516,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
task_rq_unlock(rq, &flags);
get_task_struct(p);
cpu_rq(cpu)->migration_thread = p;
+ rq->calc_load_update = calc_load_update;
break;
case CPU_ONLINE:
@@ -7498,8 +7527,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
/* Update our root-domain */
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
- rq->calc_load_update = calc_load_update;
- rq->calc_load_active = 0;
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -9070,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
- plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+ plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
#endif
rt_rq->rt_time = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556..9ffb2b2ceba 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -266,6 +266,12 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
return min_vruntime;
}
+static inline int entity_before(struct sched_entity *a,
+ struct sched_entity *b)
+{
+ return (s64)(a->vruntime - b->vruntime) < 0;
+}
+
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
return se->vruntime - cfs_rq->min_vruntime;
@@ -687,7 +693,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
* all of which have the same weight.
*/
if (sched_feat(NORMALIZED_SLEEPER) &&
- task_of(se)->policy != SCHED_IDLE)
+ (!entity_is_task(se) ||
+ task_of(se)->policy != SCHED_IDLE))
thresh = calc_delta_fair(thresh, se);
vruntime -= thresh;
@@ -1016,7 +1023,7 @@ static void yield_task_fair(struct rq *rq)
/*
* Already in the rightmost position?
*/
- if (unlikely(!rightmost || rightmost->vruntime < se->vruntime))
+ if (unlikely(!rightmost || entity_before(rightmost, se)))
return;
/*
@@ -1712,7 +1719,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
/* 'curr' will be NULL if the child belongs to a different group */
if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
- curr && curr->vruntime < se->vruntime) {
+ curr && entity_before(curr, se)) {
/*
* Upon rescheduling, sched_class::put_prev_task() will place
* 'current' within the tree based on its new key value.
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a7304..3918e01994e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
#ifdef CONFIG_RT_GROUP_SCHED
+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
#else /* CONFIG_RT_GROUP_SCHED */
+#define rt_entity_is_task(rt_se) (1)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
static void update_rt_migration(struct rt_rq *rt_rq)
{
- if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+ if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
if (!rt_rq->overloaded) {
rt_set_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total++;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory++;
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total--;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory--;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3a94905fa5d..eb5e131a048 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -345,7 +345,9 @@ void open_softirq(int nr, void (*action)(struct softirq_action *))
softirq_vec[nr].action = action;
}
-/* Tasklets */
+/*
+ * Tasklets
+ */
struct tasklet_head
{
struct tasklet_struct *head;
@@ -493,6 +495,66 @@ void tasklet_kill(struct tasklet_struct *t)
EXPORT_SYMBOL(tasklet_kill);
+/*
+ * tasklet_hrtimer
+ */
+
+/*
+ * The trampoline is called when the hrtimer expires. If this is
+ * called from the hrtimer interrupt then we schedule the tasklet as
+ * the timer callback function expects to run in softirq context. If
+ * it's called in softirq context anyway (i.e. high resolution timers
+ * disabled) then the hrtimer callback is called right away.
+ */
+static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
+{
+ struct tasklet_hrtimer *ttimer =
+ container_of(timer, struct tasklet_hrtimer, timer);
+
+ if (hrtimer_is_hres_active(timer)) {
+ tasklet_hi_schedule(&ttimer->tasklet);
+ return HRTIMER_NORESTART;
+ }
+ return ttimer->function(timer);
+}
+
+/*
+ * Helper function which calls the hrtimer callback from
+ * tasklet/softirq context
+ */
+static void __tasklet_hrtimer_trampoline(unsigned long data)
+{
+ struct tasklet_hrtimer *ttimer = (void *)data;
+ enum hrtimer_restart restart;
+
+ restart = ttimer->function(&ttimer->timer);
+ if (restart != HRTIMER_NORESTART)
+ hrtimer_restart(&ttimer->timer);
+}
+
+/**
+ * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
+ * @ttimer: tasklet_hrtimer which is initialized
+ * @function: hrtimer callback funtion which gets called from softirq context
+ * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
+ * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
+ */
+void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t which_clock, enum hrtimer_mode mode)
+{
+ hrtimer_init(&ttimer->timer, which_clock, mode);
+ ttimer->timer.function = __hrtimer_tasklet_trampoline;
+ tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
+ (unsigned long)ttimer);
+ ttimer->function = function;
+}
+EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
+
+/*
+ * Remote softirq bits
+ */
+
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd46111..a6dcd67b041 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
- struct tick_device *td;
- struct clock_event_device *dev;
-
- td = &per_cpu(tick_cpu_device, cpu);
- dev = td->evtdev;
-
- return dev->next_event;
-}
#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 592bf584d1d..7466cb81125 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -513,7 +513,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
* Check to make sure we don't switch to a non-highres capable
* clocksource if the tick code is in oneshot mode (highres or nohz)
*/
- if (tick_oneshot_mode_active() &&
+ if (tick_oneshot_mode_active() && ovr &&
!(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
printk(KERN_WARNING "%s clocksource is not HRT compatible. "
"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
diff --git a/kernel/timer.c b/kernel/timer.c
index 0b36b9e5cc8..a7f07d5a624 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -714,7 +714,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
* networking code - if the timer is re-modified
* to be the same thing then just return:
*/
- if (timer->expires == expires && timer_pending(timer))
+ if (timer_pending(timer) && timer->expires == expires)
return 1;
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e766..019f380fd76 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
the timings of the initcalls and traces key events and the identity
of tasks that can cause boot delays, such as context-switches.
- Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ Its aim is to be parsed by the scripts/bootgraph.pl tool to
produce pretty graphics about boot inefficiencies, giving a visual
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
- You must pass in ftrace=initcall to the kernel command line
- to enable this on bootup.
+ You must pass in initcall_debug and ftrace=initcall to the kernel
+ command line to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc3..1090b0aed9b 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
+#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf04df..4521c77d1a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
.stat_show = function_stat_show
};
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
* The files created are permanent, if something happens
* we still do not free memory.
*/
- kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
@@ -3160,10 +3159,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
- if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
- last_ftrace_enabled = ftrace_enabled;
+ last_ftrace_enabled = !!ftrace_enabled;
if (ftrace_enabled) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0dfdfa..8bc8d8afea6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134..6db005e1248 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
ftrace_graph_ret_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ret.func, func)
+ TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+ TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+ TRACE_FIELD(unsigned long, ret.overrun, overrun)
TRACE_FIELD(int, ret.depth, depth)
),
TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 7402144bff2..75ef000613c 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -363,7 +363,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
out_reg:
ret = register_ftrace_function_probe(glob, ops, count);
- return ret;
+ return ret < 0 ? ret : 0;
}
static struct ftrace_func_command ftrace_traceon_cmd = {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e..e0c2545622e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
- s->buffer[len] = 0;
- seq_puts(m, s->buffer);
+ seq_write(m, s->buffer, len);
trace_seq_init(s);
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71db..e644af91012 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
if (ret || !write ||
- (last_stack_tracer_enabled == stack_tracer_enabled))
+ (last_stack_tracer_enabled == !!stack_tracer_enabled))
goto out;
- last_stack_tracer_enabled = stack_tracer_enabled;
+ last_stack_tracer_enabled = !!stack_tracer_enabled;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);