aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c28
-rw-r--r--kernel/hrtimer.c41
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/handle.c16
-rw-r--r--kernel/irq/manage.c10
-rw-r--r--kernel/kallsyms.c16
-rw-r--r--kernel/power/disk.c10
-rw-r--r--kernel/time/tick-common.c26
-rw-r--r--kernel/trace/ftrace.c27
-rw-r--r--kernel/trace/ring_buffer.c15
-rw-r--r--kernel/trace/trace.c5
-rw-r--r--kernel/trace/trace_irqsoff.c1
-rw-r--r--kernel/trace/trace_sched_wakeup.c1
13 files changed, 157 insertions, 41 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c29831076e7..5a54ff42874 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) {
}
write_unlock(&css_set_lock);
- list_del(&root->root_list);
- root_count--;
+ if (!list_empty(&root->root_list)) {
+ list_del(&root->root_list);
+ root_count--;
+ }
mutex_unlock(&cgroup_mutex);
@@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
err_remove:
+ cgroup_lock_hierarchy(root);
list_del(&cgrp->sibling);
+ cgroup_unlock_hierarchy(root);
root->number_of_cgroups--;
err_destroy:
@@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
for_each_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
int refcnt;
- do {
+ while (1) {
/* We can only remove a CSS with a refcnt==1 */
refcnt = atomic_read(&css->refcnt);
if (refcnt > 1) {
@@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
* css_tryget() to spin until we set the
* CSS_REMOVED bits or abort
*/
- } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+ if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
+ break;
+ cpu_relax();
+ }
}
done:
for_each_subsys(cgrp->root, ss) {
@@ -2991,20 +2998,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&cgroup_mutex);
return 0;
}
- task_lock(tsk);
- cg = tsk->cgroups;
- parent = task_cgroup(tsk, subsys->subsys_id);
/* Pin the hierarchy */
- if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+ if (!atomic_inc_not_zero(&root->sb->s_active)) {
/* We race with the final deactivate_super() */
mutex_unlock(&cgroup_mutex);
return 0;
}
/* Keep the cgroup alive */
+ task_lock(tsk);
+ parent = task_cgroup(tsk, subsys->subsys_id);
+ cg = tsk->cgroups;
get_css_set(cg);
task_unlock(tsk);
+
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
@@ -3043,7 +3051,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&inode->i_mutex);
put_css_set(cg);
- deactivate_super(parent->root->sb);
+ deactivate_super(root->sb);
/* The cgroup is still accessible in the VFS, but
* we're not going to try to rmdir() it at this
* point. */
@@ -3069,7 +3077,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_lock(&cgroup_mutex);
put_css_set(cg);
mutex_unlock(&cgroup_mutex);
- deactivate_super(parent->root->sb);
+ deactivate_super(root->sb);
return ret;
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f33afb0407b..f394d2a42ca 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
continue;
timer = rb_entry(base->first, struct hrtimer, node);
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+ /*
+ * clock_was_set() has changed base->offset so the
+ * result might be negative. Fix it up to prevent a
+ * false positive in clockevents_program_event()
+ */
+ if (expires.tv64 < 0)
+ expires.tv64 = 0;
if (expires.tv64 < cpu_base->expires_next.tv64)
cpu_base->expires_next = expires;
}
@@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer)
#ifdef CONFIG_HIGH_RES_TIMERS
+static int force_clock_reprogram;
+
+/*
+ * After 5 iteration's attempts, we consider that hrtimer_interrupt()
+ * is hanging, which could happen with something that slows the interrupt
+ * such as the tracing. Then we force the clock reprogramming for each future
+ * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
+ * threshold that we will overwrite.
+ * The next tick event will be scheduled to 3 times we currently spend on
+ * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
+ * 1/4 of their time to process the hrtimer interrupts. This is enough to
+ * let it running without serious starvation.
+ */
+
+static inline void
+hrtimer_interrupt_hanging(struct clock_event_device *dev,
+ ktime_t try_time)
+{
+ force_clock_reprogram = 1;
+ dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+ printk(KERN_WARNING "hrtimer: interrupt too slow, "
+ "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+}
/*
* High resolution timer interrupt
* Called with interrupts disabled
@@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
ktime_t expires_next, now;
+ int nr_retries = 0;
int i;
BUG_ON(!cpu_base->hres_active);
@@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
dev->next_event.tv64 = KTIME_MAX;
retry:
+ /* 5 retries is enough to notice a hang */
+ if (!(++nr_retries % 5))
+ hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+
now = ktime_get();
expires_next.tv64 = KTIME_MAX;
@@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
- if (tick_program_event(expires_next, 0))
+ if (tick_program_event(expires_next, force_clock_reprogram))
goto retry;
}
}
@@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
break;
#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+ break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
{
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f63c706d25e..7de11bd64df 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
out_unlock:
spin_unlock(&desc->lock);
}
+EXPORT_SYMBOL_GPL(handle_level_irq);
/**
* handle_fasteoi_irq - irq handler for transparent controllers
@@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
}
spin_unlock_irqrestore(&desc->lock, flags);
}
+EXPORT_SYMBOL_GPL(__set_irq_handler);
void
set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c20db0be917..3aba8d12f32 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -39,6 +39,18 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
ack_bad_irq(irq);
}
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+static void __init init_irq_default_affinity(void)
+{
+ alloc_bootmem_cpumask_var(&irq_default_affinity);
+ cpumask_setall(irq_default_affinity);
+}
+#else
+static void __init init_irq_default_affinity(void)
+{
+}
+#endif
+
/*
* Linux has a controller-independent interrupt architecture.
* Every controller has a 'controller-template', that is used
@@ -134,6 +146,8 @@ int __init early_irq_init(void)
int legacy_count;
int i;
+ init_irq_default_affinity();
+
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
@@ -219,6 +233,8 @@ int __init early_irq_init(void)
int count;
int i;
+ init_irq_default_affinity();
+
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index cd0cd8dcb34..291f0366455 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -15,17 +15,9 @@
#include "internals.h"
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
cpumask_var_t irq_default_affinity;
-static int init_irq_default_affinity(void)
-{
- alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
- cpumask_setall(irq_default_affinity);
- return 0;
-}
-core_initcall(init_irq_default_affinity);
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index e694afa0eb8..7b8b0f21a5b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -30,19 +30,20 @@
#define all_var 0
#endif
-extern const unsigned long kallsyms_addresses[];
-extern const u8 kallsyms_names[];
+/* These will be re-linked against their real values during the second link stage */
+extern const unsigned long kallsyms_addresses[] __attribute__((weak));
+extern const u8 kallsyms_names[] __attribute__((weak));
/* tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV)
*/
extern const unsigned long kallsyms_num_syms
- __attribute__((__section__(".rodata")));
+__attribute__((weak, section(".rodata")));
-extern const u8 kallsyms_token_table[];
-extern const u16 kallsyms_token_index[];
+extern const u8 kallsyms_token_table[] __attribute__((weak));
+extern const u16 kallsyms_token_index[] __attribute__((weak));
-extern const unsigned long kallsyms_markers[];
+extern const unsigned long kallsyms_markers[] __attribute__((weak));
static inline int is_kernel_inittext(unsigned long addr)
{
@@ -167,6 +168,9 @@ static unsigned long get_symbol_pos(unsigned long addr,
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
+ /* This kernel should never had been booted. */
+ BUG_ON(!kallsyms_addresses);
+
/* do a binary search on the sorted kallsyms_addresses array */
low = 0;
high = kallsyms_num_syms;
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 45e8541ab7e..432ee575c9e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,6 +71,14 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
mutex_unlock(&pm_mutex);
}
+static bool entering_platform_hibernation;
+
+bool system_entering_hibernation(void)
+{
+ return entering_platform_hibernation;
+}
+EXPORT_SYMBOL(system_entering_hibernation);
+
#ifdef CONFIG_PM_DEBUG
static void hibernation_debug_sleep(void)
{
@@ -411,6 +419,7 @@ int hibernation_platform_enter(void)
if (error)
goto Close;
+ entering_platform_hibernation = true;
suspend_console();
error = device_suspend(PMSG_HIBERNATE);
if (error) {
@@ -445,6 +454,7 @@ int hibernation_platform_enter(void)
Finish:
hibernation_ops->finish();
Resume_devices:
+ entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
resume_console();
Close:
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 63e05d423a0..21a5ca84951 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -274,6 +274,21 @@ out_bc:
}
/*
+ * Transfer the do_timer job away from a dying cpu.
+ *
+ * Called with interrupts disabled.
+ */
+static void tick_handover_do_timer(int *cpup)
+{
+ if (*cpup == tick_do_timer_cpu) {
+ int cpu = cpumask_first(cpu_online_mask);
+
+ tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
+ TICK_DO_TIMER_NONE;
+ }
+}
+
+/*
* Shutdown an event device on a given cpu:
*
* This is called on a life CPU, when a CPU is dead. So we cannot
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
clockevents_exchange_device(dev, NULL);
td->evtdev = NULL;
}
- /* Transfer the do_timer job away from this cpu */
- if (*cpup == tick_do_timer_cpu) {
- int cpu = cpumask_first(cpu_online_mask);
-
- tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
- TICK_DO_TIMER_NONE;
- }
spin_unlock_irqrestore(&tick_device_lock, flags);
}
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
tick_broadcast_oneshot_control(reason);
break;
+ case CLOCK_EVT_NOTIFY_CPU_DYING:
+ tick_handover_do_timer(dev);
+ break;
+
case CLOCK_EVT_NOTIFY_CPU_DEAD:
tick_shutdown_broadcast_oneshot(dev);
tick_shutdown_broadcast(dev);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09d..7dcf6e9f2b0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/suspend.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
@@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
@@ -2043,6 +2045,27 @@ static int start_graph_tracing(void)
return ret;
}
+/*
+ * Hibernation protection.
+ * The state of the current task is too much unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+ void *unused)
+{
+ switch (state) {
+ case PM_HIBERNATION_PREPARE:
+ pause_graph_tracing();
+ break;
+
+ case PM_POST_HIBERNATION:
+ unpause_graph_tracing();
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
@@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
mutex_lock(&ftrace_sysctl_lock);
+ ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+ register_pm_notifier(&ftrace_suspend_notifier);
+
atomic_inc(&ftrace_graph_active);
ret = start_graph_tracing();
if (ret) {
@@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void)
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+ unregister_pm_notifier(&ftrace_suspend_notifier);
mutex_unlock(&ftrace_sysctl_lock);
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662e..bd38c5cfd8a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
return 0;
}
-#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
/*
* head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
}
if (next_page == head_page) {
- if (!(buffer->flags & RB_FL_OVERWRITE)) {
- /* reset write */
- if (tail <= BUF_PAGE_SIZE)
- local_set(&tail_page->write, tail);
+ if (!(buffer->flags & RB_FL_OVERWRITE))
goto out_unlock;
- }
/* tail_page has not moved yet? */
if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
out_unlock:
+ /* reset write */
+ if (tail <= BUF_PAGE_SIZE)
+ local_set(&tail_page->write, tail);
+
__raw_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->overrun = 0;
cpu_buffer->entries = 0;
+
+ cpu_buffer->write_stamp = 0;
+ cpu_buffer->read_stamp = 0;
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add9..17bb88d86ac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
-unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly tracing_max_latency;
unsigned long __read_mostly tracing_thresh;
/*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
* it if we decide to change what log level the ftrace dump
* should be at.
*/
-#define KERN_TRACE KERN_INFO
+#define KERN_TRACE KERN_EMERG
static void
trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
dump_ran = 1;
/* No turning back! */
+ tracing_off();
ftrace_kill();
for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8..62a78d94353 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
static void __irqsoff_tracer_init(struct trace_array *tr)
{
+ tracing_max_latency = 0;
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e3..42ae1e77b6b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
static int wakeup_tracer_init(struct trace_array *tr)
{
+ tracing_max_latency = 0;
wakeup_trace = tr;
start_wakeup_tracer(tr);
return 0;