diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 28 | ||||
-rw-r--r-- | kernel/hrtimer.c | 41 | ||||
-rw-r--r-- | kernel/irq/chip.c | 2 | ||||
-rw-r--r-- | kernel/module.c | 35 | ||||
-rw-r--r-- | kernel/signal.c | 2 | ||||
-rw-r--r-- | kernel/smp.c | 36 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 26 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 27 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 15 | ||||
-rw-r--r-- | kernel/trace/trace.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 1 |
12 files changed, 180 insertions, 39 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c29831076e7..5a54ff42874 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) { } write_unlock(&css_set_lock); - list_del(&root->root_list); - root_count--; + if (!list_empty(&root->root_list)) { + list_del(&root->root_list); + root_count--; + } mutex_unlock(&cgroup_mutex); @@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, err_remove: + cgroup_lock_hierarchy(root); list_del(&cgrp->sibling); + cgroup_unlock_hierarchy(root); root->number_of_cgroups--; err_destroy: @@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; int refcnt; - do { + while (1) { /* We can only remove a CSS with a refcnt==1 */ refcnt = atomic_read(&css->refcnt); if (refcnt > 1) { @@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) * css_tryget() to spin until we set the * CSS_REMOVED bits or abort */ - } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); + if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) + break; + cpu_relax(); + } } done: for_each_subsys(cgrp->root, ss) { @@ -2991,20 +2998,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&cgroup_mutex); return 0; } - task_lock(tsk); - cg = tsk->cgroups; - parent = task_cgroup(tsk, subsys->subsys_id); /* Pin the hierarchy */ - if (!atomic_inc_not_zero(&parent->root->sb->s_active)) { + if (!atomic_inc_not_zero(&root->sb->s_active)) { /* We race with the final deactivate_super() */ mutex_unlock(&cgroup_mutex); return 0; } /* Keep the cgroup alive */ + task_lock(tsk); + parent = task_cgroup(tsk, subsys->subsys_id); + cg = tsk->cgroups; get_css_set(cg); task_unlock(tsk); + mutex_unlock(&cgroup_mutex); /* Now do the VFS work to create a cgroup */ @@ -3043,7 +3051,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&inode->i_mutex); put_css_set(cg); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); /* The cgroup is still accessible in the VFS, but * we're not going to try to rmdir() it at this * point. */ @@ -3069,7 +3077,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_lock(&cgroup_mutex); put_css_set(cg); mutex_unlock(&cgroup_mutex); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); return ret; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f33afb0407b..f394d2a42ca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) continue; timer = rb_entry(base->first, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + /* + * clock_was_set() has changed base->offset so the + * result might be negative. Fix it up to prevent a + * false positive in clockevents_program_event() + */ + if (expires.tv64 < 0) + expires.tv64 = 0; if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } @@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS +static int force_clock_reprogram; + +/* + * After 5 iteration's attempts, we consider that hrtimer_interrupt() + * is hanging, which could happen with something that slows the interrupt + * such as the tracing. Then we force the clock reprogramming for each future + * hrtimer interrupts to avoid infinite loops and use the min_delta_ns + * threshold that we will overwrite. + * The next tick event will be scheduled to 3 times we currently spend on + * hrtimer_interrupt(). This gives a good compromise, the cpus will spend + * 1/4 of their time to process the hrtimer interrupts. This is enough to + * let it running without serious starvation. + */ + +static inline void +hrtimer_interrupt_hanging(struct clock_event_device *dev, + ktime_t try_time) +{ + force_clock_reprogram = 1; + dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; + printk(KERN_WARNING "hrtimer: interrupt too slow, " + "forcing clock min delta to %lu ns\n", dev->min_delta_ns); +} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; + int nr_retries = 0; int i; BUG_ON(!cpu_base->hres_active); @@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; retry: + /* 5 retries is enough to notice a hang */ + if (!(++nr_retries % 5)) + hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + now = ktime_get(); expires_next.tv64 = KTIME_MAX; @@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, 0)) + if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } } @@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706d25e..7de11bd64df 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) out_unlock: spin_unlock(&desc->lock); } +EXPORT_SYMBOL_GPL(handle_level_irq); /** * handle_fasteoi_irq - irq handler for transparent controllers @@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, } spin_unlock_irqrestore(&desc->lock, flags); } +EXPORT_SYMBOL_GPL(__set_irq_handler); void set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, diff --git a/kernel/module.c b/kernel/module.c index e8b51d41dd7..ba22484a987 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { - unsigned int i; + int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); + for_each_possible_cpu(cpu) + local_set(__module_ref_addr(mod, cpu), 0); /* Hold reference count during initialization. */ - local_set(&mod->ref[raw_smp_processor_id()].count, 1); + local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int i, total = 0; + unsigned int total = 0; + int cpu; - for (i = 0; i < NR_CPUS; i++) - total += local_read(&mod->ref[i].count); + for_each_possible_cpu(cpu) + total += local_read(__module_ref_addr(mod, cpu)); return total; } EXPORT_SYMBOL(module_refcount); @@ -894,7 +895,7 @@ void module_put(struct module *module) { if (module) { unsigned int cpu = get_cpu(); - local_dec(&module->ref[cpu].count); + local_dec(__module_ref_addr(module, cpu)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1464,7 +1465,10 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); - +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + if (mod->refptr) + percpu_modfree(mod->refptr); +#endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2011,6 +2015,14 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto free_mod; +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), + mod->name); + if (!mod->refptr) { + err = -ENOMEM; + goto free_mod; + } +#endif if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, @@ -2018,7 +2030,7 @@ static noinline struct module *load_module(void __user *umod, mod->name); if (!percpu) { err = -ENOMEM; - goto free_mod; + goto free_percpu; } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; @@ -2282,6 +2294,9 @@ static noinline struct module *load_module(void __user *umod, free_percpu: if (percpu) percpu_modfree(percpu); +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod->refptr); +#endif free_mod: kfree(args); free_hdr: diff --git a/kernel/signal.c b/kernel/signal.c index e73759783dc..b6b36768b75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) } #endif printk("\n"); + preempt_disable(); show_regs(regs); + preempt_enable(); } static int __init setup_print_fatal_signals(char *str) diff --git a/kernel/smp.c b/kernel/smp.c index 5cfa0e5e3e8..bbedbb7efe3 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); enum { CSD_FLAG_WAIT = 0x01, CSD_FLAG_ALLOC = 0x02, + CSD_FLAG_LOCK = 0x04, }; struct call_function_data { @@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void) if (data_flags & CSD_FLAG_WAIT) { smp_wmb(); data->flags &= ~CSD_FLAG_WAIT; + } else if (data_flags & CSD_FLAG_LOCK) { + smp_wmb(); + data->flags &= ~CSD_FLAG_LOCK; } else if (data_flags & CSD_FLAG_ALLOC) kfree(data); } @@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void) } } +static DEFINE_PER_CPU(struct call_single_data, csd_data); + /* * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. @@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, func(info); local_irq_restore(flags); } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *data = NULL; + struct call_single_data *data; if (!wait) { + /* + * We are calling a function on a single CPU + * and we are not going to wait for it to finish. + * We first try to allocate the data, but if we + * fail, we fall back to use a per cpu data to pass + * the information to that CPU. Since all callers + * of this code will use the same data, we must + * synchronize the callers to prevent a new caller + * from corrupting the data before the callee + * can access it. + * + * The CSD_FLAG_LOCK is used to let us know when + * the IPI handler is done with the data. + * The first caller will set it, and the callee + * will clear it. The next caller must wait for + * it to clear before we set it again. This + * will make sure the callee is done with the + * data before a new caller will use it. + */ data = kmalloc(sizeof(*data), GFP_ATOMIC); if (data) data->flags = CSD_FLAG_ALLOC; - } - if (!data) { + else { + data = &per_cpu(csd_data, me); + while (data->flags & CSD_FLAG_LOCK) + cpu_relax(); + data->flags = CSD_FLAG_LOCK; + } + } else { data = &d; data->flags = CSD_FLAG_WAIT; } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 63e05d423a0..21a5ca84951 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -274,6 +274,21 @@ out_bc: } /* + * Transfer the do_timer job away from a dying cpu. + * + * Called with interrupts disabled. + */ +static void tick_handover_do_timer(int *cpup) +{ + if (*cpup == tick_do_timer_cpu) { + int cpu = cpumask_first(cpu_online_mask); + + tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : + TICK_DO_TIMER_NONE; + } +} + +/* * Shutdown an event device on a given cpu: * * This is called on a life CPU, when a CPU is dead. So we cannot @@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - /* Transfer the do_timer job away from this cpu */ - if (*cpup == tick_do_timer_cpu) { - int cpu = cpumask_first(cpu_online_mask); - - tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : - TICK_DO_TIMER_NONE; - } spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, tick_broadcast_oneshot_control(reason); break; + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(dev); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(dev); tick_shutdown_broadcast(dev); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2f32969c09d..7dcf6e9f2b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -17,6 +17,7 @@ #include <linux/clocksource.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> +#include <linux/suspend.h> #include <linux/debugfs.h> #include <linux/hardirq.h> #include <linux/kthread.h> @@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_graph_active; +static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -2043,6 +2045,27 @@ static int start_graph_tracing(void) return ret; } +/* + * Hibernation protection. + * The state of the current task is too much unstable during + * suspend/restore to disk. We want to protect against that. + */ +static int +ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, + void *unused) +{ + switch (state) { + case PM_HIBERNATION_PREPARE: + pause_graph_tracing(); + break; + + case PM_POST_HIBERNATION: + unpause_graph_tracing(); + break; + } + return NOTIFY_DONE; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { @@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); mutex_unlock(&ftrace_sysctl_lock); } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8b0daf0662e..bd38c5cfd8a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) /* * head_page == tail_page && head == tail then buffer is empty. @@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) { - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_unlock; - } /* tail_page has not moved yet? */ if (tail_page == cpu_buffer->tail_page) { @@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; @@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->overrun = 0; cpu_buffer->entries = 0; + + cpu_buffer->write_stamp = 0; + cpu_buffer->read_stamp = 0; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c580233add9..17bb88d86ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,7 +40,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* @@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) @@ -3770,6 +3770,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7c2e326bbc8..62a78d94353 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 43586b689e3..42ae1e77b6b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int wakeup_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); return 0; |