From e4fa4c97016037620f9dc8bafe03e1086b665b4c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 14 Jan 2009 14:58:15 +0800 Subject: rcu: add __cpuinit to rcu_init_percpu_data() Impact: reduce memory footprint add __cpuinit to rcu_init_percpu_data(), and this function's text will be discarded after boot when !CONFIG_HOTPLUG_CPU. Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- kernel/rcuclassic.c | 2 +- kernel/rcutree.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 490934fc7ac..bd5a9003497 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -716,7 +716,7 @@ void rcu_check_callbacks(int cpu, int user) raise_rcu_softirq(); } -static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, +static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { unsigned long flags; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f2d8638e6c6..b2fd602a6f6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1314,7 +1314,7 @@ int rcu_needs_cpu(int cpu) * access due to the fact that this CPU cannot possibly have any RCU * callbacks in flight yet. */ -static void +static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; -- cgit v1.2.3 From baf48f6577e581a9adb8fe849dc80e24b21d171d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Mon, 12 Jan 2009 21:15:17 -0800 Subject: softlock: fix false panic which can occur if softlockup_thresh is reduced At run-time, if softlockup_thresh is changed to a much lower value, touch_timestamp is likely to be much older than the new softlock_thresh. This will cause a false softlockup to be detected. If softlockup_panic is enabled, the system will panic. The fix is to touch all watchdogs before changing softlockup_thresh. Signed-off-by: Mandeep Singh Baines Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ kernel/softlockup.c | 9 +++++++++ kernel/sysctl.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f..54cbabf3b87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -293,6 +293,9 @@ extern void sched_show_task(struct task_struct *p); extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); +extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d9188c66278..85d5a245510 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -88,6 +89,14 @@ void touch_all_softlockup_watchdogs(void) } EXPORT_SYMBOL(touch_all_softlockup_watchdogs); +int proc_dosoftlockup_thresh(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + touch_all_softlockup_watchdogs(); + return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); +} + /* * This callback runs from the timer interrupt, and checks * whether the watchdog thread has hung or not: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 89d74436318..596dc31a711 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -800,7 +800,7 @@ static struct ctl_table kern_table[] = { .data = &softlockup_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &proc_dosoftlockup_thresh, .strategy = &sysctl_intvec, .extra1 = &neg_one, .extra2 = &sixty, -- cgit v1.2.3 From c903ff837909ccada51243307d4239f86af40179 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 12:28:29 -0800 Subject: rcu: move Kconfig menu Move RCU Kconfig options from top-level menu to an "RCU Subsystem" menu under the "General Setup" menu. Signed-off-by: Mike Travis Tested-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar --- init/Kconfig | 179 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 92 insertions(+), 87 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index a724a149bf3..989f58b0395 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -238,6 +238,98 @@ config AUDIT_TREE def_bool y depends on AUDITSYSCALL && INOTIFY +menu "RCU Subsystem" + +choice + prompt "RCU Implementation" + default CLASSIC_RCU + +config CLASSIC_RCU + bool "Classic RCU" + help + This option selects the classic RCU implementation that is + designed for best read-side performance on non-realtime + systems. + + Select this option if you are unsure. + +config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + +config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + +endchoice + +config RCU_TRACE + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say N if unsure. + +config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS + help + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + +config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. + +endmenu # "RCU Subsystem" + config IKCONFIG tristate "Kernel .config support" ---help--- @@ -973,90 +1065,3 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool -choice - prompt "RCU Implementation" - default CLASSIC_RCU - -config CLASSIC_RCU - bool "Classic RCU" - help - This option selects the classic RCU implementation that is - designed for best read-side performance on non-realtime - systems. - - Select this option if you are unsure. - -config TREE_RCU - bool "Tree-based hierarchical RCU" - help - This option selects the RCU implementation that is - designed for very large SMP system with hundreds or - thousands of CPUs. - -config PREEMPT_RCU - bool "Preemptible RCU" - depends on PREEMPT - help - This option reduces the latency of the kernel by making certain - RCU sections preemptible. Normally RCU code is non-preemptible, if - this option is selected then read-only RCU sections become - preemptible. This helps latency, but may expose bugs due to - now-naive assumptions about each RCU read-side critical section - remaining on a given CPU through its execution. - -endchoice - -config RCU_TRACE - bool "Enable tracing for RCU" - depends on TREE_RCU || PREEMPT_RCU - help - This option provides tracing in RCU which presents stats - in debugfs for debugging RCU implementation. - - Say Y here if you want to enable RCU tracing - Say N if you are unsure. - -config RCU_FANOUT - int "Tree-based hierarchical RCU fanout value" - range 2 64 if 64BIT - range 2 32 if !64BIT - depends on TREE_RCU - default 64 if 64BIT - default 32 if !64BIT - help - This option controls the fanout of hierarchical implementations - of RCU, allowing RCU to work efficiently on machines with - large numbers of CPUs. This value must be at least the cube - root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit - systems and up to 262,144 for 64-bit systems. - - Select a specific number if testing RCU itself. - Take the default if unsure. - -config RCU_FANOUT_EXACT - bool "Disable tree-based hierarchical RCU auto-balancing" - depends on TREE_RCU - default n - help - This option forces use of the exact RCU_FANOUT value specified, - regardless of imbalances in the hierarchy. This is useful for - testing RCU itself, and might one day be useful on systems with - strong NUMA behavior. - - Without RCU_FANOUT_EXACT, the code will balance the hierarchy. - - Say N if unsure. - -config TREE_RCU_TRACE - def_bool RCU_TRACE && TREE_RCU - select DEBUG_FS - help - This option provides tracing for the TREE_RCU implementation, - permitting Makefile to trivially select kernel/rcutree_trace.c. - -config PREEMPT_RCU_TRACE - def_bool RCU_TRACE && PREEMPT_RCU - select DEBUG_FS - help - This option provides tracing for the PREEMPT_RCU implementation, - permitting Makefile to trivially select kernel/rcupreempt_trace.c. -- cgit v1.2.3 From fdb6a8f4db813b4e50f4e975efe6be12ba5bf460 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Sat, 17 Jan 2009 17:13:27 +0100 Subject: oprofile: fix uninitialized use of struct op_entry Impact: fix crash In case of losing samples struct op_entry could have been used uninitialized causing e.g. a wrong preemption count or NULL pointer access. This patch fixes this. Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- drivers/oprofile/cpu_buffer.c | 5 +++++ drivers/oprofile/cpu_buffer.h | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 2e03b6d796d..e76d715e434 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -393,16 +393,21 @@ oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, return; fail: + entry->event = NULL; cpu_buf->sample_lost_overflow++; } int oprofile_add_data(struct op_entry *entry, unsigned long val) { + if (!entry->event) + return 0; return op_cpu_buffer_add_data(entry, val); } int oprofile_write_commit(struct op_entry *entry) { + if (!entry->event) + return -EINVAL; return op_cpu_buffer_write_commit(entry); } diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 63f81c44846..272995d2029 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -66,6 +66,13 @@ static inline void op_cpu_buffer_reset(int cpu) cpu_buf->last_task = NULL; } +/* + * op_cpu_buffer_add_data() and op_cpu_buffer_write_commit() may be + * called only if op_cpu_buffer_write_reserve() did not return NULL or + * entry->event != NULL, otherwise entry->size or entry->event will be + * used uninitialized. + */ + struct op_sample *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); int op_cpu_buffer_write_commit(struct op_entry *entry); -- cgit v1.2.3 From b786c6a98ef6fa81114ba7b9fbfc0d67060775e3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 17 Jan 2009 12:04:36 +0100 Subject: relay: fix lock imbalance in relay_late_setup_files One fail path in relay_late_setup_files() omits mutex_unlock(&relay_channels_mutex); Add it. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- kernel/relay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 09ac2008f77..9d79b7854fa 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -663,8 +663,10 @@ int relay_late_setup_files(struct rchan *chan, mutex_lock(&relay_channels_mutex); /* Is chan already set up? */ - if (unlikely(chan->has_base_filename)) + if (unlikely(chan->has_base_filename)) { + mutex_unlock(&relay_channels_mutex); return -EEXIST; + } chan->has_base_filename = 1; chan->parent = parent; curr_cpu = get_cpu(); -- cgit v1.2.3 From 749a440b2218e9937dca8c68e23e51ca4d8d37fe Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 Jan 2009 09:46:38 +0800 Subject: rcu: remove duplicate CONFIG_RCU_CPU_STALL_DETECTOR Impact: remove the old CONFIG_RCU_CPU_STALL_DETECTOR tree_rcu introduce CONFIG_RCU_CPU_STALL_DETECTOR again. These two are the same exactly except: the old one "depends on CLASSIC_RCU" the new one "depends on CLASSIC_RCU || TREE_RCU" This patch remove the old one. Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4c9ae6085c7..e770e85f76c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -631,19 +631,6 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. -config RCU_CPU_STALL_DETECTOR - bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU - default n - help - This option causes RCU to printk information on which - CPUs are delaying the current grace period, but only when - the grace period extends for excessive time periods. - - Say Y if you want RCU to perform such checks. - - Say N if you are unsure. - config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" depends on CLASSIC_RCU || TREE_RCU -- cgit v1.2.3 From 336f6c322d87806ef93afad6308ac65083a865e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Jan 2009 09:50:44 +0100 Subject: debugobjects: add and use INIT_WORK_ON_STACK Impact: Fix debugobjects warning debugobject enabled kernels spit out a warning in hpet code due to a workqueue which is initialized on stack. Add INIT_WORK_ON_STACK() which calls init_timer_on_stack() and use it in hpet. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 3 ++- include/linux/workqueue.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad9069..64d5ad0b8ad 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); wait_for_completion(&work.complete); + destroy_timer_on_stack(&work.work.timer); break; case CPU_DEAD: if (hdev) { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f2..20b59eb1fac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -124,6 +124,12 @@ struct execute_work { init_timer_deferrable(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question -- cgit v1.2.3