From 1b04624f93bb1c4f9495b8476d1dd0200af019e2 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 19 Aug 2008 20:37:07 -0700 Subject: tracehook: fix SA_NOCLDWAIT I outwitted myself again in commit 2b2a1ff64afbadac842bbc58c5166962cf4f7664, and broke the SA_NOCLDWAIT behavior so it leaks zombies. This fixes it. Reported-by: Andi Kleen Signed-off-by: Roland McGrath --- kernel/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index c539f60c6f4..e661b01d340 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; struct sighand_struct *psig; + int ret = sig; BUG_ON(sig == -1); @@ -1402,7 +1403,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) * is implementation-defined: we do (if you don't want * it, just use SIG_IGN instead). */ - tsk->exit_signal = -1; + ret = tsk->exit_signal = -1; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) sig = -1; } @@ -1411,7 +1412,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); - return sig; + return ret; } static void do_notify_parent_cldstop(struct task_struct *tsk, int why) -- cgit v1.2.3 From 2d70b68d42b5196a48ccb639e3797f097ef5bea3 Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Wed, 20 Aug 2008 14:09:17 -0700 Subject: fix setpriority(PRIO_PGRP) thread iterator breakage When user calls sys_setpriority(PRIO_PGRP ...) on a NPTL style multi-LWP process, only the task leader of the process is affected, all other sibling LWP threads didn't receive the setting. The problem was that the iterator used in sys_setpriority() only iterates over one task for each process, ignoring all other sibling threads. Introduce a new macro do_each_pid_thread / while_each_pid_thread to walk each thread of a process. 
Convert 4 call sites in {set/get}priority and ioprio_{set/get}. Signed-off-by: Ken Chen Cc: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 3dacb00a7f7..038a7bc0901 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -169,9 +169,9 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) pgrp = find_vpid(who); else pgrp = task_pgrp(current); - do_each_pid_task(pgrp, PIDTYPE_PGID, p) { + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { error = set_one_prio(p, niceval, error); - } while_each_pid_task(pgrp, PIDTYPE_PGID, p); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: user = current->user; @@ -229,11 +229,11 @@ asmlinkage long sys_getpriority(int which, int who) pgrp = find_vpid(who); else pgrp = task_pgrp(current); - do_each_pid_task(pgrp, PIDTYPE_PGID, p) { + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; - } while_each_pid_task(pgrp, PIDTYPE_PGID, p); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: user = current->user; -- cgit v1.2.3 From efc2dead2c82cae31943828f6d977c483942b0eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Aug 2008 12:44:55 +0200 Subject: sched: enable LB_BIAS by default Yanmin reported a significant regression on his 16-core machine due to: commit 93b75217df39e6d75889cc6f8050343286aff4a5 Author: Peter Zijlstra Date: Fri Jun 27 13:41:33 2008 +0200 Flip back to the old behaviour. 
Reported-by: "Zhang, Yanmin" Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 862b06bd560..9353ca78154 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -8,6 +8,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1) SCHED_FEAT(HRTICK, 1) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(ASYM_GRAN, 1) -SCHED_FEAT(LB_BIAS, 0) +SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) -- cgit v1.2.3 From 01dcb0443ed89eccf26c2b43f1ea13b368ae740d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 20 Aug 2008 16:35:19 -0700 Subject: rcu: fix synchronize_rcu() so that kernel-doc works Fix RCU's synchronize_rcu() so that it looks like a C function, enabling it to be recognized as a function with kernel-doc annotation. Warning(linux-2.6.26-git11//kernel/rcupdate.c:81): No description found for parameter 'synchronize_rcu' Warning(linux-2.6.26-git11//kernel/rcupdate.c:81): No description found for parameter 'call_rcu' [akpm@linux-foundation.org: fix comment] Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/rcupdate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f14f372cf6f..467d5940f62 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -77,6 +77,7 @@ void wakeme_after_rcu(struct rcu_head *head) * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. 
*/ +void synchronize_rcu(void); /* Makes kernel-doc tools happy */ synchronize_rcu_xxx(synchronize_rcu, call_rcu) EXPORT_SYMBOL_GPL(synchronize_rcu); -- cgit v1.2.3 From 3c4fbe5e01d7e5309be5045e7ae0db20a049e6dc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 20 Aug 2008 16:37:38 -0700 Subject: nohz: fix wrong event handler after online an offlined cpu On the tickless system (CONFIG_NO_HZ=y and CONFIG_HIGH_RES_TIMERS=n), after I made an offlined cpu online, I found this cpu's event handler was tick_handle_periodic, not tick_nohz_handler. After debugging, I found this bug was caused by the wrong tick mode. The tick mode is not changed to NOHZ_MODE_INACTIVE when the cpu is offline. This patch fixes this bug. Signed-off-by: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f5da526424a..7a46bde78c6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -643,17 +643,21 @@ void tick_setup_sched_timer(void) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } +#endif /* HIGH_RES_TIMERS */ +#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +# ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) hrtimer_cancel(&ts->sched_timer); +# endif ts->nohz_mode = NOHZ_MODE_INACTIVE; } -#endif /* HIGH_RES_TIMERS */ +#endif /** * Async notification about clocksource changes -- cgit v1.2.3 From 7a8fc9b248e77a4eab0613acf30a6811799786b3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 17 Aug 2008 17:36:59 +0300 Subject: removed unused #include <linux/version.h>'s This patch lets the files using linux/version.h match the files that #include it. 
Signed-off-by: Adrian Bunk Signed-off-by: Linus Torvalds --- kernel/nsproxy.c | 1 - kernel/power/swap.c | 1 - kernel/user_namespace.c | 1 - kernel/utsname.c | 1 - kernel/utsname_sysctl.c | 1 - 5 files changed, 5 deletions(-) (limited to 'kernel') diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 21575fc46d0..1d3ef29a258 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0abf9a463f..80ccac849e4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a9ab0596de4..532858fa5b8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 64d398f1244..815237a55af 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index fe3a56c2256..4ab9659d269 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include static void *get_uts(ctl_table *table, int write) -- cgit v1.2.3 From 354879bb977e06695993435745f06a0f6d39ce2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2008 17:15:34 +0200 Subject: sched_clock: fix cpu_clock() This patch fixes 3 issues: a) it removes the dependency on jiffies, because jiffies are incremented by a single CPU, and the tick is not synchronized between CPUs. Therefore relying on it to calculate a window to clip whacky TSC values doesn't work as it can drift around. So instead use [GTOD, GTOD+TICK_NSEC) as the window. 
b) __update_sched_clock() did (roughly speaking): delta = sched_clock() - scd->tick_raw; clock += delta; Which gives exponential growth, instead of linear. c) allows the sched_clock_cpu() value to wrap the u64 without breaking. The results are more reliable sched_clock() deltas: before after sched_clock cpu_clock: 15750 51312 51488 cpu_clock: 59719 51052 50947 cpu_clock: 15879 51249 51061 cpu_clock: 1 50933 51198 cpu_clock: 1 50931 51039 cpu_clock: 1 51093 50981 cpu_clock: 1 51043 51040 cpu_clock: 1 50959 50938 cpu_clock: 1 50981 51011 cpu_clock: 1 51364 51212 cpu_clock: 1 51219 51273 cpu_clock: 1 51389 51048 cpu_clock: 1 51285 51611 cpu_clock: 1 50964 51137 cpu_clock: 1 50973 50968 cpu_clock: 1 50967 50972 cpu_clock: 1 58910 58485 cpu_clock: 1 51082 51025 cpu_clock: 1 50957 50958 cpu_clock: 1 50958 50957 cpu_clock: 1006128 51128 50971 cpu_clock: 1 51107 51155 cpu_clock: 1 51371 51081 cpu_clock: 1 51104 51365 cpu_clock: 1 51363 51309 cpu_clock: 1 51107 51160 cpu_clock: 1 51139 51100 cpu_clock: 1 51216 51136 cpu_clock: 1 51207 51215 cpu_clock: 1 51087 51263 cpu_clock: 1 51249 51177 cpu_clock: 1 51519 51412 cpu_clock: 1 51416 51255 cpu_clock: 1 51591 51594 cpu_clock: 1 50966 51374 cpu_clock: 1 50966 50966 cpu_clock: 1 51291 50948 cpu_clock: 1 50973 50867 cpu_clock: 1 50970 50970 cpu_clock: 998306 50970 50971 cpu_clock: 1 50971 50970 cpu_clock: 1 50970 50970 cpu_clock: 1 50971 50971 cpu_clock: 1 50970 50970 cpu_clock: 1 51351 50970 cpu_clock: 1 50970 51352 cpu_clock: 1 50971 50970 cpu_clock: 1 50970 50970 cpu_clock: 1 51321 50971 cpu_clock: 1 50974 51324 Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_clock.c | 84 +++++++++++++++++++++------------------------------- 1 file changed, 34 insertions(+), 50 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 204991a0bfa..e8ab096ddfe 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -12,19 +12,17 @@ * * Create a semi stable clock from a 
mixture of other events, including: * - gtod - * - jiffies * - sched_clock() * - explicit idle events * * We use gtod as base and the unstable clock deltas. The deltas are filtered, - * making it monotonic and keeping it within an expected window. This window - * is set up using jiffies. + * making it monotonic and keeping it within an expected window. * * Furthermore, explicit sleep and wakeup hooks allow us to account for time * that is otherwise invisible (TSC gets stopped). * * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat - * consistent between cpus (never more than 1 jiffies difference). + * consistent between cpus (never more than 2 jiffies difference). */ #include #include @@ -54,7 +52,6 @@ struct sched_clock_data { */ raw_spinlock_t lock; - unsigned long tick_jiffies; u64 tick_raw; u64 tick_gtod; u64 clock; @@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) void sched_clock_init(void) { u64 ktime_now = ktime_to_ns(ktime_get()); - unsigned long now_jiffies = jiffies; int cpu; for_each_possible_cpu(cpu) { struct sched_clock_data *scd = cpu_sdc(cpu); scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - scd->tick_jiffies = now_jiffies; scd->tick_raw = 0; scd->tick_gtod = ktime_now; scd->clock = ktime_now; @@ -91,47 +86,52 @@ void sched_clock_init(void) sched_clock_running = 1; } +/* + * min,max except they take wrapping into account + */ + +static inline u64 wrap_min(u64 x, u64 y) +{ + return (s64)(x - y) < 0 ? x : y; +} + +static inline u64 wrap_max(u64 x, u64 y) +{ + return (s64)(x - y) > 0 ? 
x : y; +} + /* * update the percpu scd from the raw @now value * * - filter out backward motion - * - use jiffies to generate a min,max window to clip the raw values + * - use the GTOD tick value to create a window to filter crazy TSC values */ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) { - unsigned long now_jiffies = jiffies; - long delta_jiffies = now_jiffies - scd->tick_jiffies; - u64 clock = scd->clock; - u64 min_clock, max_clock; s64 delta = now - scd->tick_raw; + u64 clock, min_clock, max_clock; WARN_ON_ONCE(!irqs_disabled()); - min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC; - if (unlikely(delta < 0)) { - clock++; - goto out; - } + if (unlikely(delta < 0)) + delta = 0; - max_clock = min_clock + TICK_NSEC; + /* + * scd->clock = clamp(scd->tick_gtod + delta, + * max(scd->tick_gtod, scd->clock), + * scd->tick_gtod + TICK_NSEC); + */ - if (unlikely(clock + delta > max_clock)) { - if (clock < max_clock) - clock = max_clock; - else - clock++; - } else { - clock += delta; - } + clock = scd->tick_gtod + delta; + min_clock = wrap_max(scd->tick_gtod, scd->clock); + max_clock = scd->tick_gtod + TICK_NSEC; - out: - if (unlikely(clock < min_clock)) - clock = min_clock; + clock = wrap_max(clock, min_clock); + clock = wrap_min(clock, max_clock); - scd->tick_jiffies = now_jiffies; scd->clock = clock; - return clock; + return scd->clock; } static void lock_double_clock(struct sched_clock_data *data1, @@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu) * larger time as the latest time for both * runqueues. 
(this creates monotonic movement) */ - if (likely(remote_clock < this_clock)) { + if (likely((s64)(remote_clock - this_clock) < 0)) { clock = this_clock; scd->clock = clock; } else { @@ -207,14 +207,9 @@ void sched_clock_tick(void) now = sched_clock(); __raw_spin_lock(&scd->lock); - __update_sched_clock(scd, now); - /* - * update tick_gtod after __update_sched_clock() because that will - * already observe 1 new jiffy; adding a new tick_gtod to that would - * increase the clock 2 jiffies. - */ scd->tick_raw = now; scd->tick_gtod = now_gtod; + __update_sched_clock(scd, now); __raw_spin_unlock(&scd->lock); } @@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); */ void sched_clock_idle_wakeup_event(u64 delta_ns) { - struct sched_clock_data *scd = this_scd(); - - /* - * Override the previous timestamp and ignore all - * sched_clock() deltas that occured while we idled, - * and use the PM-provided delta_ns to advance the - * rq clock: - */ - __raw_spin_lock(&scd->lock); - scd->clock += delta_ns; - __raw_spin_unlock(&scd->lock); - + sched_clock_tick(); touch_softlockup_watchdog(); } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); -- cgit v1.2.3 From ffb4ba76a25ab6c9deeec33e4f58395586ca747c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Aug 2008 11:10:26 -0700 Subject: [module] Don't let gcc inline load_module() 'load_module()' is a complex function that contains all the ELF section logic, and inlining it is utterly insane. But gcc will do it, simply because there is only one call-site. As a result, all the stack space that is allocated for all the work to load the module will still be active when we actually call the module init sequence, and the deep call chain makes stack overflows happen. And stack overflows are really hard to debug, because they not only corrupt random pages below the stack, but also corrupt the thread_info structure that is allocated under the stack. 
In this case, Alan Brunelle reported some crazy oopses at bootup, after loading the processor module that ends up doing complex ACPI stuff and has quite a deep callchain. This should fix it, and is the sane thing to do regardless. Cc: Alan D. Brunelle Cc: Arjan van de Ven Cc: Rusty Russell Signed-off-by: Linus Torvalds --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 08864d257eb..9db11911e04 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, +static noinline struct module *load_module(void __user *umod, unsigned long len, const char __user *uargs) { -- cgit v1.2.3