From 8a8cde163ea724baf74e7752a31a69d3121a240e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jun 2008 14:22:28 +0200 Subject: sched: rt: dont stop the period timer when there are tasks wanting to run So if the group ever gets throttled, it will never wake up again. Reported-by: "Daniel K." Signed-off-by: Peter Zijlstra Tested-by: Daniel K. Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1dad5bbb59b..0f3c19197fa 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; spin_unlock(&rt_rq->rt_runtime_lock); - } + } else if (rt_rq->rt_nr_running) + idle = 0; if (enqueue) sched_rt_rq_enqueue(rt_rq); -- cgit v1.2.3 From bb10ed0994927d433f6dbdf274fdb26cfcf516b7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 19 Jun 2008 15:04:07 -0700 Subject: sched: fix wait_for_completion_timeout() spurious failure under heavy load It seems that the current implementaton of wait_for_completion_timeout() has a small problem under very high load for the common pattern: if (!wait_for_completion_timeout(&done, timeout)) /* handle failure */ because the implementation very roughly does (lots of code deleted to show the basic flow): static inline long __sched do_wait_for_common(struct completion *x, long timeout, int state) { if (x->done) return timeout; do { timeout = schedule_timeout(timeout); if (!timeout) return timeout; } while (!x->done); return timeout; } so if the system is very busy and x->done is not set when do_wait_for_common() is entered, it is possible that the first call to schedule_timeout() returns 0 because the task doing wait_for_completion doesn't get rescheduled for a long time, even if it is woken up early enough. In this case, wait_for_completion_timeout() returns 0 without even checking x->done again, and the code above falls into its failure case purely for scheduler reasons, even if the hardware event or whatever was being waited for happened early enough. It would make sense to add an extra test to do_wait_for() in the timeout case and return 1 if x->done is actually set. A quick audit (not exhaustive) of wait_for_completion_timeout() callers seems to indicate that no one actually cares about the return value in the success case -- they just test for 0 (timed out) versus non-zero (wait succeeded). Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 4a3cb061415..577f160131b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4405,6 +4405,16 @@ do_wait_for_common(struct completion *x, long timeout, int state) spin_unlock_irq(&x->wait.lock); timeout = schedule_timeout(timeout); spin_lock_irq(&x->wait.lock); + + /* + * If the completion has arrived meanwhile + * then return 1 jiffy time left: + */ + if (x->done && !timeout) { + timeout = 1; + break; + } + if (!timeout) { __remove_wait_queue(&x->wait, &wait); return timeout; -- cgit v1.2.3 From ea71a546706dfdad72462624394e1e472c6bf34f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 20 Jun 2008 18:32:20 +0400 Subject: sched: refactor wait_for_completion_timeout() Simplify the code and fix the boundary condition of wait_for_completion_timeout(,0). We can kill the first __remove_wait_queue() as well. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra --- kernel/sched.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 577f160131b..bebf9788f45 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4398,32 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state) signal_pending(current)) || (state == TASK_KILLABLE && fatal_signal_pending(current))) { - __remove_wait_queue(&x->wait, &wait); - return -ERESTARTSYS; + timeout = -ERESTARTSYS; + break; } __set_current_state(state); spin_unlock_irq(&x->wait.lock); timeout = schedule_timeout(timeout); spin_lock_irq(&x->wait.lock); - - /* - * If the completion has arrived meanwhile - * then return 1 jiffy time left: - */ - if (x->done && !timeout) { - timeout = 1; - break; - } - - if (!timeout) { - __remove_wait_queue(&x->wait, &wait); - return timeout; - } - } while (!x->done); + } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); + if (!x->done) + return timeout; } x->done--; - return timeout; + return timeout ?: 1; } static long __sched -- cgit v1.2.3