From eed3b9cf3fe3fcc7a50238dfcab63a63914e8f42 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 29 Sep 2009 14:25:15 +0200 Subject: nohz: Reuse ktime in sub-functions of tick_check_idle. On a system with NOHZ=y tick_check_idle calls tick_nohz_stop_idle and tick_nohz_update_jiffies. Given the right conditions (ts->idle_active and/or ts->tick_stopped) both function get a time stamp with ktime_get. The same time stamp can be reused if both function require one. On s390 this change has the additional benefit that gcc inlines the tick_nohz_stop_idle function into tick_check_idle. The number of instructions to execute tick_check_idle drops from 225 to 144 (without the ktime_get optimization it is 367 vs 215 instructions). before: 0) | tick_check_idle() { 0) | tick_nohz_stop_idle() { 0) | ktime_get() { 0) | read_tod_clock() { 0) 0.601 us | } 0) 1.765 us | } 0) 3.047 us | } 0) | ktime_get() { 0) | read_tod_clock() { 0) 0.570 us | } 0) 1.727 us | } 0) | tick_do_update_jiffies64() { 0) 0.609 us | } 0) 8.055 us | } after: 0) | tick_check_idle() { 0) | ktime_get() { 0) | read_tod_clock() { 0) 0.617 us | } 0) 1.773 us | } 0) | tick_do_update_jiffies64() { 0) 0.593 us | } 0) 4.477 us | } Signed-off-by: Martin Schwidefsky Cc: john stultz LKML-Reference: <20090929122533.206589318@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 62 ++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e0f59a21c06..7378e2c71ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz); * value. We do this unconditionally on any cpu, as we don't know whether the * cpu, which has the update task assigned is in a long sleep. */ -static void tick_nohz_update_jiffies(void) +static void tick_nohz_update_jiffies(ktime_t now) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - ktime_t now; - - if (!ts->tick_stopped) - return; cpumask_clear_cpu(cpu, nohz_cpu_mask); - now = ktime_get(); ts->idle_waketime = now; local_irq_save(flags); @@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void) touch_softlockup_watchdog(); } -static void tick_nohz_stop_idle(int cpu) +static void tick_nohz_stop_idle(int cpu, ktime_t now) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t delta; - if (ts->idle_active) { - ktime_t now, delta; - now = ktime_get(); - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - ts->idle_active = 0; + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_active = 0; - sched_clock_idle_wakeup_event(0); - } + sched_clock_idle_wakeup_event(0); } static ktime_t tick_nohz_start_idle(struct tick_sched *ts) @@ -431,7 +423,11 @@ void tick_nohz_restart_sched_tick(void) ktime_t now; local_irq_disable(); - tick_nohz_stop_idle(cpu); + if (ts->idle_active || (ts->inidle && ts->tick_stopped)) + now = ktime_get(); + + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); if (!ts->inidle || !ts->tick_stopped) { ts->inidle = 0; @@ -445,7 +441,6 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); - now = ktime_get(); tick_do_update_jiffies64(now); cpumask_clear_cpu(cpu, nohz_cpu_mask); @@ -579,22 +574,18 @@ static void tick_nohz_switch_to_nohz(void) * timer and do not touch the other magic bits which need to be done * when idle is left. */ -static void tick_nohz_kick_tick(int cpu) +static void tick_nohz_kick_tick(int cpu, ktime_t now) { #if 0 /* Switch back to 2.6.27 behaviour */ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - ktime_t delta, now; - - if (!ts->tick_stopped) - return; + ktime_t delta; /* * Do not touch the tick device, when the next expiry is either * already reached or less/equal than the tick period. */ - now = ktime_get(); delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); if (delta.tv64 <= tick_period.tv64) return; @@ -603,9 +594,26 @@ static void tick_nohz_kick_tick(int cpu) #endif } +static inline void tick_check_nohz(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now; + + if (!ts->idle_active && !ts->tick_stopped) + return; + now = ktime_get(); + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); + if (ts->tick_stopped) { + tick_nohz_update_jiffies(now); + tick_nohz_kick_tick(cpu, now); + } +} + #else static inline void tick_nohz_switch_to_nohz(void) { } +static inline void tick_check_nohz(int cpu) { } #endif /* NO_HZ */ @@ -615,11 +623,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } void tick_check_idle(int cpu) { tick_check_oneshot_broadcast(cpu); -#ifdef CONFIG_NO_HZ - tick_nohz_stop_idle(cpu); - tick_nohz_update_jiffies(); - tick_nohz_kick_tick(cpu); -#endif + tick_check_nohz(cpu); } /* -- cgit v1.2.3 From 3c5d92a0cfb5103c0d5ab74d4ae6373d3af38148 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 29 Sep 2009 14:25:16 +0200 Subject: nohz: Introduce arch_needs_cpu Allow the architecture to request a normal jiffy tick when the system goes idle and tick_nohz_stop_sched_tick is called . On s390 the hook is used to prevent the system going fully idle if there has been an interrupt other than a clock comparator interrupt since the last wakeup. On s390 the HiperSockets response time for 1 connection ping-pong goes down from 42 to 34 microseconds. The CPU cost decreases by 27%. Signed-off-by: Martin Schwidefsky LKML-Reference: <20090929122533.402715150@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7378e2c71ca..3840f6dff7e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -264,12 +264,15 @@ void tick_nohz_stop_sched_tick(int inidle) last_jiffies = jiffies; } while (read_seqretry(&xtime_lock, seq)); - /* Get the next timer wheel timer */ - next_jiffies = get_next_timer_interrupt(last_jiffies); - delta_jiffies = next_jiffies - last_jiffies; - - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + arch_needs_cpu(cpu)) { + next_jiffies = last_jiffies + 1; delta_jiffies = 1; + } else { + /* Get the next timer wheel timer */ + next_jiffies = get_next_timer_interrupt(last_jiffies); + delta_jiffies = next_jiffies - last_jiffies; + } /* * Do not stop the tick, if we are only one off * or if the cpu is required for rcu -- cgit v1.2.3 From 529eaccd900a59724619b4a6ef6579fd518d5218 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2009 14:32:19 +0100 Subject: nohz: Type cast printk argument On some archs local_softirq_pending() has a data type of unsigned long on others its unsigned int. Type cast it to (unsigned int) in the printk to avoid the compiler warning. Signed-off-by: Thomas Gleixner LKML-Reference: --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3840f6dff7e..c65ba0faa98 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -250,7 +250,7 @@ void tick_nohz_stop_sched_tick(int inidle) if (ratelimit < 10) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - local_softirq_pending()); + (unsigned int) local_softirq_pending()); ratelimit++; } goto end; -- cgit v1.2.3 From 98962465ed9e6ea99c38e0af63fe1dcb5a79dc25 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 18 Aug 2009 12:45:10 -0500 Subject: nohz: Prevent clocksource wrapping during idle The dynamic tick allows the kernel to sleep for periods longer than a single tick, but it does not limit the sleep time currently. In the worst case the kernel could sleep longer than the wrap around time of the time keeping clock source which would result in losing track of time. Prevent this by limiting it to the safe maximum sleep time of the current time keeping clock source. The value is calculated when the clock source is registered. [ tglx: simplified the code a bit and massaged the commit msg ] Signed-off-by: Jon Hunter Cc: John Stultz LKML-Reference: <1250617512-23567-2-git-send-email-jon-hunter@ti.com> Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 52 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c65ba0faa98..a80b4644fe6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) struct tick_sched *ts; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + u64 time_delta; int cpu; local_irq_save(flags); @@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle) seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; + + /* + * On SMP we really should only care for the CPU which + * has the do_timer duty assigned. All other CPUs can + * sleep as long as they want. + */ + if (cpu == tick_do_timer_cpu || + tick_do_timer_cpu == TICK_DO_TIMER_NONE) + time_delta = timekeeping_max_deferment(); + else + time_delta = KTIME_MAX; } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || @@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle) if ((long)delta_jiffies >= 1) { /* - * calculate the expiry time for the next timer wheel - * timer - */ - expires = ktime_add_ns(last_update, tick_period.tv64 * - delta_jiffies); + * calculate the expiry time for the next timer wheel + * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals + * that there is no timer pending or at least extremely + * far into the future (12 days for HZ=1000). In this + * case we set the expiry to the end of time. + */ + if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { + /* + * Calculate the time delta for the next timer event. + * If the time delta exceeds the maximum time delta + * permitted by the current clocksource then adjust + * the time delta accordingly to ensure the + * clocksource does not wrap. + */ + time_delta = min_t(u64, time_delta, + tick_period.tv64 * delta_jiffies); + expires = ktime_add_ns(last_update, time_delta); + } else { + expires.tv64 = KTIME_MAX; + } /* * If this cpu is the one which updates jiffies, then @@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle) ts->idle_sleeps++; + /* Mark expires */ + ts->idle_expires = expires; + /* - * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that - * there is no timer pending or at least extremly far - * into the future (12 days for HZ=1000). In this case - * we simply stop the tick timer: + * If the expiration time == KTIME_MAX, then + * in this case we simply stop the tick timer. */ - if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { - ts->idle_expires.tv64 = KTIME_MAX; + if (unlikely(expires.tv64 == KTIME_MAX)) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) hrtimer_cancel(&ts->sched_timer); goto out; } - /* Mark expiries */ - ts->idle_expires = expires; - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED); -- cgit v1.2.3 From 27185016b806d5a1181ff501cae120582b2b27dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Nov 2009 22:12:06 +0100 Subject: nohz: Track last do_timer() cpu The previous patch which limits the sleep time to the maximum deferment time of the time keeping clocksource has some limitations on SMP machines: if all CPUs are idle then for all CPUs the maximum sleep time is limited. Solve this by keeping track of which cpu had the do_timer() duty assigned last and limit the sleep time only for this cpu. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Jon Hunter Cc: John Stultz --- kernel/time/tick-sched.c | 52 ++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 24 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a80b4644fe6..df133bc29f8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle) seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; - - /* - * On SMP we really should only care for the CPU which - * has the do_timer duty assigned. All other CPUs can - * sleep as long as they want. - */ - if (cpu == tick_do_timer_cpu || - tick_do_timer_cpu == TICK_DO_TIMER_NONE) - time_delta = timekeeping_max_deferment(); - else - time_delta = KTIME_MAX; + time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || @@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle) /* Schedule the tick, if we are at least one jiffie off */ if ((long)delta_jiffies >= 1) { + /* + * If this cpu is the one which updates jiffies, then + * give up the assignment and let it be taken by the + * cpu which runs the tick timer next, which might be + * this cpu as well. If we don't drop this here the + * jiffies might be stale and do_timer() never + * invoked. Keep track of the fact that it was the one + * which had the do_timer() duty last. If this cpu is + * the one which had the do_timer() duty last, we + * limit the sleep time to the timekeeping + * max_deferement value which we retrieved + * above. Otherwise we can sleep as long as we want. + */ + if (cpu == tick_do_timer_cpu) { + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + ts->do_timer_last = 1; + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { + time_delta = KTIME_MAX; + ts->do_timer_last = 0; + } else if (!ts->do_timer_last) { + time_delta = KTIME_MAX; + } + /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals @@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle) */ time_delta = min_t(u64, time_delta, tick_period.tv64 * delta_jiffies); - expires = ktime_add_ns(last_update, time_delta); - } else { - expires.tv64 = KTIME_MAX; } - /* - * If this cpu is the one which updates jiffies, then - * give up the assignment and let it be taken by the - * cpu which runs the tick timer next, which might be - * this cpu as well. If we don't drop this here the - * jiffies might be stale and do_timer() never - * invoked. - */ - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + if (time_delta < KTIME_MAX) + expires = ktime_add_ns(last_update, time_delta); + else + expires.tv64 = KTIME_MAX; if (delta_jiffies > 1) cpumask_set_cpu(cpu, nohz_cpu_mask); -- cgit v1.2.3