From 7bb67439bf6bd3782f07f1d7be1e63406453d5de Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 31 Aug 2008 08:05:58 -0700 Subject: select: Introduce a hrtimeout function This patch adds a schedule_hrtimeout() function, to be used by select() and poll() in a later patch. This function works similar to schedule_timeout() in most ways, but takes a timespec rather than jiffies. With a lot of contributions/fixes from Thomas Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b8e4dce80a7..782137dc755 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1678,3 +1678,68 @@ void __init hrtimers_init(void) #endif } +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + struct hrtimer_sleeper t; + + /* + * Optimize when a zero timeout value is given. It does not + * matter whether this is an absolute or a relative time. + */ + if (expires && !expires->tv64) { + __set_current_state(TASK_RUNNING); + return 0; + } + + /* + * A NULL parameter means "inifinte" + */ + if (!expires) { + schedule(); + __set_current_state(TASK_RUNNING); + return -EINTR; + } + + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); + t.timer.expires = *expires; + + hrtimer_init_sleeper(&t, current); + + hrtimer_start(&t.timer, t.timer.expires, mode); + if (!hrtimer_active(&t.timer)) + t.task = NULL; + + if (likely(t.task)) + schedule(); + + hrtimer_cancel(&t.timer); + destroy_hrtimer_on_stack(&t.timer); + + __set_current_state(TASK_RUNNING); + + return !t.task ? 0 : -EINTR; +} +EXPORT_SYMBOL_GPL(schedule_hrtimeout); -- cgit v1.2.3 From df0cc0539b4127bd02f64de2c335b4af1fdb3845 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:09:53 -0700 Subject: select: add a timespec_add_safe() function For the select() rework, it's important to be able to add timespec structures in an overflow-safe manner. This patch adds a timespec_add_safe() function for this which is similar in operation to ktime_add_safe(), but works on a struct timespec. Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- kernel/time.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index 6a08660b4fa..d63a4336fad 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64); #endif EXPORT_SYMBOL(jiffies); + +/* + * Add two timespec values and do a safety check for overflow. + * It's assumed that both values are valid (>= 0) + */ +struct timespec timespec_add_safe(const struct timespec lhs, + const struct timespec rhs) +{ + struct timespec res; + + set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + + if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) + res.tv_sec = TIME_T_MAX; + + return res; +} -- cgit v1.2.3 From cc584b213f252bf698849cf4be2377cd3ec7501a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:02:30 -0700 Subject: hrtimer: convert kernel/* to the new hrtimer apis In order to be able to do range hrtimers we need to use accessor functions to the "expire" member of the hrtimer struct. This patch converts kernel/* to these accessors. Signed-off-by: Arjan van de Ven --- kernel/futex.c | 7 +++---- kernel/hrtimer.c | 44 +++++++++++++++++++++++--------------------- kernel/posix-timers.c | 10 ++++------ kernel/rtmutex.c | 3 +-- kernel/sched.c | 7 +++---- kernel/time/ntp.c | 3 +-- kernel/time/tick-sched.c | 21 ++++++++++----------- kernel/time/timer_list.c | 4 ++-- 8 files changed, 47 insertions(+), 52 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 7d1136e97c1..4cd5b4319b0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1299,10 +1299,9 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); - t.timer.expires = *abs_time; + hrtimer_set_expires(&t.timer, *abs_time); - hrtimer_start(&t.timer, t.timer.expires, - HRTIMER_MODE_ABS); + hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) t.task = NULL; @@ -1404,7 +1403,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); - to->timer.expires = *time; + hrtimer_set_expires(&to->timer, *time); } q.pi_state = NULL; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 782137dc755..ae307feec74 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) if (!base->first) continue; timer = rb_entry(base->first, struct hrtimer, node); - expires = ktime_sub(timer->expires, base->offset); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } @@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; - ktime_t expires = ktime_sub(timer->expires, base->offset); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); int res; - WARN_ON_ONCE(timer->expires.tv64 < 0); + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* * When the callback is running, we do not reprogram the clock event @@ -794,7 +794,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) u64 orun = 1; ktime_t delta; - delta = ktime_sub(now, timer->expires); + delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta.tv64 < 0) return 0; @@ -806,8 +806,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); - timer->expires = ktime_add_ns(timer->expires, incr * orun); - if (timer->expires.tv64 > now.tv64) + hrtimer_add_expires_ns(timer, incr * orun); + if (hrtimer_get_expires_tv64(timer) > now.tv64) return orun; /* * This (and the ktime_add() below) is the @@ -815,7 +815,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add_safe(timer->expires, interval); + hrtimer_add_expires(timer, interval); return orun; } @@ -847,7 +847,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * We dont care about collisions. Nodes with * the same expiry time stay together. */ - if (timer->expires.tv64 < entry->expires.tv64) { + if (hrtimer_get_expires_tv64(timer) < + hrtimer_get_expires_tv64(entry)) { link = &(*link)->rb_left; } else { link = &(*link)->rb_right; @@ -982,7 +983,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - timer->expires = tim; + hrtimer_set_expires(timer, tim); timer_stats_hrtimer_set_start_info(timer); @@ -1076,7 +1077,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) ktime_t rem; base = lock_hrtimer_base(timer, &flags); - rem = ktime_sub(timer->expires, base->get_time()); + rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; @@ -1108,7 +1109,7 @@ ktime_t hrtimer_get_next_event(void) continue; timer = rb_entry(base->first, struct hrtimer, node); - delta.tv64 = timer->expires.tv64; + delta.tv64 = hrtimer_get_expires_tv64(timer); delta = ktime_sub(delta, base->get_time()); if (delta.tv64 < mindelta.tv64) mindelta.tv64 = delta.tv64; @@ -1308,10 +1309,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < timer->expires.tv64) { + if (basenow.tv64 < hrtimer_get_expires_tv64(timer)) { ktime_t expires; - expires = ktime_sub(timer->expires, + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires.tv64 < expires_next.tv64) expires_next = expires; @@ -1414,7 +1415,8 @@ void hrtimer_run_queues(void) struct hrtimer *timer; timer = rb_entry(node, struct hrtimer, node); - if (base->softirq_time.tv64 <= timer->expires.tv64) + if (base->softirq_time.tv64 <= + hrtimer_get_expires_tv64(timer)) break; if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { @@ -1462,7 +1464,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod do { set_current_state(TASK_INTERRUPTIBLE); - hrtimer_start(&t->timer, t->timer.expires, mode); + hrtimer_start_expires(&t->timer, mode); if (!hrtimer_active(&t->timer)) t->task = NULL; @@ -1484,7 +1486,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) struct timespec rmt; ktime_t rem; - rem = ktime_sub(timer->expires, timer->base->get_time()); + rem = hrtimer_expires_remaining(timer); if (rem.tv64 <= 0) return 0; rmt = ktime_to_timespec(rem); @@ -1503,7 +1505,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = restart->nanosleep.expires; + hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); if (do_nanosleep(&t, HRTIMER_MODE_ABS)) goto out; @@ -1530,7 +1532,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, int ret = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - t.timer.expires = timespec_to_ktime(*rqtp); + hrtimer_set_expires(&t.timer, timespec_to_ktime(*rqtp)); if (do_nanosleep(&t, mode)) goto out; @@ -1550,7 +1552,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.index = t.timer.base->index; restart->nanosleep.rmtp = rmtp; - restart->nanosleep.expires = t.timer.expires.tv64; + restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); ret = -ERESTART_RESTARTBLOCK; out: @@ -1724,11 +1726,11 @@ int __sched schedule_hrtimeout(ktime_t *expires, } hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); - t.timer.expires = *expires; + hrtimer_set_expires(&t.timer, *expires); hrtimer_init_sleeper(&t, current); - hrtimer_start(&t.timer, t.timer.expires, mode); + hrtimer_start_expires(&t.timer, mode); if (!hrtimer_active(&t.timer)) t.task = NULL; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index e36d5798cbf..f85efcdcab2 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -668,7 +668,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); - remaining = ktime_sub(timer->expires, now); + remaining = ktime_sub(hrtimer_get_expires(timer), now); /* Return 0 only, when the timer is expired and not pending */ if (remaining.tv64 <= 0) { /* @@ -762,7 +762,7 @@ common_timer_set(struct k_itimer *timr, int flags, hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); timr->it.real.timer.function = posix_timer_fn; - timer->expires = timespec_to_ktime(new_setting->it_value); + hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value)); /* Convert interval */ timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); @@ -771,14 +771,12 @@ common_timer_set(struct k_itimer *timr, int flags, if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { /* Setup correct expiry time for relative timers */ if (mode == HRTIMER_MODE_REL) { - timer->expires = - ktime_add_safe(timer->expires, - timer->base->get_time()); + hrtimer_add_expires(timer, timer->base->get_time()); } return 0; } - hrtimer_start(timer, timer->expires, mode); + hrtimer_start_expires(timer, mode); return 0; } diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 6522ae5b14a..69d9cb921ff 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, /* Setup the timer, when timeout != NULL */ if (unlikely(timeout)) { - hrtimer_start(&timeout->timer, timeout->timer.expires, - HRTIMER_MODE_ABS); + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&timeout->timer)) timeout->task = NULL; } diff --git a/kernel/sched.c b/kernel/sched.c index 1a5f73c1fcd..e46b5afa200 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -221,9 +221,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) now = hrtimer_cb_get_time(&rt_b->rt_period_timer); hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); - hrtimer_start(&rt_b->rt_period_timer, - rt_b->rt_period_timer.expires, - HRTIMER_MODE_ABS); + hrtimer_start_expires(&rt_b->rt_period_timer, + HRTIMER_MODE_ABS); } spin_unlock(&rt_b->rt_runtime_lock); } @@ -1058,7 +1057,7 @@ static void hrtick_start(struct rq *rq, u64 delay) struct hrtimer *timer = &rq->hrtick_timer; ktime_t time = ktime_add_ns(timer->base->get_time(), delay); - timer->expires = time; + hrtimer_set_expires(timer, time); if (rq == this_rq()) { hrtimer_restart(timer); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5125ddd8196..4c8d85421d2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OOP; printk(KERN_NOTICE "Clock: " "inserting leap second 23:59:60 UTC\n"); - leap_timer.expires = ktime_add_ns(leap_timer.expires, - NSEC_PER_SEC); + hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); res = HRTIMER_RESTART; break; case TIME_DEL: diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a87b0468568..b33be61c0f6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -288,7 +288,7 @@ void tick_nohz_stop_sched_tick(int inidle) goto out; } - ts->idle_tick = ts->sched_timer.expires; + ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; ts->idle_jiffies = last_jiffies; rcu_enter_nohz(); @@ -419,21 +419,21 @@ void tick_nohz_restart_sched_tick(void) ts->tick_stopped = 0; ts->idle_exittime = now; hrtimer_cancel(&ts->sched_timer); - ts->sched_timer.expires = ts->idle_tick; + hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); while (1) { /* Forward the time to expire in the future */ hrtimer_forward(&ts->sched_timer, now, tick_period); if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { - hrtimer_start(&ts->sched_timer, - ts->sched_timer.expires, + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); /* Check, if the timer was already in the past */ if (hrtimer_active(&ts->sched_timer)) break; } else { - if (!tick_program_event(ts->sched_timer.expires, 0)) + if (!tick_program_event( + hrtimer_get_expires(&ts->sched_timer), 0)) break; } /* Update jiffies and reread time */ @@ -446,7 +446,7 @@ void tick_nohz_restart_sched_tick(void) static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { hrtimer_forward(&ts->sched_timer, now, tick_period); - return tick_program_event(ts->sched_timer.expires, 0); + return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); } /* @@ -529,7 +529,7 @@ static void tick_nohz_switch_to_nohz(void) next = tick_init_jiffy_update(); for (;;) { - ts->sched_timer.expires = next; + hrtimer_set_expires(&ts->sched_timer, next); if (!tick_program_event(next, 0)) break; next = ktime_add(next, tick_period); @@ -625,16 +625,15 @@ void tick_setup_sched_timer(void) ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; /* Get the next period (per cpu) */ - ts->sched_timer.expires = tick_init_jiffy_update(); + hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); offset = ktime_to_ns(tick_period) >> 1; do_div(offset, num_possible_cpus()); offset *= smp_processor_id(); - ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); + hrtimer_add_expires_ns(&ts->sched_timer, offset); for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); - hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, - HRTIMER_MODE_ABS); + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); /* Check, if the timer was already in the past */ if (hrtimer_active(&ts->sched_timer)) break; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index a40e20fd000..5224a3215fb 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -66,8 +66,8 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) #endif SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", - (unsigned long long)ktime_to_ns(timer->expires), - (long long)(ktime_to_ns(timer->expires) - now)); + (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)), + (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now)); } static void -- cgit v1.2.3 From 654c8e0b1c623b156c5b92f28d914ab38c9c2c90 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:47:08 -0700 Subject: hrtimer: turn hrtimers into range timers this patch turns hrtimers into range timers; they have 2 expire points 1) the soft expire point 2) the hard expire point the kernel will do it's regular best effort attempt to get the timer run at the hard expire point. However, if some other time fires after the soft expire point, the kernel now has the freedom to fire this timer at this point, and thus grouping the events and preventing a power-expensive wakeup in the future. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ae307feec74..01483004183 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1309,7 +1309,20 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < hrtimer_get_expires_tv64(timer)) { + /* + * The immediate goal for using the softexpires is + * minimizing wakeups, not running timers at the + * earliest interrupt after their soft expiration. + * This allows us to avoid using a Priority Search + * Tree, which can answer a stabbing querry for + * overlapping intervals and instead use the simple + * BST we already have. + * We don't add extra wakeups by delaying timers that + * are right-of a not yet expired timer, because that + * timer will have to trigger a wakeup anyway. + */ + + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), @@ -1681,14 +1694,20 @@ void __init hrtimers_init(void) } /** - * schedule_hrtimeout - sleep until timeout + * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to @@ -1702,7 +1721,7 @@ void __init hrtimers_init(void) * * Returns 0 when the timer has expired otherwise -EINTR */ -int __sched schedule_hrtimeout(ktime_t *expires, +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode) { struct hrtimer_sleeper t; @@ -1726,7 +1745,7 @@ int __sched schedule_hrtimeout(ktime_t *expires, } hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); - hrtimer_set_expires(&t.timer, *expires); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1744,4 +1763,33 @@ int __sched schedule_hrtimeout(ktime_t *expires, return !t.task ? 0 : -EINTR; } +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); + +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range(expires, 0, mode); +} EXPORT_SYMBOL_GPL(schedule_hrtimeout); -- cgit v1.2.3 From 6976675d94042fbd446231d1bd8b7de71a980ada Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:52:40 -0700 Subject: hrtimer: create a "timer_slack" field in the task struct We want to be able to control the default "rounding" that is used by select() and poll() and friends. This is a per process property (so that we can have a "nice" like program to start certain programs with a looser or stricter rounding) that can be set/get via a prctl(). For this purpose, a field called "timer_slack_ns" is added to the task struct. In addition, a field called "default_timer_slack"ns" is added so that tasks easily can temporarily to a more/less accurate slack and then back to the default. The default value of the slack is set to 50 usec; this is significantly less than 2.6.27's average select() and poll() timing error but still allows the kernel to group timers somewhat to preserve power behavior. Applications and admins can override this via the prctl() Signed-off-by: Arjan van de Ven --- kernel/fork.c | 2 ++ kernel/sys.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe8479..4308d75f0fa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -987,6 +987,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; + p->default_timer_slack_ns = current->timer_slack_ns; + #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; diff --git a/kernel/sys.c b/kernel/sys.c index 038a7bc0901..1b96401a057 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1727,6 +1727,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; + case PR_GET_TIMERSLACK: + error = current->timer_slack_ns; + break; + case PR_SET_TIMERSLACK: + if (arg2 <= 0) + current->timer_slack_ns = + current->default_timer_slack_ns; + else + current->timer_slack_ns = arg2; + break; default: error = -EINVAL; break; -- cgit v1.2.3 From da8f2e170ea94cc20f8ebbc8ee8d127edb8f12f1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 10:47:46 -0700 Subject: hrtimer: add a hrtimer_start_range() function this patch adds a _range version of hrtimer_start() so that range timers can be created; the hrtimer_start() function is just a wrapper around this. In addition, hrtimer_start_expires() will now preserve existing ranges. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 01483004183..a0222097c57 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -945,9 +945,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) } /** - * hrtimer_start - (re)start an relative timer on the current CPU + * hrtimer_start_range_ns - (re)start an relative timer on the current CPU * @timer: the timer to be added * @tim: expiry time + * @delta_ns: "slack" range for the timer * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) * * Returns: @@ -955,7 +956,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * 1 when the timer was active */ int -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, + const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -983,7 +985,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - hrtimer_set_expires(timer, tim); + hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); @@ -1016,8 +1018,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); + +/** + * hrtimer_start - (re)start an relative timer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} EXPORT_SYMBOL_GPL(hrtimer_start); + /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop -- cgit v1.2.3 From 704af52bd13a5d9f3c60c496c68e752fafdfb434 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 16:10:20 -0700 Subject: hrtimer: show the timer ranges in /proc/timer_list to help debugging and visibility of timer ranges, show them in the existing timer list in /proc/timer_list Signed-off-by: Arjan van de Ven --- kernel/time/timer_list.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 5224a3215fb..122ee751d2d 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -65,8 +65,10 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); #endif SEQ_printf(m, "\n"); - SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", + SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", + (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)), + (long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now), (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now)); } -- cgit v1.2.3 From 3bd012060f962567aadb52b27b2fc8fdc91102c7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 08:58:59 -0700 Subject: hrtimer: make the nanosleep() syscall use the per process slack This patch makes the nanosleep() system call use the per process slack value; with this users are able to externally control existing applications to reduce the wakeup rate. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a0222097c57..9a4c9018556 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1563,9 +1563,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; + unsigned long slack; + + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - hrtimer_set_expires(&t.timer, timespec_to_ktime(*rqtp)); + hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); if (do_nanosleep(&t, mode)) goto out; -- cgit v1.2.3 From ae4b748e81b7e366f04f55229d5e372e372c33af Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 09:03:57 -0700 Subject: hrtimer: make the futex() system call use the per process slack value This patch makes the futex() system call use the per process slack value; with this users are able to externally control existing applications to reduce the wakeup rate. Signed-off-by: Arjan van de Ven --- kernel/futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 4cd5b4319b0..8af10027514 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1296,10 +1296,14 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (!abs_time) schedule(); else { + unsigned long slack; + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); - hrtimer_set_expires(&t.timer, *abs_time); + hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) -- cgit v1.2.3 From 2e94d1f71f7e4404d997e6fb4f1618aa147d76f9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 10 Sep 2008 16:06:00 -0700 Subject: hrtimer: peek at the timer queue just before going idle As part of going idle, we already look at the time of the next timer event to determine which C-state to select etc. This patch adds functionality that causes the timers that are past their soft expire time, to fire at this time, before we calculate the next wakeup time. This functionality will thus avoid wakeups by running timers before going idle rather than specially waking up for it. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9a4c9018556..eb2cf984959 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1381,6 +1381,36 @@ void hrtimer_interrupt(struct clock_event_device *dev) raise_softirq(HRTIMER_SOFTIRQ); } +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. + * + */ +void hrtimer_peek_ahead_timers(void) +{ + unsigned long flags; + struct tick_device *td; + struct clock_event_device *dev; + + if (hrtimer_hres_active()) + return; + + local_irq_save(flags); + td = &__get_cpu_var(tick_cpu_device); + if (!td) + goto out; + dev = td->evtdev; + if (!dev) + goto out; + hrtimer_interrupt(dev); +out: + local_irq_restore(flags); +} + static void run_hrtimer_softirq(struct softirq_action *h) { run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); -- cgit v1.2.3 From 030aebd2e439a2ebcca2b0ce30a02ed84feb043e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 11 Oct 2008 12:25:45 -0700 Subject: rangetimer: fix BUG_ON reported by Ingo There's a small race/chance that, while hrtimers are enabled globally, they're later not enabled when we're calling the hrtimer_interrupt() function, which then BUG_ON()'s for that. This patch closes that race/gap. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eb2cf984959..b17657d8d81 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1395,11 +1395,15 @@ void hrtimer_peek_ahead_timers(void) unsigned long flags; struct tick_device *td; struct clock_event_device *dev; - + struct hrtimer_cpu_base *cpu_base; if (hrtimer_hres_active()) return; local_irq_save(flags); + cpu_base = &__get_cpu_var(hrtimer_bases); + if (!cpu_base->hres_active) + goto out; + td = &__get_cpu_var(tick_cpu_device); if (!td) goto out; -- cgit v1.2.3 From dc4304f7deee29fcdf6a2b62f7146ea7f505fd42 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 13 Oct 2008 10:32:15 -0400 Subject: rangetimers: fix the bug reported by Ingo for real and please hand me a brown paper bag (thanks to Thomas for pointing out this very obvious bug) Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b17657d8d81..2bd230be1cb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1395,15 +1395,11 @@ void hrtimer_peek_ahead_timers(void) unsigned long flags; struct tick_device *td; struct clock_event_device *dev; - struct hrtimer_cpu_base *cpu_base; - if (hrtimer_hres_active()) + + if (!hrtimer_hres_active()) return; local_irq_save(flags); - cpu_base = &__get_cpu_var(hrtimer_bases); - if (!cpu_base->hres_active) - goto out; - td = &__get_cpu_var(tick_cpu_device); if (!td) goto out; -- cgit v1.2.3 From e1dd7bc58578ebfcaba989608017fe5156c29c86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 20 Oct 2008 13:33:36 +0200 Subject: hrtimers: fix docbook comments hrtimer_start() and hrtimer_start_range_ns() handle relative and absolute timers. Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 51ee90bca2d..00e6f0a1e7a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -946,7 +946,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) } /** - * hrtimer_start_range_ns - (re)start an relative timer on the current CPU + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer @@ -1022,7 +1022,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); /** - * hrtimer_start - (re)start an relative timer on the current CPU + * hrtimer_start - (re)start an hrtimer on the current CPU * @timer: the timer to be added * @tim: expiry time * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) -- cgit v1.2.3 From 643bdf68f92a8516574ed7ca3713f9334c331b8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 20 Oct 2008 13:38:11 +0200 Subject: hrtimers: simplify hrtimer_peek_ahead_timers() Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 00e6f0a1e7a..4fc41414fc0 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1394,22 +1394,16 @@ void hrtimer_interrupt(struct clock_event_device *dev) */ void hrtimer_peek_ahead_timers(void) { - unsigned long flags; struct tick_device *td; - struct clock_event_device *dev; + unsigned long flags; if (!hrtimer_hres_active()) return; local_irq_save(flags); td = &__get_cpu_var(tick_cpu_device); - if (!td) - goto out; - dev = td->evtdev; - if (!dev) - goto out; - hrtimer_interrupt(dev); -out: + if (td && td->evtdev) + hrtimer_interrupt(td->evtdev); local_irq_restore(flags); } -- cgit v1.2.3