From 1be396794897f80bfc8774719ba60309a9e3d374 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:20 +0200 Subject: timekeeping: Move reset of cycle_last for tsc clocksource to tsc change_clocksource resets the cycle_last value to zero then sets it to a value read from the clocksource. The reset to zero is required only for the TSC clocksource to make the read_tsc function work after a resume. The reason is that the TSC read function uses cycle_last to detect backwards going TSCs. In the resume case cycle_last contains the TSC value from the last update before the suspend. On resume the TSC starts counting from 0 again and would trip over the cycle_last comparison. This is subtle and surprising. Move the reset to a resume function in the tsc code. Signed-off-by: Martin Schwidefsky Acked-by: Thomas Gleixner Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.142191175@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368b357..968425422c4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -744,10 +744,16 @@ static cycle_t __vsyscall_fn vread_tsc(void) } #endif +static void resume_tsc(void) +{ + clocksource_tsc.cycle_last = 0; +} + static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, + .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .shift = 22, .flags = CLOCK_SOURCE_IS_CONTINUOUS | -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) have a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system change the read_persistent_clock function to return a struct timespec. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e7..bf67dcb4a44 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) -- cgit v1.2.3 From 8cab02dc3c58a12235c6d463ce684dded9696848 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 18:19:45 +0200 Subject: x86: Do not unregister PIT clocksource on PIT oneshot setup/shutdown This basically reverts commit 1a0c009ac (x86: unregister PIT clocksource when PIT is disabled) because the problem which was tried to address with that patch has been solved by commit 3f68535ada (clocksource: sanity check sysfs clocksource changes). The problem addressed by the original patch is that PIT could be selected as clocksource after the system switched the PIT off or set the PIT into one shot mode which would result in complete timekeeping wreckage. Now with the sysfs sanity check in place PIT cannot be selected again when the system is in oneshot mode. The system will not switch to one shot mode as long as PIT is installed because PIT is not suitable for one shot. The shutdown case which happens when the lapic timer is installed is covered by the fact that init_pit_clocksource() is called after the lapic timer take over and then does not install the PIT clocksource at all. We should have done the sanity checks back then, but ... This also solves the locking problem which was reported vs. the clocksource rework. LKML-Reference: Cc: Martin Schwidefsky Cc: john stultz Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i8253.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cf36c053ac..23c167925a5 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -19,12 +19,6 @@ DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -#ifdef CONFIG_X86_32 -static void pit_disable_clocksource(void); -#else -static inline void pit_disable_clocksource(void) { } -#endif - /* * HPET replaces the PIT, when enabled. So we need to know, which of * the two timers is used @@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode, outb_pit(0, PIT_CH0); outb_pit(0, PIT_CH0); } - pit_disable_clocksource(); break; case CLOCK_EVT_MODE_ONESHOT: /* One shot setup */ - pit_disable_clocksource(); outb_pit(0x38, PIT_MODE); break; @@ -200,17 +192,6 @@ static struct clocksource pit_cs = { .shift = 20, }; -static void pit_disable_clocksource(void) -{ - /* - * Use mult to check whether it is registered or not - */ - if (pit_cs.mult) { - clocksource_unregister(&pit_cs); - pit_cs.mult = 0; - } -} - static int __init init_pit_clocksource(void) { /* -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids to clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE which returns the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade off is you only get low-res tick grained time resolution. This isn't a new idea, I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse grained time when the vsyscall64 sysctrl was set to 2. However this affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves. Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2..3d61e204826 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aa..cf53a78e2dc 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46a..ee55754cc3c 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(>od->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(>od->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c4..fc3672a303d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } -- cgit v1.2.3