From c818a18146997d1356a4840b0c01f1168c16c8a4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Jan 2006 22:45:24 +0100 Subject: [PATCH] x86_64: On Intel CPUs don't do an additional CPU sync before RDTSC RDTSC serialization using cpuid is not needed for Intel platforms. This increases gettimeofday performance. Cc: vojtech@suse.cz Cc: rohit.seth@intel.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 1 + arch/x86_64/kernel/time.c | 26 +++++++------------------- arch/x86_64/kernel/vsyscall.c | 3 +-- include/asm-x86_64/cpufeature.h | 1 + include/asm-x86_64/timex.h | 16 ++++++++++++++++ 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 8090a0a4688..46bf556efc3 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1042,6 +1042,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index f8d4b69388d..25bc58aac2a 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; struct timespec __xtime __section_xtime; struct timezone __sys_tz __section_sys_tz; -static inline void rdtscll_sync(unsigned long *tsc) -{ -#ifdef CONFIG_SMP - sync_core(); -#endif - rdtscll(*tsc); -} - /* * do_gettimeoffset() returns microseconds since last timer interrupt was * triggered by hardware. A memory read of HPET is slower than a register read @@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void) { unsigned long t; unsigned long x; - rdtscll_sync(&t); + t = get_cycles_sync(); if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; return x; @@ -309,8 +301,7 @@ unsigned long long monotonic_clock(void) last_offset = vxtime.last_tsc; base = monotonic_base; } while (read_seqretry(&xtime_lock, seq)); - sync_core(); - rdtscll(this_offset); + this_offset = get_cycles_sync(); offset = (this_offset - last_offset)*1000/cpu_khz; return base + offset; } @@ -391,7 +382,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) delay = LATCH - 1 - delay; } - rdtscll_sync(&tsc); + tsc = get_cycles_sync(); if (vxtime.mode == VXTIME_HPET) { if (offset - vxtime.last > hpet_tick) { @@ -700,8 +691,7 @@ static unsigned int __init hpet_calibrate_tsc(void) do { local_irq_disable(); hpet_now = hpet_readl(HPET_COUNTER); - sync_core(); - rdtscl(tsc_now); + tsc_now = get_cycles_sync(); local_irq_restore(flags); } while ((tsc_now - tsc_start) < TICK_COUNT && (hpet_now - hpet_start) < TICK_COUNT); @@ -731,11 +721,9 @@ static unsigned int __init pit_calibrate_tsc(void) outb(0xb0, 0x43); outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); - rdtscll(start); - sync_core(); + start = get_cycles_sync(); while ((inb(0x61) & 0x20) == 0); - sync_core(); - rdtscll(end); + end = get_cycles_sync(); spin_unlock_irqrestore(&i8253_lock, flags); @@ -939,7 +927,7 @@ void __init time_init(void) vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; - rdtscll_sync(&vxtime.last_tsc); + vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 70a0bd16085..9e51047fc41 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -66,8 +66,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) (__jiffies - __wall_jiffies) * (1000000 / HZ); if (__vxtime.mode != VXTIME_HPET) { - sync_core(); - rdtscll(t); + t = get_cycles_sync(); if (t < __vxtime.last_tsc) t = __vxtime.last_tsc; usec += ((t - __vxtime.last_tsc) * diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index aea308c6570..72b39f51479 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -63,6 +63,7 @@ #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */ #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ +#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h index f971f45d6d7..f18443fcdf0 100644 --- a/include/asm-x86_64/timex.h +++ b/include/asm-x86_64/timex.h @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ @@ -23,6 +26,19 @@ static inline cycles_t get_cycles (void) return ret; } +/* Like get_cycles, but make sure the CPU is synchronized. */ +static __always_inline cycles_t get_cycles_sync(void) +{ + unsigned long long ret; + unsigned eax; + /* Don't do an additional sync on CPUs where we know + RDTSC is already synchronous. */ + alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC, + "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); + rdtscll(ret); + return ret; +} + extern unsigned int cpu_khz; extern int read_current_timer(unsigned long *timer_value); -- cgit v1.2.3