From 36b2a8d5aff4cb3ee83d5e40447a8f073bcfe2fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: add X86_FEATURE_PEBS and detection Here is a patch (used by perfmon2) to detect the presence of the Precise Event Based Sampling (PEBS) feature for Intel 64-bit processors. The patch also adds the cpu_has_pebs macro. changelog: - adds X86_FEATURE_PEBS - adds cpu_has_pebs to test for X86_FEATURE_PEBS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4..619af2e2fa2 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -835,6 +835,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); -- cgit v1.2.3 From ee58fad51a2a767cb2567706ace967705233d881 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: x86-64 add Intel BTS cpufeature bit and detection (take 2) Here is a small patch for x86-64 which adds a cpufeature flag and detection code for Intel's Branch Trace Store (BTS) feature. This feature can be found on Intel P4 and Core 2 processors among others. It can also be used by perfmon. 
changelog: - add X86_FEATURE_BTS - add Branch Trace Store detection Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 619af2e2fa2..a570c81c831 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -838,6 +838,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_ds) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } -- cgit v1.2.3 From 616779656989cb8c59177e35cb13e87028b1edc8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Synchronize RDTSC on single core AMD There is no guarantee that two RDTSCs in a row are monotonic, so don't assume it on single core AMD systems. 
This will make gettimeofday slower again. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a570c81c831..05eaca41802 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -732,11 +732,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - /* When there is only one core no need to synchronize RDTSC */ - if (num_possible_cpus() == 1) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); - else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.3 From f3d73707a1e84f0687a05144b70b660441e999c7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Mark rdtsc as sync only for netburst, not for core2 On the Core2 cpus, the rdtsc instruction is not serializing (as defined in the architecture reference since rdtsc exists) and, due to the deep speculation of these cores, it is possible to observe time going backwards between cores. Since the kernel already deals with this with the SYNC_RDTSC flag, the solution is simple: only assume that the instruction is serializing on family 15. The price one pays for this is a slightly slower gettimeofday (by a dozen or two cycles), but that is a small price to pay for a really-going-forward tsc counter. 
Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 05eaca41802..6595a4ebe7f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -860,7 +860,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); -- cgit v1.2.3