diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
36 files changed, 1925 insertions, 160 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 778396c78d6..cfdb2f3bd76 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -2,19 +2,19 @@ # Makefile for x86-compatible CPU details and quirks # -obj-y := common.o proc.o bugs.o +obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += amd.o -obj-y += cyrix.o -obj-y += centaur.o -obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o -obj-y += nexgen.o -obj-y += umc.o +obj-$(CONFIG_X86_32) += common.o proc.o bugs.o +obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_32) += cyrix.o +obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_32) += transmeta.o +obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_32) += nexgen.o +obj-$(CONFIG_X86_32) += umc.o -obj-$(CONFIG_X86_MCE) += mcheck/ - -obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dcf6bbb1c7c..1ff88c7f45c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/mach_apic.h> #include "cpu.h" @@ -45,13 +46,17 @@ static __cpuinit int amd_apic_timer_broken(void) case CPUID_XFAM_10H: case CPUID_XFAM_11H: rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) + if (lo & ENABLE_C1E_MASK) { + if (smp_processor_id() != boot_cpu_physical_apicid) + printk(KERN_INFO "AMD C1E detected late. " + " Force timer broadcast.\n"); return 1; - break; - default: - /* err on the side of caution */ + } + break; + default: + /* err on the side of caution */ return 1; - } + } return 0; } #endif @@ -261,7 +266,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_HT /* * On a AMD multi core setup the lower bits of the APIC id - * distingush the cores. + * distinguish the cores. */ if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 473eac883c7..9681fa15ddf 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -53,7 +53,7 @@ static u32 __cpuinit ramtop(void) /* 16388 */ continue; /* * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophy already + * we frob around that catastrophe already */ if (e820.map[i].type == E820_RESERVED) @@ -287,7 +287,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) c->x86_capability[5] = cpuid_edx(0xC0000001); } - /* Cyrix III family needs CX8 & PGE explicity enabled. */ + /* Cyrix III family needs CX8 & PGE explicitly enabled. */ if (c->x86_model >=6 && c->x86_model <= 9) { rdmsr (MSR_VIA_FCR, lo, hi); lo |= (1<<1 | 1<<7); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d506201d397..e2fcf2051bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -207,7 +207,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) static int __init x86_fxsr_setup(char * s) { - /* Tell all the other CPU's to not use it... */ + /* Tell all the other CPUs to not use it... */ disable_x86_fxsr = 1; /* diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index d8c6f132dc7..151eda0a23f 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -19,6 +19,9 @@ config X86_ACPI_CPUFREQ Processor Performance States. This driver also supports Intel Enhanced Speedstep. + To compile this driver as a module, choose M here: the + module will be called acpi-cpufreq. + For details, take a look at <file:Documentation/cpu-freq/>. If in doubt, say N. @@ -26,7 +29,7 @@ config X86_ACPI_CPUFREQ config ELAN_CPUFREQ tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE - depends on X86_ELAN + depends on X86_32 && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC400 and SC410 processors. @@ -42,7 +45,7 @@ config ELAN_CPUFREQ config SC520_CPUFREQ tristate "AMD Elan SC520" select CPU_FREQ_TABLE - depends on X86_ELAN + depends on X86_32 && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC520 processor. @@ -54,6 +57,7 @@ config SC520_CPUFREQ config X86_POWERNOW_K6 tristate "AMD Mobile K6-2/K6-3 PowerNow!" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for mobile AMD K6-2+ and mobile AMD K6-3+ processors. @@ -65,6 +69,7 @@ config X86_POWERNOW_K6 config X86_POWERNOW_K7 tristate "AMD Mobile Athlon/Duron PowerNow!" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for mobile AMD K7 mobile processors. @@ -76,23 +81,27 @@ config X86_POWERNOW_K7_ACPI bool depends on X86_POWERNOW_K7 && ACPI_PROCESSOR depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + depends on X86_32 default y config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" select CPU_FREQ_TABLE - depends on EXPERIMENTAL help This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + To compile this driver as a module, choose M here: the + module will be called powernow-k8. + For details, take a look at <file:Documentation/cpu-freq/>. If in doubt, say N. config X86_POWERNOW_K8_ACPI - bool "ACPI Support" - select ACPI_PROCESSOR - depends on ACPI && X86_POWERNOW_K8 + bool + prompt "ACPI Support" if X86_32 + depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) default y help This provides access to the K8s Processor Performance States via ACPI. @@ -104,7 +113,7 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI + depends on X86_32 && PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. @@ -114,15 +123,20 @@ config X86_GX_SUSPMOD If in doubt, say N. config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" + tristate "Intel Enhanced SpeedStep (deprecated)" select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 + depends on X86_32 || (X86_64 && ACPI_PROCESSOR) help + This is deprecated and this functionality is now merged into + acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of + speedstep_centrino. This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, - you also need to say Y to "Use ACPI tables to decode..." below - [which might imply enabling ACPI] if you want to use this driver - on non-Banias CPUs. + mobile CPUs. This means Intel Pentium M (Centrino) CPUs + or 64bit enabled Intel Xeons. + + To compile this driver as a module, choose M here: the + module will be called speedstep-centrino. For details, take a look at <file:Documentation/cpu-freq/>. @@ -130,7 +144,7 @@ config X86_SPEEDSTEP_CENTRINO config X86_SPEEDSTEP_CENTRINO_TABLE bool "Built-in tables for Banias CPUs" - depends on X86_SPEEDSTEP_CENTRINO + depends on X86_32 && X86_SPEEDSTEP_CENTRINO default y help Use built-in tables for Banias CPUs if ACPI encoding @@ -141,6 +155,7 @@ config X86_SPEEDSTEP_CENTRINO_TABLE config X86_SPEEDSTEP_ICH tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all @@ -154,7 +169,7 @@ config X86_SPEEDSTEP_ICH config X86_SPEEDSTEP_SMI tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" select CPU_FREQ_TABLE - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) @@ -169,15 +184,24 @@ config X86_P4_CLOCKMOD select CPU_FREQ_TABLE help This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. + processors. When enabled it will lower CPU temperature by skipping + clocks. + + This driver should be only used in exceptional + circumstances when very low power is needed because it causes severe + slowdowns and noticeable latencies. Normally Speedstep should be used + instead. + + To compile this driver as a module, choose M here: the + module will be called p4-clockmod. For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say N. + Unless you are absolutely sure say N. config X86_CPUFREQ_NFORCE2 tristate "nVidia nForce2 FSB changing" - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for FSB changing on nVidia nForce2 platforms. @@ -188,6 +212,7 @@ config X86_CPUFREQ_NFORCE2 config X86_LONGRUN tristate "Transmeta LongRun" + depends on X86_32 help This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors which support LongRun. @@ -199,7 +224,7 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR + depends on X86_32 && ACPI_PROCESSOR help This adds the CPUFreq driver for VIA Samuel/CyrixIII, VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T @@ -212,7 +237,7 @@ config X86_LONGHAUL config X86_E_POWERSAVER tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" select CPU_FREQ_TABLE - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for VIA C7 processors. @@ -233,11 +258,11 @@ config X86_ACPI_CPUFREQ_PROC_INTF config X86_SPEEDSTEP_LIB tristate - default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD + default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) config X86_SPEEDSTEP_RELAXED_CAP_CHECK bool "Relaxed speedstep capability checks" - depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) help Don't perform all checks for a speedstep capable system which would normally be done. Some ancient or strange systems, though speedstep diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ffd01e5dcb5..fea0af0476b 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict; static int check_est_cpu(unsigned int cpuid) { - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) @@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); @@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; + policy->cpus = per_cpu(cpu_core_map, cpu); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 32f0bda3fc9..f03e9153618 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -260,7 +260,7 @@ static int nforce2_target(struct cpufreq_policy *policy, freqs.old = nforce2_get(policy->cpu); freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ + freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ if (freqs.old == freqs.new) return 0; diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c11baaf9f2b..326a4c81f68 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = { static int __init eps_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* This driver will work only on Centaur C7 processors with * Enhanced SpeedStep/PowerSaver registers */ diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 1e7ae7dafcf..94619c22f56 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy, static int elanfreq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int i; int result; @@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = { static int __init elanfreq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index ed2bda127c4..2ed7db2fd25 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -12,12 +12,12 @@ * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. * - * Theoritical note: + * Theoretical note: * * (see Geode(tm) CS5530 manual (rev.4.1) page.56) * * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 - * are based on Suspend Moduration. + * are based on Suspend Modulation. * * Suspend Modulation works by asserting and de-asserting the SUSP# pin * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# @@ -101,11 +101,11 @@ /* SUSCFG bits */ #define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */ #define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ #define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ +/* the below is supported only with cs5530A */ #define PWRSVE_ISA (1<<3) /* stop ISA clock */ #define PWRSVE (1<<4) /* active idle */ diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 5045f5d583c..749d00cb2eb 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); char *cpuname=NULL; int ret; u32 lo, hi; @@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = { static int __init longhaul_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index b2689514295..af4a867a097 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, u32 save_lo, save_hi; u32 eax, ebx, ecx, edx; u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (!low_freq || !high_freq) return -EINVAL; @@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = { */ static int __init longrun_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_TRANSMETA || !cpu_has(c, X86_FEATURE_LONGRUN)) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 8eb414b906d..14791ec55cf 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -195,12 +195,12 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); int cpuid = 0; unsigned int i; #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif /* Errata workaround */ @@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int ret; /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index 6d028533931..eb9b62b0830 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -1,6 +1,6 @@ /* * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. * * Licensed under the terms of the GNU GPL License version 2. * @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { */ static int __init powernow_k6_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || ((c->x86_model != 12) && (c->x86_model != 13))) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7decd6a50ff..b5a9863d6cd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed) static int check_powernow(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { @@ -565,7 +565,7 @@ static unsigned int powernow_get(unsigned int cpu) } -static int __init acer_cpufreq_pst(struct dmi_system_id *d) +static int __init acer_cpufreq_pst(const struct dmi_system_id *d) { printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f105ffd0996..99e1ef9939b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS]; static int cpu_family = CPU_OPTERON; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +DEFINE_PER_CPU(cpumask_t, cpu_core_map); #endif /* Return a frequency in MHz, given an input fid */ @@ -144,7 +144,7 @@ static void count_off_irt(struct powernow_k8_data *data) return; } -/* the voltage stabalization time */ +/* the voltage stabilization time */ static void count_off_vst(struct powernow_k8_data *data) { udelay(data->vstable * VST_UNITS_20US); @@ -643,7 +643,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); for (j = 0; j < data->numps; j++) @@ -797,7 +797,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* fill in data */ data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -1171,7 +1171,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); else - pol->cpus = cpu_core_map[pol->cpu]; + pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu) cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - data = powernow_data[first_cpu(cpu_core_map[cpu])]; + data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; if (!data) return -EINVAL; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 8ae88f18128..afd2b520d35 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -140,10 +140,10 @@ struct powernow_k8_data { #define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ #define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ -#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ +#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ /* - * Most values of interest are enocoded in a single field of the _PSS + * Most values of interest are encoded in a single field of the _PSS * entries: the "control" value. */ diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index d9f3e90a7ae..42da9bd677d 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy, static int sc520_freq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int result; /* capability check */ @@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int err; /* Test if we have the right hardware */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 811d4743854..3031f119619 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -230,7 +230,7 @@ static struct cpu_model models[] = static int centrino_cpu_init_table(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); struct cpu_model *model; for(model = models; model->cpu_id != NULL; model++) @@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu) static int centrino_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); unsigned freq; unsigned l, h; int ret; @@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = { */ static int __init centrino_init(void) { - struct cpuinfo_x86 *cpu = cpu_data; + struct cpuinfo_x86 *cpu = &cpu_data(0); if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 36685e8f7be..14d68aa301e 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif cpus_allowed = current->cpus_allowed; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index b1acc8ce316..76c3ab0da46 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 122d2d75aa9..88d66fb8411 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -93,7 +93,7 @@ static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ ccr5 = getCx86(CX86_CCR5); if (ccr5 & 2) setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ @@ -115,9 +115,9 @@ static void __cpuinit set_cx86_reorder(void) printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* Load/Store Serialize to mem access disable (=reorder it) */ + /* Load/Store Serialize to mem access disable (=reorder it) */ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; @@ -146,7 +146,7 @@ static void __cpuinit set_cx86_inc(void) printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); @@ -256,7 +256,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) u32 vendor, device; /* It isn't really a PCI quirk directly, but the cure is the same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It thows away the fifo on disable_dma() which + SB emulation. It throws away the fifo on disable_dma() which is wrong and ruins the audio. Bug2: VSA1 has a wrap bug so that using maximum sized DMA diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc4e08147b1..cc8c501b9f3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <asm/processor.h> +#include <asm/pgtable.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -19,8 +20,6 @@ #include <mach_apic.h> #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -95,6 +94,20 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index db6c25aa577..9921b01fe19 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -170,15 +170,15 @@ union l3_cache { unsigned val; }; -static const unsigned short assocs[] = { +static unsigned short assocs[] __cpuinitdata = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xa] = 32, [0xb] = 48, [0xc] = 64, [0xf] = 0xffff // ?? }; -static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; +static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; +static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, @@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; #ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); + unsigned int cpu = c->cpu_index; #endif if (c->cpuid_level > 3) { @@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; + per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; + per_cpu(cpu_llc_id, cpu) = l3_id; #endif } @@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { + if (cpu_data(i).apicid >> index_msb == + c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); if (i != cpu && cpuid4_info[i]) { sibling_leaf = CPUID4_INFO_IDX(i, index); @@ -493,12 +493,17 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) } } #else -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) { + int i; + + for (i = 0; i < num_cache_leaves; i++) + cache_remove_shared_cpu_map(cpu, i); + kfree(cpuid4_info[cpu]); cpuid4_info[cpu] = NULL; } @@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu) static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; + unsigned long j; + int retval; cpumask_t oldmask; if (num_cache_leaves == 0) @@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) goto out; /* Do cpuid and store the results */ - retval = 0; for (j = 0; j < num_cache_leaves; j++) { this_leaf = CPUID4_INFO_IDX(cpu, j); retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) + if (unlikely(retval < 0)) { + int i; + + for (i = 0; i < j; i++) + cache_remove_shared_cpu_map(cpu, i); break; + } cache_shared_cpu_map_setup(cpu, j); } set_cpus_allowed(current, oldmask); out: - if (retval) - free_cache_attributes(cpu); + if (retval) { + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; + } + return retval; } @@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = { .sysfs_ops = &sysfs_ops, }; -static void cpuid4_cache_sysfs_exit(unsigned int cpu) +static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { kfree(cache_kobject[cpu]); kfree(index_kobject[cpu]); @@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { + int err; if (num_cache_leaves == 0) return -ENOENT; - detect_cache_attributes(cpu); - if (cpuid4_info[cpu] == NULL) - return -ENOENT; + err = detect_cache_attributes(cpu); + if (err) + return err; /* Allocate all required memory */ cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); @@ -705,13 +718,15 @@ err_out: return -ENOMEM; } +static cpumask_t cache_dev_map = CPU_MASK_NONE; + /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; struct _index_kobject *this_object; - int retval = 0; + int retval; retval = cpuid4_cache_sysfs_init(cpu); if (unlikely(retval < 0)) @@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) kobject_set_name(cache_kobject[cpu], "%s", "cache"); cache_kobject[cpu]->ktype = &ktype_percpu_entry; retval = kobject_register(cache_kobject[cpu]); + if (retval < 0) { + cpuid4_cache_sysfs_exit(cpu); + return retval; + } for (i = 0; i < num_cache_leaves; i++) { this_object = INDEX_KOBJECT_PTR(cpu,i); @@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) break; } } + if (!retval) + cpu_set(cpu, cache_dev_map); + return retval; } @@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (cpuid4_info[cpu] == NULL) return; - for (i = 0; i < num_cache_leaves; i++) { - cache_remove_shared_cpu_map(cpu, i); + if (!cpu_isset(cpu, cache_dev_map)) + return; + cpu_clear(cpu, cache_dev_map); + + for (i = 0; i < num_cache_leaves; i++) kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return; } static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, @@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { - .notifier_call = cacheinfo_cpu_callback, + .notifier_call = cacheinfo_cpu_callback, }; static int __cpuinit cache_sysfs_init(void) @@ -791,13 +814,15 @@ static int __cpuinit cache_sysfs_init(void) if (num_cache_leaves == 0) return 0; - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - for_each_online_cpu(i) { - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, - (void *)(long)i); - } + int err; + struct sys_device *sys_dev = get_cpu_sysdev(i); + err = cache_add_dev(sys_dev); + if (err) + return err; + } + register_hotcpu_notifier(&cacheinfo_cpu_notifier); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index f1ebe1c1c17..d7d2323bbb6 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,2 +1,6 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o -obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o +obj-y = mce_$(BITS).o therm_throt.o + +obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o +obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 34c781eddee..34c781eddee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c new file mode 100644 index 00000000000..447b351f1f2 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -0,0 +1,902 @@ +/* + * Machine check handler. + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. Rewrote most of it. + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/rcupdate.h> +#include <linux/kallsyms.h> +#include <linux/sysdev.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/poll.h> +#include <linux/thread_info.h> +#include <linux/ctype.h> +#include <linux/kmod.h> +#include <linux/kdebug.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/mce.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/idle.h> + +#define MISC_MCELOG_MINOR 227 +#define NR_BANKS 6 + +atomic_t mce_entry; + +static int mce_dont_init; + +/* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ +static int tolerant = 1; +static int banks; +static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = 1; +static atomic_t mce_events; + +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; + +static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + +/* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also + * separate MCEs from kernel messages to avoid bogus bug reports. + */ + +struct mce_log mcelog = { + MCE_LOG_SIGNATURE, + MCE_LOG_LEN, +}; + +void mce_log(struct mce *mce) +{ + unsigned next, entry; + atomic_inc(&mce_events); + mce->finished = 0; + wmb(); + for (;;) { + entry = rcu_dereference(mcelog.next); + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, &mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } + break; + } + smp_rmb(); + next = entry + 1; + if (cmpxchg(&mcelog.next, entry, next) == entry) + break; + } + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); + wmb(); + mcelog.entry[entry].finished = 1; + wmb(); + + set_bit(0, ¬ify_user); +} + +static void print_mce(struct mce *m) +{ + printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" + KERN_EMERG + "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + m->cpu, m->mcgstatus, m->bank, m->status); + if (m->rip) { + printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->rip); + if (m->cs == __KERNEL_CS) + print_symbol("{%s}", m->rip); + printk("\n"); + } + printk(KERN_EMERG "TSC %Lx ", m->tsc); + if (m->addr) + printk("ADDR %Lx ", m->addr); + if (m->misc) + printk("MISC %Lx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " + "and contact your hardware vendor\n"); +} + +static void mce_panic(char *msg, struct mce *backup, unsigned long start) +{ + int i; + + oops_begin(); + for (i = 0; i < MCE_LOG_LEN; i++) { + unsigned long tsc = mcelog.entry[i].tsc; + + if (time_before(tsc, start)) + continue; + print_mce(&mcelog.entry[i]); + if (backup && mcelog.entry[i].tsc == backup->tsc) + backup = NULL; + } + if (backup) + print_mce(backup); + panic(msg); +} + +static int mce_available(struct cpuinfo_x86 *c) +{ + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) +{ + if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + m->rip = regs->rip; + m->cs = regs->cs; + } else { + m->rip = 0; + m->cs = 0; + } + if (rip_msr) { + /* Assume the RIP in the MSR is exact. Is this true? */ + m->mcgstatus |= MCG_STATUS_EIPV; + rdmsrl(rip_msr, m->rip); + m->cs = 0; + } +} + +/* + * The actual machine check handler + */ +void do_machine_check(struct pt_regs * regs, long error_code) +{ + struct mce m, panicm; + u64 mcestart = 0; + int i; + int panicm_found = 0; + /* + * If no_way_out gets set, there is no safe way to recover from this + * MCE. If tolerant is cranked up, we'll try anyway. + */ + int no_way_out = 0; + /* + * If kill_it gets set, there might be a way to recover from this + * error. + */ + int kill_it = 0; + + atomic_inc(&mce_entry); + + if (regs) + notify_die(DIE_NMI, "machine check", regs, error_code, 18, + SIGKILL); + if (!banks) + goto out2; + + memset(&m, 0, sizeof(struct mce)); + m.cpu = smp_processor_id(); + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + no_way_out = 1; + + rdtscll(mcestart); + barrier(); + + for (i = 0; i < banks; i++) { + if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + m.tsc = 0; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); + /* + * If this error was uncorrectable and there was + * an overflow, we're in trouble. If no overflow, + * we might get away with just killing a task. + */ + if (m.status & MCI_STATUS_UC) { + if (tolerant < 1 || m.status & MCI_STATUS_OVER) + no_way_out = 1; + kill_it = 1; + } + } + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); + if (error_code >= 0) + rdtscll(m.tsc); + if (error_code != -2) + mce_log(&m); + + /* Did this bank cause the exception? */ + /* Assume that the bank with uncorrectable errors did it, + and that there is only a single one. */ + if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { + panicm = m; + panicm_found = 1; + } + + add_taint(TAINT_MACHINE_CHECK); + } + + /* Never do anything final in the polling timer */ + if (!regs) + goto out; + + /* If we didn't find an uncorrectable error, pick + the last one (shouldn't happen, just being safe). */ + if (!panicm_found) + panicm = m; + + /* + * If we have decided that we just CAN'T continue, and the user + * has not set tolerant to an insane level, give up and die. + */ + if (no_way_out && tolerant < 3) + mce_panic("Machine check", &panicm, mcestart); + + /* + * If the error seems to be unrecoverable, something should be + * done. Try to kill as little as possible. If we can kill just + * one task, do that. If the user has set the tolerance very + * high, don't try to do anything at all. + */ + if (kill_it && tolerant < 3) { + int user_space = 0; + + /* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ + if (m.mcgstatus & MCG_STATUS_EIPV) + user_space = panicm.rip && (panicm.cs & 3); + + /* + * If we know that the error was in user space, send a + * SIGBUS. Otherwise, panic if tolerance is low. + * + * do_exit() takes an awful lot of locks and has a slight + * risk of deadlocking. + */ + if (user_space) { + do_exit(SIGBUS); + } else if (panic_on_oops || tolerant < 2) { + mce_panic("Uncorrected machine check", + &panicm, mcestart); + } + } + + /* notify userspace ASAP */ + set_thread_flag(TIF_MCE_NOTIFY); + + out: + /* the last thing we do is clear state */ + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); +} + +#ifdef CONFIG_X86_MCE_INTEL +/*** + * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog + * @cpu: The CPU on which the event occurred. + * @status: Event status information + * + * This function should be called by the thermal interrupt after the + * event has been processed and the decision was made to log the event + * further. + * + * The status parameter will be saved to the 'status' field of 'struct mce' + * and historically has been the register value of the + * MSR_IA32_THERMAL_STATUS (Intel) msr. + */ +void mce_log_therm_throt_event(unsigned int cpu, __u64 status) +{ + struct mce m; + + memset(&m, 0, sizeof(m)); + m.cpu = cpu; + m.bank = MCE_THERMAL_BANK; + m.status = status; + rdtscll(m.tsc); + mce_log(&m); +} +#endif /* CONFIG_X86_MCE_INTEL */ + +/* + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll 2x faster. When the poller finds no more + * errors, poll 2x slower (up to check_interval seconds). + */ + +static int check_interval = 5 * 60; /* 5 minutes */ +static int next_interval; /* in jiffies */ +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); + +static void mcheck_check_cpu(void *info) +{ + if (mce_available(¤t_cpu_data)) + do_machine_check(NULL, 0); +} + +static void mcheck_timer(struct work_struct *work) +{ + on_each_cpu(mcheck_check_cpu, NULL, 1, 1); + + /* + * Alert userspace if needed. If we logged an MCE, reduce the + * polling interval, otherwise increase the polling interval. + */ + if (mce_notify_user()) { + next_interval = max(next_interval/2, HZ/100); + } else { + next_interval = min(next_interval * 2, + (int)round_jiffies_relative(check_interval*HZ)); + } + + schedule_delayed_work(&mcheck_work, next_interval); +} + +/* + * This is only called from process context. This is where we do + * anything we need to alert userspace about new MCEs. This is called + * directly from the poller and also from entry.S and idle, thanks to + * TIF_MCE_NOTIFY. + */ +int mce_notify_user(void) +{ + clear_thread_flag(TIF_MCE_NOTIFY); + if (test_and_clear_bit(0, ¬ify_user)) { + static unsigned long last_print; + unsigned long now = jiffies; + + wake_up_interruptible(&mce_wait); + if (trigger[0]) + call_usermodehelper(trigger, trigger_argv, NULL, + UMH_NO_WAIT); + + if (time_after_eq(now, last_print + (check_interval*HZ))) { + last_print = now; + printk(KERN_INFO "Machine check events logged\n"); + } + + return 1; + } + return 0; +} + +/* see if the idle task needs to notify userspace */ +static int +mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) +{ + /* IDLE_END should be safe - interrupts are back on */ + if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) + mce_notify_user(); + + return NOTIFY_OK; +} + +static struct notifier_block mce_idle_notifier = { + .notifier_call = mce_idle_callback, +}; + +static __init int periodic_mcheck_init(void) +{ + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); + idle_notifier_register(&mce_idle_notifier); + return 0; +} +__initcall(periodic_mcheck_init); + + +/* + * Initialize Machine Checks for a CPU. + */ +static void mce_init(void *dummy) +{ + u64 cap; + int i; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + banks = cap & 0xff; + if (banks > NR_BANKS) { + printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); + banks = NR_BANKS; + } + /* Use accurate RIP reporting if available. */ + if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) + rip_msr = MSR_IA32_MCG_EIP; + + /* Log the machine checks left over from the previous reset. + This also clears all registers */ + do_machine_check(NULL, mce_bootlog ? -1 : -2); + + set_in_cr4(X86_CR4_MCE); + + if (cap & MCG_CTL_P) + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = 0; i < banks; i++) { + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } +} + +/* Add per CPU specific workarounds here */ +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +{ + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ + clear_bit(10, &bank[4]); + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; + } + +} + +static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +{ + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_feature_init(c); + break; + case X86_VENDOR_AMD: + mce_amd_feature_init(c); + break; + default: + break; + } +} + +/* + * Called for each booted CPU to set up machine checks. + * Must be called with preempt off. + */ +void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +{ + static cpumask_t mce_cpus = CPU_MASK_NONE; + + mce_cpu_quirks(c); + + if (mce_dont_init || + cpu_test_and_set(smp_processor_id(), mce_cpus) || + !mce_available(c)) + return; + + mce_init(NULL); + mce_cpu_features(c); +} + +/* + * Character device to read and clear the MCE log. + */ + +static DEFINE_SPINLOCK(mce_state_lock); +static int open_count; /* #times opened */ +static int open_exclu; /* already open exclusive? */ + +static int mce_open(struct inode *inode, struct file *file) +{ + spin_lock(&mce_state_lock); + + if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { + spin_unlock(&mce_state_lock); + return -EBUSY; + } + + if (file->f_flags & O_EXCL) + open_exclu = 1; + open_count++; + + spin_unlock(&mce_state_lock); + + return nonseekable_open(inode, file); +} + +static int mce_release(struct inode *inode, struct file *file) +{ + spin_lock(&mce_state_lock); + + open_count--; + open_exclu = 0; + + spin_unlock(&mce_state_lock); + + return 0; +} + +static void collect_tscs(void *data) +{ + unsigned long *cpu_tsc = (unsigned long *)data; + + rdtscll(cpu_tsc[smp_processor_id()]); +} + +static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, + loff_t *off) +{ + unsigned long *cpu_tsc; + static DECLARE_MUTEX(mce_read_sem); + unsigned next; + char __user *buf = ubuf; + int i, err; + + cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); + if (!cpu_tsc) + return -ENOMEM; + + down(&mce_read_sem); + next = rcu_dereference(mcelog.next); + + /* Only supports full reads right now */ + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { + up(&mce_read_sem); + kfree(cpu_tsc); + return -EINVAL; + } + + err = 0; + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + + while (!mcelog.entry[i].finished) { + if (time_after_eq(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + goto timeout; + } + cpu_relax(); + } + smp_rmb(); + err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); + buf += sizeof(struct mce); + timeout: + ; + } + + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; + + synchronize_sched(); + + /* + * Collect entries that were still getting written before the + * synchronize. + */ + on_each_cpu(collect_tscs, cpu_tsc, 1, 1); + for (i = next; i < MCE_LOG_LEN; i++) { + if (mcelog.entry[i].finished && + mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { + err |= copy_to_user(buf, mcelog.entry+i, + sizeof(struct mce)); + smp_rmb(); + buf += sizeof(struct mce); + memset(&mcelog.entry[i], 0, sizeof(struct mce)); + } + } + up(&mce_read_sem); + kfree(cpu_tsc); + return err ? -EFAULT : buf - ubuf; +} + +static unsigned int mce_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &mce_wait, wait); + if (rcu_dereference(mcelog.next)) + return POLLIN | POLLRDNORM; + return 0; +} + +static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, + unsigned long arg) +{ + int __user *p = (int __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + switch (cmd) { + case MCE_GET_RECORD_LEN: + return put_user(sizeof(struct mce), p); + case MCE_GET_LOG_LEN: + return put_user(MCE_LOG_LEN, p); + case MCE_GETCLEAR_FLAGS: { + unsigned flags; + + do { + flags = mcelog.flags; + } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + return put_user(flags, p); + } + default: + return -ENOTTY; + } +} + +static const struct file_operations mce_chrdev_ops = { + .open = mce_open, + .release = mce_release, + .read = mce_read, + .poll = mce_poll, + .ioctl = mce_ioctl, +}; + +static struct miscdevice mce_log_device = { + MISC_MCELOG_MINOR, + "mcelog", + &mce_chrdev_ops, +}; + +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + +/* + * Old style boot options parsing. Only for compatibility. + */ +static int __init mcheck_disable(char *str) +{ + mce_dont_init = 1; + return 1; +} + +/* mce=off disables machine check. Note you can re-enable it later + using sysfs. + mce=TOLERANCELEVEL (number, see above) + mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + mce=nobootlog Don't log MCEs from before booting. */ +static int __init mcheck_enable(char *str) +{ + if (!strcmp(str, "off")) + mce_dont_init = 1; + else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) + mce_bootlog = str[0] == 'b'; + else if (isdigit(str[0])) + get_option(&str, &tolerant); + else + printk("mce= argument %s ignored. Please use /sys", str); + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce=", mcheck_enable); + +/* + * Sysfs support + */ + +/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. + Only one CPU is active at this time, the others get readded later using + CPU hotplug. */ +static int mce_resume(struct sys_device *dev) +{ + mce_init(NULL); + return 0; +} + +/* Reinit MCEs after user configuration changes */ +static void mce_restart(void) +{ + if (next_interval) + cancel_delayed_work(&mcheck_work); + /* Timer race is harmless here */ + on_each_cpu(mce_init, NULL, 1, 1); + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); +} + +static struct sysdev_class mce_sysclass = { + .resume = mce_resume, + set_kset_name("machinecheck"), +}; + +DEFINE_PER_CPU(struct sys_device, device_mce); + +/* Why are there no generic functions for this? */ +#define ACCESSOR(name, var, start) \ + static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ + return sprintf(buf, "%lx\n", (unsigned long)var); \ + } \ + static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ + char *end; \ + unsigned long new = simple_strtoul(buf, &end, 0); \ + if (end == buf) return -EINVAL; \ + var = new; \ + start; \ + return end-buf; \ + } \ + static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); + +/* TBD should generate these dynamically based on number of available banks */ +ACCESSOR(bank0ctl,bank[0],mce_restart()) +ACCESSOR(bank1ctl,bank[1],mce_restart()) +ACCESSOR(bank2ctl,bank[2],mce_restart()) +ACCESSOR(bank3ctl,bank[3],mce_restart()) +ACCESSOR(bank4ctl,bank[4],mce_restart()) +ACCESSOR(bank5ctl,bank[5],mce_restart()) + +static ssize_t show_trigger(struct sys_device *s, char *buf) +{ + strcpy(buf, trigger); + strcat(buf, "\n"); + return strlen(trigger) + 1; +} + +static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) +{ + char *p; + int len; + strncpy(trigger, buf, sizeof(trigger)); + trigger[sizeof(trigger)-1] = 0; + len = strlen(trigger); + p = strchr(trigger, '\n'); + if (*p) *p = 0; + return len; +} + +static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); +ACCESSOR(tolerant,tolerant,) +ACCESSOR(check_interval,check_interval,mce_restart()) +static struct sysdev_attribute *mce_attributes[] = { + &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, + &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, + &attr_tolerant, &attr_check_interval, &attr_trigger, + NULL +}; + +static cpumask_t mce_device_initialized = CPU_MASK_NONE; + +/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ +static __cpuinit int mce_create_device(unsigned int cpu) +{ + int err; + int i; + + if (!mce_available(&cpu_data(cpu))) + return -EIO; + + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); + per_cpu(device_mce,cpu).id = cpu; + per_cpu(device_mce,cpu).cls = &mce_sysclass; + + err = sysdev_register(&per_cpu(device_mce,cpu)); + if (err) + return err; + + for (i = 0; mce_attributes[i]; i++) { + err = sysdev_create_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + if (err) + goto error; + } + cpu_set(cpu, mce_device_initialized); + + return 0; +error: + while (i--) { + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + } + sysdev_unregister(&per_cpu(device_mce,cpu)); + + return err; +} + +static void mce_remove_device(unsigned int cpu) +{ + int i; + + if (!cpu_isset(cpu, mce_device_initialized)) + return; + + for (i = 0; mce_attributes[i]; i++) + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + sysdev_unregister(&per_cpu(device_mce,cpu)); + cpu_clear(cpu, mce_device_initialized); +} + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static int +mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mce_create_device(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + mce_remove_device(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mce_cpu_notifier = { + .notifier_call = mce_cpu_callback, +}; + +static __init int mce_init_device(void) +{ + int err; + int i = 0; + + if (!mce_available(&boot_cpu_data)) + return -EIO; + err = sysdev_class_register(&mce_sysclass); + if (err) + return err; + + for_each_online_cpu(i) { + err = mce_create_device(i); + if (err) + return err; + } + + register_hotcpu_notifier(&mce_cpu_notifier); + misc_register(&mce_log_device); + return err; +} + +device_initcall(mce_init_device); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c new file mode 100644 index 00000000000..752fb16a817 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -0,0 +1,690 @@ +/* + * (c) 2005, 2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + * + * Written by Jacob Shin - AMD, Inc. + * + * Support : jacob.shin@amd.com + * + * April 2006 + * - added support for AMD Family 0x10 processors + * + * All MC4_MISCi registers are shared between multi-cores + */ + +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kobject.h> +#include <linux/notifier.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/sysdev.h> +#include <linux/sysfs.h> +#include <asm/apic.h> +#include <asm/mce.h> +#include <asm/msr.h> +#include <asm/percpu.h> +#include <asm/idle.h> + +#define PFX "mce_threshold: " +#define VERSION "version 1.1.1" +#define NR_BANKS 6 +#define NR_BLOCKS 9 +#define THRESHOLD_MAX 0xFFF +#define INT_TYPE_APIC 0x00020000 +#define MASK_VALID_HI 0x80000000 +#define MASK_CNTP_HI 0x40000000 +#define MASK_LOCKED_HI 0x20000000 +#define MASK_LVTOFF_HI 0x00F00000 +#define MASK_COUNT_EN_HI 0x00080000 +#define MASK_INT_TYPE_HI 0x00060000 +#define MASK_OVERFLOW_HI 0x00010000 +#define MASK_ERR_COUNT_HI 0x00000FFF +#define MASK_BLKPTR_LO 0xFF000000 +#define MCG_XBLK_ADDR 0xC0000400 + +struct threshold_block { + unsigned int block; + unsigned int bank; + unsigned int cpu; + u32 address; + u16 interrupt_enable; + u16 threshold_limit; + struct kobject kobj; + struct list_head miscj; +}; + +/* defaults used early on boot */ +static struct threshold_block threshold_defaults = { + .interrupt_enable = 0, + .threshold_limit = THRESHOLD_MAX, +}; + +struct threshold_bank { + struct kobject kobj; + struct threshold_block *blocks; + cpumask_t cpus; +}; +static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); + +#ifdef CONFIG_SMP +static unsigned char shared_bank[NR_BANKS] = { + 0, 0, 0, 0, 1 +}; +#endif + +static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ + +/* + * CPU Initialization + */ + +/* must be called with correct cpu affinity */ +static void threshold_restart_bank(struct threshold_block *b, + int reset, u16 old_limit) +{ + u32 mci_misc_hi, mci_misc_lo; + + rdmsr(b->address, mci_misc_lo, mci_misc_hi); + + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ + + if (reset) { /* reset err count and overflow bit */ + mci_misc_hi = + (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ + int new_count = (mci_misc_hi & THRESHOLD_MAX) + + (old_limit - b->threshold_limit); + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | + (new_count & THRESHOLD_MAX); + } + + b->interrupt_enable ? + (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (mci_misc_hi &= ~MASK_INT_TYPE_HI); + + mci_misc_hi |= MASK_COUNT_EN_HI; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); +} + +/* cpu init entry point, called from mce.c with preempt off */ +void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +{ + unsigned int bank, block; + unsigned int cpu = smp_processor_id(); + u32 low = 0, high = 0, address = 0; + + for (bank = 0; bank < NR_BANKS; ++bank) { + for (block = 0; block < NR_BLOCKS; ++block) { + if (block == 0) + address = MSR_IA32_MC0_MISC + bank * 4; + else if (block == 1) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + break; + address += MCG_XBLK_ADDR; + } + else + ++address; + + if (rdmsr_safe(address, &low, &high)) + break; + + if (!(high & MASK_VALID_HI)) { + if (block) + continue; + else + break; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + continue; + + if (!block) + per_cpu(bank_map, cpu) |= (1 << bank); +#ifdef CONFIG_SMP + if (shared_bank[bank] && c->cpu_core_id) + break; +#endif + high &= ~MASK_LVTOFF_HI; + high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; + wrmsr(address, low, high); + + setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, + THRESHOLD_APIC_VECTOR, + K8_APIC_EXT_INT_MSG_FIX, 0); + + threshold_defaults.address = address; + threshold_restart_bank(&threshold_defaults, 0, 0); + } + } +} + +/* + * APIC Interrupt Handler + */ + +/* + * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. + * the interrupt goes off when error_count reaches threshold_limit. + * the handler will simply log mcelog w/ software defined bank number. + */ +asmlinkage void mce_threshold_interrupt(void) +{ + unsigned int bank, block; + struct mce m; + u32 low = 0, high = 0, address = 0; + + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + memset(&m, 0, sizeof(m)); + rdtscll(m.tsc); + m.cpu = smp_processor_id(); + + /* assume first bank caused it */ + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) + continue; + for (block = 0; block < NR_BLOCKS; ++block) { + if (block == 0) + address = MSR_IA32_MC0_MISC + bank * 4; + else if (block == 1) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + break; + address += MCG_XBLK_ADDR; + } + else + ++address; + + if (rdmsr_safe(address, &low, &high)) + break; + + if (!(high & MASK_VALID_HI)) { + if (block) + continue; + else + break; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + continue; + + /* Log the machine check that caused the threshold + event. */ + do_machine_check(NULL, 0); + + if (high & MASK_OVERFLOW_HI) { + rdmsrl(address, m.misc); + rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, + m.status); + m.bank = K8_MCE_THRESHOLD_BASE + + bank * NR_BLOCKS + + block; + mce_log(&m); + goto out; + } + } + } +out: + add_pda(irq_threshold_count, 1); + irq_exit(); +} + +/* + * Sysfs Interface + */ + +struct threshold_attr { + struct attribute attr; + ssize_t(*show) (struct threshold_block *, char *); + ssize_t(*store) (struct threshold_block *, const char *, size_t count); +}; + +static cpumask_t affinity_set(unsigned int cpu) +{ + cpumask_t oldmask = current->cpus_allowed; + cpumask_t newmask = CPU_MASK_NONE; + cpu_set(cpu, newmask); + set_cpus_allowed(current, newmask); + return oldmask; +} + +static void affinity_restore(cpumask_t oldmask) +{ + set_cpus_allowed(current, oldmask); +} + +#define SHOW_FIELDS(name) \ +static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ +{ \ + return sprintf(buf, "%lx\n", (unsigned long) b->name); \ +} +SHOW_FIELDS(interrupt_enable) +SHOW_FIELDS(threshold_limit) + +static ssize_t store_interrupt_enable(struct threshold_block *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + b->interrupt_enable = !!new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, 0); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t store_threshold_limit(struct threshold_block *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + u16 old; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + if (new > THRESHOLD_MAX) + new = THRESHOLD_MAX; + if (new < 1) + new = 1; + old = b->threshold_limit; + b->threshold_limit = new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, old); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t show_error_count(struct threshold_block *b, char *buf) +{ + u32 high, low; + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + rdmsr(b->address, low, high); + affinity_restore(oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); +} + +static ssize_t store_error_count(struct threshold_block *b, + const char *buf, size_t count) +{ + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 1, 0); + affinity_restore(oldmask); + return 1; +} + +#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +#define RW_ATTR(name) \ +static struct threshold_attr name = \ + THRESHOLD_ATTR(name, 0644, show_## name, store_## name) + +RW_ATTR(interrupt_enable); +RW_ATTR(threshold_limit); +RW_ATTR(error_count); + +static struct attribute *default_attrs[] = { + &interrupt_enable.attr, + &threshold_limit.attr, + &error_count.attr, + NULL +}; + +#define to_block(k) container_of(k, struct threshold_block, kobj) +#define to_attr(a) container_of(a, struct threshold_attr, attr) + +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct threshold_block *b = to_block(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->show ? a->show(b, buf) : -EIO; + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct threshold_block *b = to_block(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->store ? a->store(b, buf, count) : -EIO; + return ret; +} + +static struct sysfs_ops threshold_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type threshold_ktype = { + .sysfs_ops = &threshold_ops, + .default_attrs = default_attrs, +}; + +static __cpuinit int allocate_threshold_blocks(unsigned int cpu, + unsigned int bank, + unsigned int block, + u32 address) +{ + int err; + u32 low, high; + struct threshold_block *b = NULL; + + if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) + return 0; + + if (rdmsr_safe(address, &low, &high)) + return 0; + + if (!(high & MASK_VALID_HI)) { + if (block) + goto recurse; + else + return 0; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + goto recurse; + + b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); + if (!b) + return -ENOMEM; + + b->block = block; + b->bank = bank; + b->cpu = cpu; + b->address = address; + b->interrupt_enable = 0; + b->threshold_limit = THRESHOLD_MAX; + + INIT_LIST_HEAD(&b->miscj); + + if (per_cpu(threshold_banks, cpu)[bank]->blocks) + list_add(&b->miscj, + &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); + else + per_cpu(threshold_banks, cpu)[bank]->blocks = b; + + kobject_set_name(&b->kobj, "misc%i", block); + b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; + b->kobj.ktype = &threshold_ktype; + err = kobject_register(&b->kobj); + if (err) + goto out_free; +recurse: + if (!block) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + return 0; + address += MCG_XBLK_ADDR; + } else + ++address; + + err = allocate_threshold_blocks(cpu, bank, ++block, address); + if (err) + goto out_free; + + return err; + +out_free: + if (b) { + kobject_unregister(&b->kobj); + kfree(b); + } + return err; +} + +/* symlinks sibling shared banks to first core. first core owns dir/files. */ +static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) +{ + int i, err = 0; + struct threshold_bank *b = NULL; + cpumask_t oldmask = CPU_MASK_NONE; + char name[32]; + + sprintf(name, "threshold_bank%i", bank); + +#ifdef CONFIG_SMP + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ + i = first_cpu(per_cpu(cpu_core_map, cpu)); + + /* first core not up yet */ + if (cpu_data(i).cpu_core_id) + goto out; + + /* already linked */ + if (per_cpu(threshold_banks, cpu)[bank]) + goto out; + + b = per_cpu(threshold_banks, i)[bank]; + + if (!b) + goto out; + + err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, + &b->kobj, name); + if (err) + goto out; + + b->cpus = per_cpu(cpu_core_map, cpu); + per_cpu(threshold_banks, cpu)[bank] = b; + goto out; + } +#endif + + b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); + if (!b) { + err = -ENOMEM; + goto out; + } + + kobject_set_name(&b->kobj, "threshold_bank%i", bank); + b->kobj.parent = &per_cpu(device_mce, cpu).kobj; +#ifndef CONFIG_SMP + b->cpus = CPU_MASK_ALL; +#else + b->cpus = per_cpu(cpu_core_map, cpu); +#endif + err = kobject_register(&b->kobj); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + oldmask = affinity_set(cpu); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(oldmask); + + if (err) + goto out_free; + + for_each_cpu_mask(i, b->cpus) { + if (i == cpu) + continue; + + err = sysfs_create_link(&per_cpu(device_mce, i).kobj, + &b->kobj, name); + if (err) + goto out; + + per_cpu(threshold_banks, i)[bank] = b; + } + + goto out; + +out_free: + per_cpu(threshold_banks, cpu)[bank] = NULL; + kfree(b); +out: + return err; +} + +/* create dir/files for all valid threshold banks */ +static __cpuinit int threshold_create_device(unsigned int cpu) +{ + unsigned int bank; + int err = 0; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + err = threshold_create_bank(cpu, bank); + if (err) + goto out; + } +out: + return err; +} + +/* + * let's be hotplug friendly. + * in case of multiple core processors, the first core always takes ownership + * of shared sysfs dir/files, and rest of the cores will be symlinked to it. + */ + +static void deallocate_threshold_block(unsigned int cpu, + unsigned int bank) +{ + struct threshold_block *pos = NULL; + struct threshold_block *tmp = NULL; + struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank]; + + if (!head) + return; + + list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { + kobject_unregister(&pos->kobj); + list_del(&pos->miscj); + kfree(pos); + } + + kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); + per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; +} + +static void threshold_remove_bank(unsigned int cpu, int bank) +{ + int i = 0; + struct threshold_bank *b; + char name[32]; + + b = per_cpu(threshold_banks, cpu)[bank]; + + if (!b) + return; + + if (!b->blocks) + goto free_out; + + sprintf(name, "threshold_bank%i", bank); + +#ifdef CONFIG_SMP + /* sibling symlink */ + if (shared_bank[bank] && b->blocks->cpu != cpu) { + sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); + per_cpu(threshold_banks, cpu)[bank] = NULL; + return; + } +#endif + + /* remove all sibling symlinks before unregistering */ + for_each_cpu_mask(i, b->cpus) { + if (i == cpu) + continue; + + sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); + per_cpu(threshold_banks, i)[bank] = NULL; + } + + deallocate_threshold_block(cpu, bank); + +free_out: + kobject_unregister(&b->kobj); + kfree(b); + per_cpu(threshold_banks, cpu)[bank] = NULL; +} + +static void threshold_remove_device(unsigned int cpu) +{ + unsigned int bank; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + threshold_remove_bank(cpu, bank); + } +} + +/* get notified when a cpu comes on/off */ +static int threshold_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* cpu was unsigned int to begin with */ + unsigned int cpu = (unsigned long)hcpu; + + if (cpu >= NR_CPUS) + goto out; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + threshold_create_device(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + threshold_remove_device(cpu); + break; + default: + break; + } + out: + return NOTIFY_OK; +} + +static struct notifier_block threshold_cpu_notifier = { + .notifier_call = threshold_cpu_callback, +}; + +static __init int threshold_init_device(void) +{ + unsigned lcpu = 0; + + /* to hit CPUs online before the notifier is up */ + for_each_online_cpu(lcpu) { + int err = threshold_create_device(lcpu); + if (err) + return err; + } + register_hotcpu_notifier(&threshold_cpu_notifier); + return 0; +} + +device_initcall(threshold_init_device); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c new file mode 100644 index 00000000000..c17eaf5dd6d --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -0,0 +1,90 @@ +/* + * Intel specific MCE features. + * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/mce.h> +#include <asm/hw_irq.h> +#include <asm/idle.h> +#include <asm/therm_throt.h> + +asmlinkage void smp_thermal_interrupt(void) +{ + __u64 msr_val; + + ack_APIC_irq(); + + exit_idle(); + irq_enter(); + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & 1)) + mce_log_therm_throt_event(smp_processor_id(), msr_val); + + add_pda(irq_thermal_count, 1); + irq_exit(); +} + +static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + int tm2 = 0; + unsigned int cpu = smp_processor_id(); + + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; + + if (!cpu_has(c, X86_FEATURE_ACC)) + return; + + /* first check if TM1 is already enabled by the BIOS, in which + * case there might be some SMM goo which handles it, so we can't even + * put a handler since it might be delivered via SMI already. + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + tm2 = 1; + + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", cpu, (h & APIC_VECTOR_MASK)); + return; + } + + h = THERMAL_APIC_VECTOR; + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); + apic_write(APIC_LVTTHMR, h); + + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); + return; +} + +void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +{ + intel_init_thermal(c); +} diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 1509edfb231..be4dabfee1f 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -61,6 +61,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) { irq_enter(); vendor_thermal_interrupt(regs); + __get_cpu_var(irq_stat).irq_thermal_count++; irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1203dc5ab87..24885be5c48 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; - int err; + int err = 0; sys_dev = get_cpu_sysdev(cpu); switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(sys_dev); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: mutex_lock(&therm_cpu_lock); @@ -149,10 +151,10 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_unlock(&therm_cpu_lock); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } -static struct notifier_block thermal_throttle_cpu_notifier = +static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = { .notifier_call = thermal_throttle_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 2287d4863a8..9964be3de2b 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -147,10 +147,10 @@ static void prepare_set(void) write_cr0(cr0); wbinvd(); - /* Cyrix ARRs - everything else were excluded at the top */ + /* Cyrix ARRs - everything else was excluded at the top */ ccr3 = getCx86(CX86_CCR3); - /* Cyrix ARRs - everything else were excluded at the top */ + /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 56f64e34829..992f08dfbb6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -182,7 +182,7 @@ static inline void k8_enable_fixed_iorrs(void) /** * Checks and updates an fixed-range MTRR if it differs from the value it - * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. + * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also. * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information * \param msr MSR address of the MTTR which should be checked and updated * \param changed pointer which indicates whether the MTRR needed to be changed diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c48b6fea5ab..3b20613325d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -139,13 +139,12 @@ struct set_mtrr_data { mtrr_type smp_type; }; -#ifdef CONFIG_SMP - static void ipi_handler(void *info) /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. [RETURNS] Nothing. */ { +#ifdef CONFIG_SMP struct set_mtrr_data *data = info; unsigned long flags; @@ -168,9 +167,8 @@ static void ipi_handler(void *info) atomic_dec(&data->count); local_irq_restore(flags); -} - #endif +} static inline int types_compatible(mtrr_type type1, mtrr_type type2) { return type1 == MTRR_TYPE_UNCACHABLE || @@ -738,13 +736,7 @@ void mtrr_ap_init(void) */ void mtrr_save_state(void) { - int cpu = get_cpu(); - - if (cpu == 0) - mtrr_save_fixed_ranges(NULL); - else - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); - put_cpu(); + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); } static int __init mtrr_init_finialize(void) @@ -754,7 +746,7 @@ static int __init mtrr_init_finialize(void) if (use_intel()) mtrr_state_warn(); else { - /* The CPUs haven't MTRR and seemes not support SMP. They have + /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. * TBD: is there any system with such CPU which supports diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 93fecd4b03d..c02541e6e65 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -34,7 +34,7 @@ struct wd_ops { u64 checkbit; }; -static struct wd_ops *wd_ops; +static const struct wd_ops *wd_ops; /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) @@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, perfctr_nmi_owner)) return 1; @@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, perfctr_nmi_owner); } @@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, evntsel_nmi_owner)) return 1; @@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, evntsel_nmi_owner); } @@ -317,7 +325,7 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops k7_wd_ops = { +static const struct wd_ops k7_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_k7_watchdog, @@ -380,7 +388,7 @@ static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); } -static struct wd_ops p6_wd_ops = { +static const struct wd_ops p6_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_p6_watchdog, @@ -532,7 +540,7 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops p4_wd_ops = { +static const struct wd_ops p4_wd_ops = { .reserve = p4_reserve, .unreserve = p4_unreserve, .setup = setup_p4_watchdog, @@ -550,6 +558,8 @@ static struct wd_ops p4_wd_ops = { #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK +static struct wd_ops intel_arch_wd_ops; + static int setup_intel_arch_watchdog(unsigned nmi_hz) { unsigned int ebx; @@ -591,11 +601,11 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; //unused - wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } -static struct wd_ops intel_arch_wd_ops = { +static struct wd_ops intel_arch_wd_ops __read_mostly = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_intel_arch_watchdog, diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 1e31b6caffb..066f8c6af4d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -49,7 +49,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -59,10 +59,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* nothing */ }; struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; + int i, n = 0; int fpu_exception; #ifdef CONFIG_SMP if (!cpu_online(n)) return 0; + n = c->cpu_index; #endif seq_printf(m, "processor\t: %d\n" "vendor_id\t: %s\n" @@ -122,7 +123,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_HT if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, n))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -174,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) |