aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile28
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c122
-rw-r--r--arch/x86/kernel/cpu/amd.c581
-rw-r--r--arch/x86/kernel/cpu/bugs.c56
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c33
-rw-r--r--arch/x86/kernel/cpu/centaur.c15
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c37
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c1011
-rw-r--r--arch/x86/kernel/cpu/cpu.h24
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c19
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c44
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c44
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c41
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.h1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c11
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c151
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c4
-rw-r--r--arch/x86/kernel/cpu/cyrix.c73
-rw-r--r--arch/x86/kernel/cpu/feature_names.c83
-rw-r--r--arch/x86/kernel/cpu/intel.c357
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c179
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c36
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c61
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c22
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c92
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl32
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c53
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c1014
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h3
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c306
-rw-r--r--arch/x86/kernel/cpu/powerflags.c20
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/cpu/transmeta.c32
-rw-r--r--arch/x86/kernel/cpu/umc.c3
40 files changed, 3380 insertions, 1301 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0c6f819088..7f0b45a5d78 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,18 +3,30 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
-obj-y += proc.o feature_names.o
+obj-y += proc.o capflags.o powerflags.o common.o
-obj-$(CONFIG_X86_32) += common.o bugs.o
-obj-$(CONFIG_X86_32) += amd.o
-obj-$(CONFIG_X86_32) += cyrix.o
-obj-$(CONFIG_X86_32) += centaur.o
-obj-$(CONFIG_X86_32) += transmeta.o
-obj-$(CONFIG_X86_32) += intel.o
-obj-$(CONFIG_X86_32) += umc.o
+obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
+obj-$(CONFIG_X86_64) += bugs_64.o
+
+obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
+obj-$(CONFIG_CPU_SUP_AMD) += amd.o
+obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
+obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
+obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+
+quiet_cmd_mkcapflags = MKCAP $@
+ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+
+cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
+
+targets += capflags.c
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+ $(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c2e1ce33c7c..0d9c993aa93 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -1,14 +1,14 @@
-
/*
* Routines to indentify additional cpu features that are scattered in
* cpuid space.
*/
-
#include <linux/cpu.h>
#include <asm/pat.h>
#include <asm/processor.h>
+#include <mach_apic.h>
+
struct cpuid_bit {
u16 feature;
u8 reg;
@@ -50,22 +50,122 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
}
}
+/* leaf 0xb SMT level */
+#define SMT_LEVEL 0
+
+/* leaf 0xb sub-leaf types */
+#define INVALID_TYPE 0
+#define SMT_TYPE 1
+#define CORE_TYPE 2
+
+#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
+#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
+#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
+
+/*
+ * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * exists, use it for populating initial_apicid and cpu topology
+ * detection.
+ */
+void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int eax, ebx, ecx, edx, sub_index;
+ unsigned int ht_mask_width, core_plus_mask_width;
+ unsigned int core_select_mask, core_level_siblings;
+
+ if (c->cpuid_level < 0xb)
+ return;
+
+ cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+ /*
+ * check if the cpuid leaf 0xb is actually implemented.
+ */
+ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+ return;
+
+ set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+
+ /*
+ * initial apic id, which also represents 32-bit extended x2apic id.
+ */
+ c->initial_apicid = edx;
+
+ /*
+ * Populate HT related information from sub-leaf level 0.
+ */
+ core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+
+ sub_index = 1;
+ do {
+ cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+
+ /*
+ * Check for the Core type in the implemented sub leaves.
+ */
+ if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ break;
+ }
+
+ sub_index++;
+ } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+
+ core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
+
+#ifdef CONFIG_X86_32
+ c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
+ & core_select_mask;
+ c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+#else
+ c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
+ c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
+#endif
+ c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+
+
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ if (c->x86_max_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ return;
+#endif
+}
+
#ifdef CONFIG_X86_PAT
void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
{
+ if (!cpu_has_pat)
+ pat_disable("PAT not supported by CPU.");
+
switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- if (c->x86 >= 0xf && c->x86 <= 0x11)
- return;
- break;
case X86_VENDOR_INTEL:
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ /*
+ * There is a known erratum on Pentium III and Core Solo
+ * and Core Duo CPUs.
+ * " Page with PAT set to WC while associated MTRR is UC
+ * may consolidate to UC "
+ * Because of this erratum, it is better to stick with
+ * setting WC in MTRR rather than using PAT on these CPUs.
+ *
+ * Enable PAT WC only on P4, Core 2 or later CPUs.
+ */
+ if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
return;
- break;
+
+ pat_disable("PAT WC disabled due to known CPU erratum.");
+ return;
+
+ case X86_VENDOR_AMD:
+ case X86_VENDOR_CENTAUR:
+ case X86_VENDOR_TRANSMETA:
+ return;
}
- pat_disable(cpu_has_pat ?
- "PAT disabled. Not yet verified on this CPU type." :
- "PAT not supported by CPU.");
+ pat_disable("PAT disabled. Not yet verified on this CPU type.");
}
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 24586682829..32e73520adf 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,13 +1,22 @@
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/mm.h>
+
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#ifdef CONFIG_X86_64
+# include <asm/numa_64.h>
+# include <asm/mmconfig.h>
+# include <asm/cacheflush.h>
+#endif
+
#include <mach_apic.h>
+
#include "cpu.h"
+#ifdef CONFIG_X86_32
/*
* B step AMD K6 before B 9730xxxx have hardware bugs that can cause
* misexecution of code under Linux. Owners of such processors should
@@ -24,60 +33,273 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
-#ifdef CONFIG_X86_LOCAL_APIC
-#define ENABLE_C1E_MASK 0x18000000
-#define CPUID_PROCESSOR_SIGNATURE 1
-#define CPUID_XFAM 0x0ff00000
-#define CPUID_XFAM_K8 0x00000000
-#define CPUID_XFAM_10H 0x00100000
-#define CPUID_XFAM_11H 0x00200000
-#define CPUID_XMOD 0x000f0000
-#define CPUID_XMOD_REV_F 0x00040000
-
-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
-static __cpuinit int amd_apic_timer_broken(void)
+static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
{
- u32 lo, hi;
- u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
- switch (eax & CPUID_XFAM) {
- case CPUID_XFAM_K8:
- if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
- break;
- case CPUID_XFAM_10H:
- case CPUID_XFAM_11H:
- rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
- if (lo & ENABLE_C1E_MASK) {
- if (smp_processor_id() != boot_cpu_physical_apicid)
- printk(KERN_INFO "AMD C1E detected late. "
- " Force timer broadcast.\n");
- return 1;
+/*
+ * General Systems BIOSen alias the cpu frequency registers
+ * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+ * drivers subsequently pokes it, and changes the CPU speed.
+ * Workaround : Remove the unneeded alias.
+ */
+#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
+#define CBAR_ENB (0x80000000)
+#define CBAR_KEY (0X000000CB)
+ if (c->x86_model == 9 || c->x86_model == 10) {
+ if (inl (CBAR) & CBAR_ENB)
+ outl (0 | CBAR_KEY, CBAR);
+ }
+}
+
+
+static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int mbytes = num_physpages >> (20-PAGE_SHIFT);
+
+ if (c->x86_model < 6) {
+ /* Based on AMD doc 20734R - June 2000 */
+ if (c->x86_model == 0) {
+ clear_cpu_cap(c, X86_FEATURE_APIC);
+ set_cpu_cap(c, X86_FEATURE_PGE);
}
- break;
- default:
- /* err on the side of caution */
- return 1;
+ return;
+ }
+
+ if (c->x86_model == 6 && c->x86_mask == 1) {
+ const int K6_BUG_LOOP = 1000000;
+ int n;
+ void (*f_vide)(void);
+ unsigned long d, d2;
+
+ printk(KERN_INFO "AMD K6 stepping B detected - ");
+
+ /*
+ * It looks like AMD fixed the 2.6.2 bug and improved indirect
+ * calls at the same time.
+ */
+
+ n = K6_BUG_LOOP;
+ f_vide = vide;
+ rdtscl(d);
+ while (n--)
+ f_vide();
+ rdtscl(d2);
+ d = d2-d;
+
+ if (d > 20*K6_BUG_LOOP)
+ printk("system stability may be impaired when more than 32 MB are used.\n");
+ else
+ printk("probably OK (after B9730xxxx).\n");
+ printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
+ }
+
+ /* K6 with old style WHCR */
+ if (c->x86_model < 8 ||
+ (c->x86_model == 8 && c->x86_mask < 8)) {
+ /* We can only write allocate on the low 508Mb */
+ if (mbytes > 508)
+ mbytes = 508;
+
+ rdmsr(MSR_K6_WHCR, l, h);
+ if ((l&0x0000FFFF) == 0) {
+ unsigned long flags;
+ l = (1<<0)|((mbytes/4)<<1);
+ local_irq_save(flags);
+ wbinvd();
+ wrmsr(MSR_K6_WHCR, l, h);
+ local_irq_restore(flags);
+ printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+ mbytes);
+ }
+ return;
+ }
+
+ if ((c->x86_model == 8 && c->x86_mask > 7) ||
+ c->x86_model == 9 || c->x86_model == 13) {
+ /* The more serious chips .. */
+
+ if (mbytes > 4092)
+ mbytes = 4092;
+
+ rdmsr(MSR_K6_WHCR, l, h);
+ if ((l&0xFFFF0000) == 0) {
+ unsigned long flags;
+ l = ((mbytes>>2)<<22)|(1<<16);
+ local_irq_save(flags);
+ wbinvd();
+ wrmsr(MSR_K6_WHCR, l, h);
+ local_irq_restore(flags);
+ printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+ mbytes);
+ }
+
+ return;
+ }
+
+ if (c->x86_model == 10) {
+ /* AMD Geode LX is model 10 */
+ /* placeholder for any needed mods */
+ return;
}
- return 0;
+}
+
+static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+
+ /*
+ * Bit 15 of Athlon specific MSR 15, needs to be 0
+ * to enable SSE on Palomino/Morgan/Barton CPU's.
+ * If the BIOS didn't enable it already, enable it here.
+ */
+ if (c->x86_model >= 6 && c->x86_model <= 10) {
+ if (!cpu_has(c, X86_FEATURE_XMM)) {
+ printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ rdmsr(MSR_K7_HWCR, l, h);
+ l &= ~0x00008000;
+ wrmsr(MSR_K7_HWCR, l, h);
+ set_cpu_cap(c, X86_FEATURE_XMM);
+ }
+ }
+
+ /*
+ * It's been determined by AMD that Athlons since model 8 stepping 1
+ * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
+ * As per AMD technical note 27212 0.2
+ */
+ if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ rdmsr(MSR_K7_CLK_CTL, l, h);
+ if ((l & 0xfff00000) != 0x20000000) {
+ printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
+ ((l & 0x000fffff)|0x20000000));
+ wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
+ }
+ }
+
+ set_cpu_cap(c, X86_FEATURE_K7);
}
#endif
-int force_mwait __cpuinitdata;
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+static int __cpuinit nearby_node(int apicid)
+{
+ int i, node;
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+ for (i = apicid - 1; i >= 0; i--) {
+ node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
+/*
+ * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
+ * Assumes number of cores is a power of two.
+ */
+static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_HT
+ unsigned bits;
+
+ bits = c->x86_coreid_bits;
+
+ /* Low order bits define the core id (index of core in socket) */
+ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+ /* Convert the initial APIC ID into the socket ID */
+ c->phys_proc_id = c->initial_apicid >> bits;
+#endif
+}
+
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
- if (cpuid_eax(0x80000000) >= 0x80000007) {
- c->x86_power = cpuid_edx(0x80000007);
- if (c->x86_power & (1<<8))
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+ int cpu = smp_processor_id();
+ int node;
+ unsigned apicid = hard_smp_processor_id();
+
+ node = c->phys_proc_id;
+ if (apicid_to_node[apicid] != NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (!node_online(node)) {
+ /* Two possibilities here:
+ - The CPU is missing memory and no node was created.
+ In that case try picking one from a nearby CPU
+ - The APIC IDs differ from the HyperTransport node IDs
+ which the K8 northbridge parsing fills in.
+ Assume they are all increased by a constant offset,
+ but in the same order as the HT nodeids.
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+
+ int ht_nodeid = c->initial_apicid;
+
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
}
+ numa_set_node(cpu, node);
+
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+#endif
}
-static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
{
- u32 l, h;
- int mbytes = num_physpages >> (20-PAGE_SHIFT);
- int r;
+#ifdef CONFIG_X86_HT
+ unsigned bits, ecx;
+
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level < 0x80000008)
+ return;
+
+ ecx = cpuid_ecx(0x80000008);
+
+ c->x86_max_cores = (ecx & 0xff) + 1;
+
+ /* CPU telling us the core id bits shift? */
+ bits = (ecx >> 12) & 0xF;
+
+ /* Otherwise recompute */
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+
+ c->x86_coreid_bits = bits;
+#endif
+}
+
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+ early_init_amd_mc(c);
+
+ /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
+ if (c->x86_power & (1<<8))
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+#else
+ /* Set MTRR capability flag if appropriate */
+ if (c->x86 == 5)
+ if (c->x86_model == 13 || c->x86_model == 9 ||
+ (c->x86_model == 8 && c->x86_mask >= 8))
+ set_cpu_cap(c, X86_FEATURE_K6_MTRR);
+#endif
+}
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+{
#ifdef CONFIG_SMP
unsigned long long value;
@@ -88,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* Errata 63 for SH-B3 steppings
* Errata 122 for all steppings (F+ have it disabled by default)
*/
- if (c->x86 == 15) {
+ if (c->x86 == 0xf) {
rdmsrl(MSR_K7_HWCR, value);
value |= 1 << 6;
wrmsrl(MSR_K7_HWCR, value);
@@ -98,218 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
early_init_amd(c);
/*
- * FIXME: We should handle the K5 here. Set up the write
- * range and also turn on MSR 83 bits 4 and 31 (write alloc,
- * no bus pipeline)
- */
-
- /*
* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
* 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
*/
clear_cpu_cap(c, 0*32+31);
- r = get_model_name(c);
-
- switch (c->x86) {
- case 4:
- /*
- * General Systems BIOSen alias the cpu frequency registers
- * of the Elan at 0x000df000. Unfortuantly, one of the Linux
- * drivers subsequently pokes it, and changes the CPU speed.
- * Workaround : Remove the unneeded alias.
- */
-#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
-#define CBAR_ENB (0x80000000)
-#define CBAR_KEY (0X000000CB)
- if (c->x86_model == 9 || c->x86_model == 10) {
- if (inl (CBAR) & CBAR_ENB)
- outl (0 | CBAR_KEY, CBAR);
- }
- break;
- case 5:
- if (c->x86_model < 6) {
- /* Based on AMD doc 20734R - June 2000 */
- if (c->x86_model == 0) {
- clear_cpu_cap(c, X86_FEATURE_APIC);
- set_cpu_cap(c, X86_FEATURE_PGE);
- }
- break;
- }
-
- if (c->x86_model == 6 && c->x86_mask == 1) {
- const int K6_BUG_LOOP = 1000000;
- int n;
- void (*f_vide)(void);
- unsigned long d, d2;
-
- printk(KERN_INFO "AMD K6 stepping B detected - ");
-
- /*
- * It looks like AMD fixed the 2.6.2 bug and improved indirect
- * calls at the same time.
- */
-
- n = K6_BUG_LOOP;
- f_vide = vide;
- rdtscl(d);
- while (n--)
- f_vide();
- rdtscl(d2);
- d = d2-d;
-
- if (d > 20*K6_BUG_LOOP)
- printk("system stability may be impaired when more than 32 MB are used.\n");
- else
- printk("probably OK (after B9730xxxx).\n");
- printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
- }
-
- /* K6 with old style WHCR */
- if (c->x86_model < 8 ||
- (c->x86_model == 8 && c->x86_mask < 8)) {
- /* We can only write allocate on the low 508Mb */
- if (mbytes > 508)
- mbytes = 508;
-
- rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0x0000FFFF) == 0) {
- unsigned long flags;
- l = (1<<0)|((mbytes/4)<<1);
- local_irq_save(flags);
- wbinvd();
- wrmsr(MSR_K6_WHCR, l, h);
- local_irq_restore(flags);
- printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
- mbytes);
- }
- break;
- }
-
- if ((c->x86_model == 8 && c->x86_mask > 7) ||
- c->x86_model == 9 || c->x86_model == 13) {
- /* The more serious chips .. */
-
- if (mbytes > 4092)
- mbytes = 4092;
-
- rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0xFFFF0000) == 0) {
- unsigned long flags;
- l = ((mbytes>>2)<<22)|(1<<16);
- local_irq_save(flags);
- wbinvd();
- wrmsr(MSR_K6_WHCR, l, h);
- local_irq_restore(flags);
- printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
- mbytes);
- }
-
- /* Set MTRR capability flag if appropriate */
- if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
- set_cpu_cap(c, X86_FEATURE_K6_MTRR);
- break;
- }
-
- if (c->x86_model == 10) {
- /* AMD Geode LX is model 10 */
- /* placeholder for any needed mods */
- break;
- }
- break;
- case 6: /* An Athlon/Duron */
+#ifdef CONFIG_X86_64
+ /* On C+ stepping K8 rep microcode works well for copy/memset */
+ if (c->x86 == 0xf) {
+ u32 level;
- /*
- * Bit 15 of Athlon specific MSR 15, needs to be 0
- * to enable SSE on Palomino/Morgan/Barton CPU's.
- * If the BIOS didn't enable it already, enable it here.
- */
- if (c->x86_model >= 6 && c->x86_model <= 10) {
- if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
- rdmsr(MSR_K7_HWCR, l, h);
- l &= ~0x00008000;
- wrmsr(MSR_K7_HWCR, l, h);
- set_cpu_cap(c, X86_FEATURE_XMM);
- }
- }
-
- /*
- * It's been determined by AMD that Athlons since model 8 stepping 1
- * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
- * As per AMD technical note 27212 0.2
- */
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
- rdmsr(MSR_K7_CLK_CTL, l, h);
- if ((l & 0xfff00000) != 0x20000000) {
- printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
- ((l & 0x000fffff)|0x20000000));
- wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
- }
- }
- break;
+ level = cpuid_eax(1);
+ if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
+ if (c->x86 == 0x10 || c->x86 == 0x11)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+#else
+
+ /*
+ * FIXME: We should handle the K5 here. Set up the write
+ * range and also turn on MSR 83 bits 4 and 31 (write alloc,
+ * no bus pipeline)
+ */
switch (c->x86) {
- case 15:
- /* Use K8 tuning for Fam10h and Fam11h */
- case 0x10:
- case 0x11:
- set_cpu_cap(c, X86_FEATURE_K8);
+ case 4:
+ init_amd_k5(c);
break;
- case 6:
- set_cpu_cap(c, X86_FEATURE_K7);
+ case 5:
+ init_amd_k6(c);
+ break;
+ case 6: /* An Athlon/Duron */
+ init_amd_k7(c);
break;
}
+
+ /* K6s reports MCEs but don't actually have all the MSRs */
+ if (c->x86 < 6)
+ clear_cpu_cap(c, X86_FEATURE_MCE);
+#endif
+
+ /* Enable workaround for FXSAVE leak */
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
- display_cacheinfo(c);
-
- if (cpuid_eax(0x80000000) >= 0x80000008)
- c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ if (!c->x86_model_id[0]) {
+ switch (c->x86) {
+ case 0xf:
+ /* Should distinguish Models here, but this is only
+ a fallback anyways. */
+ strcpy(c->x86_model_id, "Hammer");
+ break;
+ }
+ }
-#ifdef CONFIG_X86_HT
- /*
- * On a AMD multi core setup the lower bits of the APIC id
- * distinguish the cores.
- */
- if (c->x86_max_cores > 1) {
- int cpu = smp_processor_id();
- unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+ display_cacheinfo(c);
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
- c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
- c->phys_proc_id >>= bits;
- printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, c->cpu_core_id);
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level >= 0x80000008) {
+ amd_detect_cmp(c);
+ srat_detect_node(c);
}
+
+#ifdef CONFIG_X86_32
+ detect_ht(c);
#endif
- if (cpuid_eax(0x80000000) >= 0x80000006) {
- if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000))
+ if (c->extended_cpuid_level >= 0x80000006) {
+ if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
num_cache_leaves = 4;
else
num_cache_leaves = 3;
}
-#ifdef CONFIG_X86_LOCAL_APIC
- if (amd_apic_timer_broken())
- local_apic_timer_disabled = 1;
-#endif
-
- /* K6s reports MCEs but don't actually have all the MSRs */
- if (c->x86 < 6)
- clear_cpu_cap(c, X86_FEATURE_MCE);
+ if (c->x86 >= 0xf && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_K8);
- if (cpu_has_xmm2)
+ if (cpu_has_xmm2) {
+ /* MFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
+
+#ifdef CONFIG_X86_64
+ if (c->x86 == 0x10) {
+ /* do this for boot cpu */
+ if (c == &boot_cpu_data)
+ check_enable_amd_mmconf_dmi();
+
+ fam10h_check_enable_mmcfg();
+ }
+
+ if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
+ unsigned long long tseg;
+
+ /*
+ * Split up direct mapping around the TSEG SMM area.
+ * Don't do it for gbpages because there seems very little
+ * benefit in doing so.
+ */
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ if ((tseg>>PMD_SHIFT) <
+ (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+ ((tseg>>PMD_SHIFT) <
+ (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+ (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
+ set_memory_4k((unsigned long)__va(tseg), 1);
+ }
+ }
+#endif
}
+#ifdef CONFIG_X86_32
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
@@ -322,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
}
return size;
}
+#endif
static struct cpu_dev amd_cpu_dev __cpuinitdata = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
+#ifdef CONFIG_X86_32
.c_models = {
{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
{
@@ -338,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
}
},
},
+ .c_size_cache = amd_size_cache,
+#endif
.c_early_init = early_init_amd,
.c_init = init_amd,
- .c_size_cache = amd_size_cache,
+ .c_x86_vendor = X86_VENDOR_AMD,
};
-cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
+cpu_dev_register(amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 170d2f5523b..c8e315f1aa8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
*/
static void __init check_fpu(void)
{
+ s32 fdiv_bug;
+
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -59,8 +61,12 @@ static void __init check_fpu(void)
return;
}
-/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
- /* Test for the divl bug.. */
+ /*
+ * trap_init() enabled FXSR and company _before_ testing for FP
+ * problems here.
+ *
+ * Test for the divl bug..
+ */
__asm__("fninit\n\t"
"fldl %1\n\t"
"fdivl %2\n\t"
@@ -70,8 +76,10 @@ static void __init check_fpu(void)
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
+
+ boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
}
@@ -108,10 +116,15 @@ static void __init check_popad(void)
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
: "=&a" (res)
: "d" (inp)
- : "ecx", "edi" );
- /* If this fails, it means that any user program may lock the CPU hard. Too bad. */
- if (res != 12345678) printk( "Buggy.\n" );
- else printk( "OK.\n" );
+ : "ecx", "edi");
+ /*
+ * If this fails, it means that any user program may lock the
+ * CPU hard. Too bad.
+ */
+ if (res != 12345678)
+ printk("Buggy.\n");
+ else
+ printk("OK.\n");
#endif
}
@@ -122,13 +135,7 @@ static void __init check_popad(void)
* (for due to lack of "invlpg" and working WP on a i386)
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
- * - In order to support the local APIC on a buggy Pentium machine,
- * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
- * which happens implicitly if compiled for a Pentium or lower
- * (unless an advanced selection of CPU features is used) as an
- * otherwise config implies a properly working local APIC without
- * the need to do extra reads from the APIC.
-*/
+ */
static void __init check_config(void)
{
@@ -137,25 +144,11 @@ static void __init check_config(void)
* i486+ only features! (WP works in supervisor mode and the
* new "invlpg" and "bswap" instructions)
*/
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
+ defined(CONFIG_X86_BSWAP)
if (boot_cpu_data.x86 == 3)
panic("Kernel requires i486+ for 'invlpg' and other features");
#endif
-
-/*
- * If we were told we had a good local APIC, check for buggy Pentia,
- * i.e. all B steppings and the C2 stepping of P54C when using their
- * integrated APIC (see 11AP erratum in "Pentium Processor
- * Specification Update").
- */
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
- && cpu_has_apic
- && boot_cpu_data.x86 == 5
- && boot_cpu_data.x86_model == 2
- && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
- panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
-#endif
}
@@ -170,6 +163,7 @@ void __init check_bugs(void)
check_fpu();
check_hlt();
check_popad();
- init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ init_utsname()->machine[1] =
+ '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
}
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
new file mode 100644
index 00000000000..9a3ed0649d4
--- /dev/null
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ * Copyright (C) 2000 SuSE
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/alternative.h>
+#include <asm/bugs.h>
+#include <asm/processor.h>
+#include <asm/mtrr.h>
+#include <asm/cacheflush.h>
+
+void __init check_bugs(void)
+{
+ identify_boot_cpu();
+#if !defined(CONFIG_SMP)
+ printk("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+#endif
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e0f45edd6a5..89bfdd9cacc 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
if (c->x86_model >= 6 && c->x86_model < 9)
set_cpu_cap(c, X86_FEATURE_3DNOW);
- get_model_name(c);
display_cacheinfo(c);
}
@@ -314,6 +313,16 @@ enum {
EAMD3D = 1<<20,
};
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 5:
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+ break;
+ }
+}
+
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
@@ -462,8 +471,10 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
+ .c_early_init = early_init_centaur,
.c_init = init_centaur,
.c_size_cache = centaur_size_cache,
+ .c_x86_vendor = X86_VENDOR_CENTAUR,
};
-cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
+cpu_dev_register(centaur_cpu_dev);
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
new file mode 100644
index 00000000000..a1625f5a1e7
--- /dev/null
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -0,0 +1,37 @@
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ if (c->x86 == 0x6 && c->x86_model >= 0xf)
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+}
+
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+{
+ early_init_centaur(c);
+
+ if (c->x86 == 0x6 && c->x86_model >= 0xf) {
+ c->x86_cache_alignment = c->x86_clflush_size * 2;
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ }
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+}
+
+static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
+ .c_vendor = "Centaur",
+ .c_ident = { "CentaurHauls" },
+ .c_early_init = early_init_centaur,
+ .c_init = init_centaur,
+ .c_x86_vendor = X86_VENDOR_CENTAUR,
+};
+
+cpu_dev_register(centaur_cpu_dev);
+
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 00000000000..2056ccf572c
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
+/*
+ * cmpxchg*() fallbacks for CPU not supporting these instructions
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+ u8 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u8 *)ptr;
+ if (prev == old)
+ *(u8 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+ u16 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u16 *)ptr;
+ if (prev == old)
+ *(u16 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+ u32 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u32 *)ptr;
+ if (prev == old)
+ *(u32 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+ u64 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u64 *)ptr;
+ if (prev == old)
+ *(u64 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d0463a94624..25581dcb280 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,27 +1,62 @@
#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/string.h>
+#include <linux/bootmem.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/kgdb.h>
+#include <linux/topology.h>
#include <linux/delay.h>
#include <linux/smp.h>
-#include <linux/module.h>
#include <linux/percpu.h>
-#include <linux/bootmem.h>
-#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/io.h>
+#include <asm/linkage.h>
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
+#include <asm/numa.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
+#include <asm/genapic.h>
#endif
+#include <asm/pda.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/proto.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
#include "cpu.h"
+static struct cpu_dev *this_cpu __cpuinitdata;
+
+#ifdef CONFIG_X86_64
+/* We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ */
+/* The TLS descriptors are currently at a different place compared to i386.
+ Hopefully nobody expects them at a fixed place (Wine?) */
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+} };
+#else
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -55,17 +90,157 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
} };
+#endif
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
-
+#ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1;
-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+static int __init cachesize_setup(char *str)
+{
+ get_option(&str, &cachesize_override);
+ return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+static int __init x86_fxsr_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
+ return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+static int __init x86_sep_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_SEP);
+ return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+ u32 f1, f2;
+
+ /*
+ * Cyrix and IDT cpus allow disabling of CPUID
+ * so the code below may return different results
+ * when it is executed before and after enabling
+ * the CPUID. Add "volatile" to not allow gcc to
+ * optimize the subsequent calls to this function.
+ */
+ asm volatile ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+/* Probe for the CPUID instruction */
+static int __cpuinit have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
+ /* Disable processor serial number */
+ unsigned long lo, hi;
+ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ lo |= 0x200000;
+ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
+ clear_cpu_cap(c, X86_FEATURE_PN);
+
+ /* Disabling the serial number may affect the cpuid level */
+ c->cpuid_level = cpuid_eax(0);
+ }
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+ disable_x86_serial_nr = 0;
+ return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+#else
+static inline int flag_is_changeable_p(u32 flag)
+{
+ return 1;
+}
+/* Probe for the CPUID instruction */
+static inline int have_cpuid_p(void)
+{
+ return 1;
+}
+static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+}
+#endif
+
+/*
+ * Naming convention should be: <Name> [(<Codename>)]
+ * This table only is used unless init_<vendor>() below doesn't set it;
+ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
+ *
+ */
+
+/* Look up CPU names by table lookup. */
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+{
+ struct cpu_model_info *info;
+
+ if (c->x86_model >= 16)
+ return NULL; /* Range check */
+
+ if (!this_cpu)
+ return NULL;
+
+ info = this_cpu->c_models;
+
+ while (info && info->family) {
+ if (info->family == c->x86)
+ return info->model_names[c->x86_model];
+ info++;
+ }
+ return NULL; /* Not found */
+}
+
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
+{
+ struct desc_ptr gdt_descr;
+
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+#ifdef CONFIG_X86_32
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+#endif
+}
+
+static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_64
+ display_cacheinfo(c);
+#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
@@ -75,28 +250,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
+#endif
}
static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
};
-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
-
-static int __init cachesize_setup(char *str)
-{
- get_option(&str, &cachesize_override);
- return 1;
-}
-__setup("cachesize=", cachesize_setup);
-int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
- if (cpuid_eax(0x80000000) < 0x80000004)
- return 0;
+ if (c->extended_cpuid_level < 0x80000004)
+ return;
v = (unsigned int *) c->x86_model_id;
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
@@ -115,30 +284,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
while (q <= &c->x86_model_id[48])
*q++ = '\0'; /* Zero-pad the rest */
}
-
- return 1;
}
-
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
- unsigned int n, dummy, ecx, edx, l2size;
+ unsigned int n, dummy, ebx, ecx, edx, l2size;
- n = cpuid_eax(0x80000000);
+ n = c->extended_cpuid_level;
if (n >= 0x80000005) {
- cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size = (ecx>>24)+(edx>>24);
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size = (ecx>>24) + (edx>>24);
+#ifdef CONFIG_X86_64
+ /* On K8 L1 TLB is inclusive, so don't count it */
+ c->x86_tlbsize = 0;
+#endif
}
if (n < 0x80000006) /* Some chips just has a large L1. */
return;
- ecx = cpuid_ecx(0x80000006);
+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
l2size = ecx >> 16;
+#ifdef CONFIG_X86_64
+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
+#else
/* do processor-specific cache resizing */
if (this_cpu->c_size_cache)
l2size = this_cpu->c_size_cache(c, l2size);
@@ -149,116 +322,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
if (l2size == 0)
return; /* Again, no L2 cache is possible */
+#endif
c->x86_cache_size = l2size;
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
+ l2size, ecx & 0xFF);
}
-/*
- * Naming convention should be: <Name> [(<Codename>)]
- * This table only is used unless init_<vendor>() below doesn't set it;
- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
- *
- */
-
-/* Look up CPU names by table lookup. */
-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
- struct cpu_model_info *info;
+#ifdef CONFIG_X86_HT
+ u32 eax, ebx, ecx, edx;
+ int index_msb, core_bits;
- if (c->x86_model >= 16)
- return NULL; /* Range check */
+ if (!cpu_has(c, X86_FEATURE_HT))
+ return;
- if (!this_cpu)
- return NULL;
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ goto out;
- info = this_cpu->c_models;
+ if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
+ return;
- while (info && info->family) {
- if (info->family == c->x86)
- return info->model_names[c->x86_model];
- info++;
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ } else if (smp_num_siblings > 1) {
+
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
+ smp_num_siblings);
+ smp_num_siblings = 1;
+ return;
+ }
+
+ index_msb = get_count_order(smp_num_siblings);
+#ifdef CONFIG_X86_64
+ c->phys_proc_id = phys_pkg_id(index_msb);
+#else
+ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
+#endif
+
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+ index_msb = get_count_order(smp_num_siblings);
+
+ core_bits = get_count_order(c->x86_max_cores);
+
+#ifdef CONFIG_X86_64
+ c->cpu_core_id = phys_pkg_id(index_msb) &
+ ((1 << core_bits) - 1);
+#else
+ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
+ ((1 << core_bits) - 1);
+#endif
}
- return NULL; /* Not found */
-}
+out:
+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ }
+#endif
+}
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
int i;
static int printed;
for (i = 0; i < X86_VENDOR_NUM; i++) {
- if (cpu_devs[i]) {
- if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
- (cpu_devs[i]->c_ident[1] &&
- !strcmp(v, cpu_devs[i]->c_ident[1]))) {
- c->x86_vendor = i;
- if (!early)
- this_cpu = cpu_devs[i];
- return;
- }
+ if (!cpu_devs[i])
+ break;
+
+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
+ (cpu_devs[i]->c_ident[1] &&
+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+ this_cpu = cpu_devs[i];
+ c->x86_vendor = this_cpu->c_x86_vendor;
+ return;
}
}
+
if (!printed) {
printed++;
- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+ printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
printk(KERN_ERR "CPU: Your system may be unstable.\n");
}
+
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
}
-
-static int __init x86_fxsr_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- return 1;
-}
-__setup("nofxsr", x86_fxsr_setup);
-
-
-static int __init x86_sep_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_SEP);
- return 1;
-}
-__setup("nosep", x86_sep_setup);
-
-
-/* Standard macro to see if a specific flag is changeable */
-static inline int flag_is_changeable_p(u32 flag)
-{
- u32 f1, f2;
-
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
-
- return ((f1^f2) & flag) != 0;
-}
-
-
-/* Probe for the CPUID instruction */
-static int __cpuinit have_cpuid_p(void)
-{
- return flag_is_changeable_p(X86_EFLAGS_ID);
-}
-
-void __init cpu_detect(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
{
/* Get vendor name */
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -267,50 +430,87 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
(unsigned int *)&c->x86_vendor_id[4]);
c->x86 = 4;
+ /* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
u32 junk, tfms, cap0, misc;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
+ c->x86 = (tfms >> 8) & 0xf;
+ c->x86_model = (tfms >> 4) & 0xf;
+ c->x86_mask = tfms & 0xf;
if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- c->x86_mask = tfms & 15;
+ c->x86_model += ((tfms >> 16) & 0xf) << 4;
if (cap0 & (1<<19)) {
- c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+ c->x86_cache_alignment = c->x86_clflush_size;
}
}
}
-static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
+
+static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
- unsigned int ebx;
+ u32 ebx;
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
- if (have_cpuid_p()) {
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
- }
+ /* Intel-defined flags: level 0x00000001 */
+ if (c->cpuid_level >= 0x00000001) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+ c->x86_capability[0] = capability;
+ c->x86_capability[4] = excap;
+ }
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ c->extended_cpuid_level = xlvl;
+ if ((xlvl & 0xffff0000) == 0x80000000) {
+ if (xlvl >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
}
+ }
+
+#ifdef CONFIG_X86_64
+ if (c->extended_cpuid_level >= 0x80000008) {
+ u32 eax = cpuid_eax(0x80000008);
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
}
+#endif
+
+ if (c->extended_cpuid_level >= 0x80000007)
+ c->x86_power = cpuid_edx(0x80000007);
}
+static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_32
+ int i;
+
+ /*
+ * First of all, decide if this is a 486 or higher
+ * It's a 486 if we can modify the AC flag
+ */
+ if (flag_is_changeable_p(X86_EFLAGS_AC))
+ c->x86 = 4;
+ else
+ c->x86 = 3;
+
+ for (i = 0; i < X86_VENDOR_NUM; i++)
+ if (cpu_devs[i] && cpu_devs[i]->c_identify) {
+ c->x86_vendor_id[0] = 0;
+ cpu_devs[i]->c_identify(c);
+ if (c->x86_vendor_id[0]) {
+ get_cpu_vendor(c);
+ break;
+ }
+ }
+#endif
+}
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -320,144 +520,147 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
* WARNING: this function is only called on the BP. Don't add code here
* that is supposed to run on all CPUs.
*/
-static void __init early_cpu_detect(void)
+static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
+#ifdef CONFIG_X86_64
+ c->x86_clflush_size = 64;
+#else
c->x86_clflush_size = 32;
+#endif
+ c->x86_cache_alignment = c->x86_clflush_size;
+
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+ c->extended_cpuid_level = 0;
+
+ if (!have_cpuid_p())
+ identify_cpu_without_cpuid(c);
+ /* cyrix could have cpuid enabled via c_identify()*/
if (!have_cpuid_p())
return;
cpu_detect(c);
- get_cpu_vendor(c, 1);
+ get_cpu_vendor(c);
+
+ get_cpu_cap(c);
- if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
- cpu_devs[c->x86_vendor]->c_early_init)
- cpu_devs[c->x86_vendor]->c_early_init(c);
+ if (this_cpu->c_early_init)
+ this_cpu->c_early_init(c);
- early_get_cap(c);
+ validate_pat_support(c);
}
-static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
+void __init early_cpu_init(void)
{
- u32 tfms, xlvl;
- unsigned int ebx;
-
- if (have_cpuid_p()) {
- /* Get vendor name */
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
- (unsigned int *)&c->x86_vendor_id[0],
- (unsigned int *)&c->x86_vendor_id[8],
- (unsigned int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c, 0);
- /* Initialize the standard set of capabilities */
- /* Note that the vendor-specific code below might override */
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- c->x86_mask = tfms & 15;
- c->initial_apicid = (ebx >> 24) & 0xFF;
-#ifdef CONFIG_X86_HT
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
- c->phys_proc_id = c->initial_apicid;
-#else
- c->apicid = c->initial_apicid;
-#endif
- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
- c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
- if (xlvl >= 0x80000004)
- get_model_name(c); /* Default name */
+ struct cpu_dev **cdev;
+ int count = 0;
+
+ printk("KERNEL supported cpus:\n");
+ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
+ struct cpu_dev *cpudev = *cdev;
+ unsigned int j;
+
+ if (count >= X86_VENDOR_NUM)
+ break;
+ cpu_devs[count] = cpudev;
+ count++;
+
+ for (j = 0; j < 2; j++) {
+ if (!cpudev->c_ident[j])
+ continue;
+ printk(" %s %s\n", cpudev->c_vendor,
+ cpudev->c_ident[j]);
}
-
- init_scattered_cpuid_features(c);
}
+ early_identify_cpu(&boot_cpu_data);
}
-static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6; unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. In the latter case it doesn't even *fail*
+ * reliably, so probing for it doesn't even work. Disable it completely
+ * unless we can find a reliable way to detect all the broken cases.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
{
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
- /* Disable processor serial number */
- unsigned long lo, hi;
- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- lo |= 0x200000;
- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
- clear_cpu_cap(c, X86_FEATURE_PN);
-
- /* Disabling the serial number may affect the cpuid level */
- c->cpuid_level = cpuid_eax(0);
- }
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
}
-static int __init x86_serial_nr_setup(char *s)
+static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
{
- disable_x86_serial_nr = 0;
- return 1;
-}
-__setup("serialnumber", x86_serial_nr_setup);
+ c->extended_cpuid_level = 0;
+
+ if (!have_cpuid_p())
+ identify_cpu_without_cpuid(c);
+
+ /* cyrix could have cpuid enabled via c_identify()*/
+ if (!have_cpuid_p())
+ return;
+
+ cpu_detect(c);
+
+ get_cpu_vendor(c);
+ get_cpu_cap(c);
+ if (c->cpuid_level >= 0x00000001) {
+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_X86_HT
+ c->apicid = phys_pkg_id(c->initial_apicid, 0);
+# else
+ c->apicid = c->initial_apicid;
+# endif
+#endif
+
+#ifdef CONFIG_X86_HT
+ c->phys_proc_id = c->initial_apicid;
+#endif
+ }
+
+ get_model_name(c); /* Default name */
+
+ init_scattered_cpuid_features(c);
+ detect_nopl(c);
+}
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = -1;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
+ c->x86_coreid_bits = 0;
+#ifdef CONFIG_X86_64
+ c->x86_clflush_size = 64;
+#else
+ c->cpuid_level = -1; /* CPUID not detected */
c->x86_clflush_size = 32;
+#endif
+ c->x86_cache_alignment = c->x86_clflush_size;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
- if (!have_cpuid_p()) {
- /*
- * First of all, decide if this is a 486 or higher
- * It's a 486 if we can modify the AC flag
- */
- if (flag_is_changeable_p(X86_EFLAGS_AC))
- c->x86 = 4;
- else
- c->x86 = 3;
- }
-
generic_identify(c);
if (this_cpu->c_identify)
this_cpu->c_identify(c);
+#ifdef CONFIG_X86_64
+ c->apicid = phys_pkg_id(0);
+#endif
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
@@ -491,6 +694,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86, c->x86_model);
}
+#ifdef CONFIG_X86_64
+ detect_ht(c);
+#endif
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -499,7 +706,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
*/
if (c != &boot_cpu_data) {
/* AND the already accumulated flags with these */
- for (i = 0 ; i < NCAPINTS ; i++)
+ for (i = 0; i < NCAPINTS; i++)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
@@ -507,72 +714,91 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
for (i = 0; i < NCAPINTS; i++)
c->x86_capability[i] &= ~cleared_cpu_caps[i];
+#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
+#endif
select_idle_routine(c);
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+ numa_add_cpu(smp_processor_id());
+#endif
}
+#ifdef CONFIG_X86_64
+static void vgetcpu_set_mode(void)
+{
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+ vgetcpu_mode = VGETCPU_RDTSCP;
+ else
+ vgetcpu_mode = VGETCPU_LSL;
+}
+#endif
+
void __init identify_boot_cpu(void)
{
identify_cpu(&boot_cpu_data);
+#ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
+#else
+ vgetcpu_set_mode();
+#endif
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
{
BUG_ON(c == &boot_cpu_data);
identify_cpu(c);
+#ifdef CONFIG_X86_32
enable_sep_cpu();
+#endif
mtrr_ap_init();
}
-#ifdef CONFIG_X86_HT
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
-{
- u32 eax, ebx, ecx, edx;
- int index_msb, core_bits;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
- return;
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
+struct msr_range {
+ unsigned min;
+ unsigned max;
+};
- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1) {
+static struct msr_range msr_range_array[] __cpuinitdata = {
+ { 0x00000000, 0x00000418},
+ { 0xc0000000, 0xc000040b},
+ { 0xc0010000, 0xc0010142},
+ { 0xc0011000, 0xc001103b},
+};
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the "
- "siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
- return;
+static void __cpuinit print_cpu_msr(void)
+{
+ unsigned index;
+ u64 val;
+ int i;
+ unsigned index_min, index_max;
+
+ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+ index_min = msr_range_array[i].min;
+ index_max = msr_range_array[i].max;
+ for (index = index_min; index < index_max; index++) {
+ if (rdmsrl_amd_safe(index, &val))
+ continue;
+ printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
}
+ }
+}
- index_msb = get_count_order(smp_num_siblings);
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings) ;
-
- core_bits = get_count_order(c->x86_max_cores);
+static int show_msr __cpuinitdata;
+static __init int setup_show_msr(char *arg)
+{
+ int num;
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
+ get_option(&arg, &num);
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
- }
+ if (num > 0)
+ show_msr = num;
+ return 1;
}
-#endif
+__setup("show_msr=", setup_show_msr);
static __init int setup_noclflush(char *arg)
{
@@ -590,18 +816,26 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
else if (c->cpuid_level >= 0)
vendor = c->x86_vendor_id;
- if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
- printk("%s ", vendor);
+ if (vendor && !strstr(c->x86_model_id, vendor))
+ printk(KERN_CONT "%s ", vendor);
- if (!c->x86_model_id[0])
- printk("%d86", c->x86);
+ if (c->x86_model_id[0])
+ printk(KERN_CONT "%s", c->x86_model_id);
else
- printk("%s", c->x86_model_id);
+ printk(KERN_CONT "%d86", c->x86);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
- printk("\n");
+ printk(KERN_CONT "\n");
+
+#ifdef CONFIG_SMP
+ if (c->cpu_index < show_msr)
+ print_cpu_msr();
+#else
+ if (show_msr)
+ print_cpu_msr();
+#endif
}
static __init int setup_disablecpuid(char *arg)
@@ -617,19 +851,89 @@ __setup("clearcpuid=", setup_disablecpuid);
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-void __init early_cpu_init(void)
+#ifdef CONFIG_X86_64
+struct x8664_pda **_cpu_pda __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
+
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+
+char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+
+void __cpuinit pda_init(int cpu)
{
- struct cpu_vendor_dev *cvdev;
+ struct x8664_pda *pda = cpu_pda(cpu);
+
+ /* Setup up data that may be needed in __get_free_pages early */
+ loadsegment(fs, 0);
+ loadsegment(gs, 0);
+ /* Memory clobbers used to order PDA accessed */
+ mb();
+ wrmsrl(MSR_GS_BASE, pda);
+ mb();
+
+ pda->cpunumber = cpu;
+ pda->irqcount = -1;
+ pda->kernelstack = (unsigned long)stack_thread_info() -
+ PDA_STACKOFFSET + THREAD_SIZE;
+ pda->active_mm = &init_mm;
+ pda->mmu_state = 0;
+
+ if (cpu == 0) {
+ /* others are initialized in smpboot.c */
+ pda->pcurrent = &init_task;
+ pda->irqstackptr = boot_cpu_stack;
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ } else {
+ if (!pda->irqstackptr) {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d",
+ cpu);
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ }
+
+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+ pda->nodenumber = cpu_to_node(cpu);
+ }
+}
+
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+ DEBUG_STKSZ] __page_aligned_bss;
- for (cvdev = __x86cpuvendor_start ;
- cvdev < __x86cpuvendor_end ;
- cvdev++)
- cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
+extern asmlinkage void ignore_sysret(void);
- early_cpu_detect();
- validate_pat_support(&boot_cpu_data);
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
+{
+ /*
+ * LSTAR and STAR live in a bit strange symbiosis.
+ * They both write to the same internal register. STAR allows to
+ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
+ */
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsrl(MSR_LSTAR, system_call);
+ wrmsrl(MSR_CSTAR, ignore_sysret);
+
+#ifdef CONFIG_IA32_EMULATION
+ syscall32_cpu_init();
+#endif
+
+ /* Flags to clear on syscall */
+ wrmsrl(MSR_SYSCALL_MASK,
+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
}
+unsigned long kernel_eflags;
+
+/*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
+
+#else
+
/* Make sure %fs is initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
@@ -637,25 +941,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
regs->fs = __KERNEL_PERCPU;
return regs;
}
-
-/* Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one. */
-void switch_to_new_gdt(void)
-{
- struct desc_ptr gdt_descr;
-
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
-}
+#endif
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
+ * A lot of state is already set up in PDA init for 64 bit
*/
+#ifdef CONFIG_X86_64
+void __cpuinit cpu_init(void)
+{
+ int cpu = stack_smp_processor_id();
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
+ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
+ unsigned long v;
+ char *estacks = NULL;
+ struct task_struct *me;
+ int i;
+
+ /* CPU 0 is initialised in head64.c */
+ if (cpu != 0)
+ pda_init(cpu);
+ else
+ estacks = boot_exception_stacks;
+
+ me = current;
+
+ if (cpu_test_and_set(cpu, cpu_initialized))
+ panic("CPU#%d already initialized!\n", cpu);
+
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+
+ /*
+ * Initialize the per-CPU GDT with the boot GDT,
+ * and set up the GDT descriptor:
+ */
+
+ switch_to_new_gdt();
+ load_idt((const struct desc_ptr *)&idt_descr);
+
+ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+ syscall_init();
+
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ barrier();
+
+ check_efer();
+ if (cpu != 0 && x2apic)
+ enable_x2apic();
+
+ /*
+ * set up and load the per-CPU TSS
+ */
+ if (!orig_ist->ist[0]) {
+ static const unsigned int order[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ };
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception "
+ "stack %ld %d\n", v, cpu);
+ }
+ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
+ (unsigned long)estacks;
+ }
+ }
+
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ /*
+ * <= is required because the CPU will access up to
+ * 8 bits beyond the end of the IO permission bitmap.
+ */
+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
+ t->io_bitmap[i] = ~0UL;
+
+ atomic_inc(&init_mm.mm_count);
+ me->active_mm = &init_mm;
+ if (me->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, me);
+
+ load_sp0(t, &current->thread);
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+ load_LDT(&init_mm.context);
+
+#ifdef CONFIG_KGDB
+ /*
+ * If the kgdb is connected no debug regs should be altered. This
+ * is only applicable when KGDB and a KGDB I/O module are built
+ * into the kernel and you are using early debugging with
+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
+ */
+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ else {
+#endif
+ /*
+ * Clear all 6 debug registers:
+ */
+
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
+#ifdef CONFIG_KGDB
+ /* If the kgdb is connected no debug regs should be altered. */
+ }
+#endif
+
+ fpu_init();
+
+ raw_local_save_flags(kernel_eflags);
+
+ if (is_uv_system())
+ uv_cpu_init();
+}
+
+#else
+
void __cpuinit cpu_init(void)
{
int cpu = smp_processor_id();
@@ -709,19 +1124,21 @@ void __cpuinit cpu_init(void)
/*
* Force FPU initialization:
*/
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
-}
-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
-{
- int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
- /* lazy TLB state */
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+ xsave_init();
}
+
+
#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 783691b2a73..de4094a3921 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,6 @@
+#ifndef ARCH_X86_CPU_H
+
+#define ARCH_X86_CPU_H
struct cpu_model_info {
int vendor;
@@ -18,21 +21,16 @@ struct cpu_dev {
void (*c_init)(struct cpuinfo_x86 * c);
void (*c_identify)(struct cpuinfo_x86 * c);
unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
+ int c_x86_vendor;
};
-extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
-
-struct cpu_vendor_dev {
- int vendor;
- struct cpu_dev *cpu_dev;
-};
-
-#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
- static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
- __attribute__((__section__(".x86cpuvendor.init"))) = \
- { cpu_vendor_id, cpu_dev }
+#define cpu_dev_register(cpu_devX) \
+ static struct cpu_dev *__cpu_dev_##cpu_devX __used \
+ __attribute__((__section__(".x86_cpu_dev.init"))) = \
+ &cpu_devX;
-extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
+extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
-extern int get_model_name(struct cpuinfo_x86 *c);
extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596..efae3b22a0f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+ tristate "VIA C7 Enhanced PowerSaver"
select CPU_FREQ_TABLE
- depends on X86_32 && EXPERIMENTAL
+ depends on X86_32
help
This adds the CPUFreq driver for VIA C7 processors.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index b0c8208df9f..c24c4a487b7 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd)
cpumask_t saved_mask = current->cpus_allowed;
unsigned int i;
- for_each_cpu_mask(i, cmd->mask) {
+ for_each_cpu_mask_nr(i, cmd->mask) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
do_drv_write(cmd);
}
@@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask)
* Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
* no meaning should be associated with absolute values of these MSRs.
*/
-static unsigned int get_measured_perf(unsigned int cpu)
+static unsigned int get_measured_perf(struct cpufreq_policy *policy,
+ unsigned int cpu)
{
union {
struct {
@@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
#endif
- retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
+ retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
put_cpu();
set_cpus_allowed_ptr(current, &saved_mask);
@@ -451,7 +452,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
freqs.old = perf->states[perf->state].core_frequency * 1000;
freqs.new = data->freq_table[next_state].frequency;
- for_each_cpu_mask(i, cmd.mask) {
+ for_each_cpu_mask_nr(i, cmd.mask) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
- for_each_cpu_mask(i, cmd.mask) {
+ for_each_cpu_mask_nr(i, cmd.mask) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -785,7 +786,11 @@ static int __init acpi_cpufreq_init(void)
if (ret)
return ret;
- return cpufreq_register_driver(&acpi_cpufreq_driver);
+ ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+ if (ret)
+ free_percpu(acpi_perf_data);
+
+ return ret;
}
static void __exit acpi_cpufreq_exit(void)
@@ -795,8 +800,6 @@ static void __exit acpi_cpufreq_exit(void)
cpufreq_unregister_driver(&acpi_cpufreq_driver);
free_percpu(acpi_perf_data);
-
- return;
}
module_param(acpi_pstate_strict, uint, 0644);
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index f03e9153618..965ea52767a 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -26,9 +26,10 @@
#define NFORCE2_SAFE_DISTANCE 50
/* Delay in ms between FSB changes */
-//#define NFORCE2_DELAY 10
+/* #define NFORCE2_DELAY 10 */
-/* nforce2_chipset:
+/*
+ * nforce2_chipset:
* FSB is changed using the chipset
*/
static struct pci_dev *nforce2_chipset_dev;
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev;
/* fid:
* multiplier * 10
*/
-static int fid = 0;
+static int fid;
/* min_fsb, max_fsb:
* minimum and maximum FSB (= FSB at boot time)
*/
-static int min_fsb = 0;
-static int max_fsb = 0;
+static int min_fsb;
+static int max_fsb;
MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444);
MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
MODULE_PARM_DESC(min_fsb,
- "Minimum FSB to use, if not defined: current FSB - 50");
+ "Minimum FSB to use, if not defined: current FSB - 50");
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb)
/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
- 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL);
+ 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
if (!nforce2_sub5)
return 0;
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
fsb /= 1000000;
/* Check if PLL register is already set */
- pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+ pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
- if(bootfsb || !temp)
+ if (bootfsb || !temp)
return fsb;
-
+
/* Use PLL register FSB value */
- pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp);
+ pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
fsb = nforce2_calc_fsb(temp);
return fsb;
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb)
}
/* First write? Then set actual value */
- pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+ pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
if (!temp) {
pll = nforce2_calc_pll(tfsb);
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb)
tfsb--;
/* Calculate the PLL reg. value */
- if ((pll = nforce2_calc_pll(tfsb)) == -1)
+ pll = nforce2_calc_pll(tfsb);
+ if (pll == -1)
return -EINVAL;
nforce2_write_pll(pll);
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu)
static int nforce2_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
-// unsigned long flags;
+/* unsigned long flags; */
struct cpufreq_freqs freqs;
unsigned int target_fsb;
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy,
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
/* Disable IRQs */
- //local_irq_save(flags);
+ /* local_irq_save(flags); */
if (nforce2_set_fsb(target_fsb) < 0)
printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
- target_fsb);
+ target_fsb);
else
dprintk("Changed FSB successfully to %d\n",
- target_fsb);
+ target_fsb);
/* Enable IRQs */
- //local_irq_restore(flags);
+ /* local_irq_restore(flags); */
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy)
policy->max = (fsb_pol_max + 1) * fid * 100;
cpufreq_verify_within_limits(policy,
- policy->cpuinfo.min_freq,
- policy->cpuinfo.max_freq);
+ policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
return 0;
}
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
/* Set maximum FSB to FSB at boot time */
max_fsb = nforce2_fsb_read(1);
- if(!max_fsb)
+ if (!max_fsb)
return -EIO;
if (!min_fsb)
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f56..fe613c93b36 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -25,8 +25,8 @@
#include <linux/cpufreq.h>
#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
+#include <linux/timex.h>
+#include <linux/io.h>
#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
* It is important that the frequencies
* are listed in ascending order here!
*/
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
{1000, 0x02, 0x18},
{2000, 0x02, 0x10},
{4000, 0x02, 0x08},
@@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
u8 clockspeed_reg; /* Clock Speed Register */
local_irq_disable();
- outb_p(0x80,REG_CSCIR);
+ outb_p(0x80, REG_CSCIR);
clockspeed_reg = inb_p(REG_CSCDR);
local_irq_enable();
@@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
}
/* 33 MHz is not 32 MHz... */
- if ((clockspeed_reg & 0xE0)==0xA0)
+ if ((clockspeed_reg & 0xE0) == 0xA0)
return 33000;
- return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
+ return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
}
@@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
* There is no return value.
*/
-static void elanfreq_set_cpu_state (unsigned int state)
+static void elanfreq_set_cpu_state(unsigned int state)
{
struct cpufreq_freqs freqs;
@@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state)
*/
local_irq_disable();
- outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */
- outb_p(0x00,REG_CSCDR);
+ outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */
+ outb_p(0x00, REG_CSCDR);
local_irq_enable(); /* wait till internal pipelines and */
udelay(1000); /* buffers have cleaned up */
local_irq_disable();
/* now, set the CPU clock speed register (0x80) */
- outb_p(0x80,REG_CSCIR);
- outb_p(elan_multiplier[state].val80h,REG_CSCDR);
+ outb_p(0x80, REG_CSCIR);
+ outb_p(elan_multiplier[state].val80h, REG_CSCDR);
/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
- outb_p(0x40,REG_CSCIR);
- outb_p(elan_multiplier[state].val40h,REG_CSCDR);
+ outb_p(0x40, REG_CSCIR);
+ outb_p(elan_multiplier[state].val40h, REG_CSCDR);
udelay(10000);
local_irq_enable();
@@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state)
* for the hardware supported by the driver.
*/
-static int elanfreq_verify (struct cpufreq_policy *policy)
+static int elanfreq_verify(struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
}
-static int elanfreq_target (struct cpufreq_policy *policy,
+static int elanfreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
@@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
/* capability check */
if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model!=10))
+ (c->x86 != 4) || (c->x86_model != 10))
return -ENODEV;
/* max freq */
@@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
max_freq = elanfreq_get_cpu_frequency(0);
/* table init */
- for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
if (elanfreq_table[i].frequency > max_freq)
elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
}
@@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
if (result)
- return (result);
+ return result;
cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
return 0;
@@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup);
#endif
-static struct freq_attr* elanfreq_attr[] = {
+static struct freq_attr *elanfreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -284,9 +284,9 @@ static int __init elanfreq_init(void)
/* Test if we have the right hardware */
if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model!=10)) {
+ (c->x86 != 4) || (c->x86_model != 10)) {
printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
- return -ENODEV;
+ return -ENODEV;
}
return cpufreq_register_driver(&elanfreq_driver);
}
@@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void)
}
-module_param (max_freq, int, 0444);
+module_param(max_freq, int, 0444);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 199e4e05e5d..b8e05ee4f73 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
return 0;
/* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
* Developer's Manual, Volume 3
*/
- for_each_cpu_mask(i, policy->cpus)
+ for_each_cpu_mask_nr(i, policy->cpus)
cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
/* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
}
if (c->x86 != 0xF) {
- printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n");
+ printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
return 0;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index eb9b62b0830..b5ced806a31 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -15,12 +15,11 @@
#include <linux/slab.h>
#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
+#include <linux/timex.h>
+#include <linux/io.h>
-
-#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
- as it is unused */
+#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
+ as it is unused */
static unsigned int busfreq; /* FSB, in 10 kHz */
static unsigned int max_multiplier;
@@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void)
msrval = POWERNOW_IOPORT + 0x1;
wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue=inl(POWERNOW_IOPORT + 0x8);
+ invalue = inl(POWERNOW_IOPORT + 0x8);
msrval = POWERNOW_IOPORT + 0x0;
wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
@@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void)
*
* Tries to change the PowerNow! multiplier
*/
-static void powernow_k6_set_state (unsigned int best_i)
+static void powernow_k6_set_state(unsigned int best_i)
{
- unsigned long outvalue=0, invalue=0;
+ unsigned long outvalue = 0, invalue = 0;
unsigned long msrval;
struct cpufreq_freqs freqs;
@@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i)
msrval = POWERNOW_IOPORT + 0x1;
wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue=inl(POWERNOW_IOPORT + 0x8);
+ invalue = inl(POWERNOW_IOPORT + 0x8);
invalue = invalue & 0xf;
outvalue = outvalue | invalue;
- outl(outvalue ,(POWERNOW_IOPORT + 0x8));
+ outl(outvalue , (POWERNOW_IOPORT + 0x8));
msrval = POWERNOW_IOPORT + 0x0;
wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
@@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
*
* sets a new CPUFreq policy
*/
-static int powernow_k6_target (struct cpufreq_policy *policy,
+static int powernow_k6_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
@@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
busfreq = cpu_khz / max_multiplier;
/* table init */
- for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+ for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
if (clock_ratio[i].index > max_multiplier)
clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
else
@@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
if (result)
- return (result);
+ return result;
cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
@@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
{
unsigned int i;
- for (i=0; i<8; i++) {
- if (i==max_multiplier)
+ for (i = 0; i < 8; i++) {
+ if (i == max_multiplier)
powernow_k6_set_state(i);
}
cpufreq_frequency_table_put_attr(policy->cpu);
@@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu)
return busfreq * powernow_k6_get_cpu_multiplier();
}
-static struct freq_attr* powernow_k6_attr[] = {
+static struct freq_attr *powernow_k6_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -227,7 +226,7 @@ static int __init powernow_k6_init(void)
}
if (cpufreq_register_driver(&powernow_k6_driver)) {
- release_region (POWERNOW_IOPORT, 16);
+ release_region(POWERNOW_IOPORT, 16);
return -EINVAL;
}
@@ -243,13 +242,13 @@ static int __init powernow_k6_init(void)
static void __exit powernow_k6_exit(void)
{
cpufreq_unregister_driver(&powernow_k6_driver);
- release_region (POWERNOW_IOPORT, 16);
+ release_region(POWERNOW_IOPORT, 16);
}
-MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE("GPL");
module_init(powernow_k6_init);
module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
index f8a63b3664e..35fb4eaf6e1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
@@ -1,5 +1,4 @@
/*
- * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
* (C) 2003 Dave Jones.
*
* Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 206791eb46e..84bb395038d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
-
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
return data[pstate].frequency;
}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
wrmsr(MSR_FIDVID_CTL, lo, hi);
}
-
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
@@ -966,7 +963,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
freqs.old = find_khz_freq_from_fid(data->currfid);
freqs.new = find_khz_freq_from_fid(fid);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -974,7 +971,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
res = transition_fid_vid(data, fid, vid);
freqs.new = find_khz_freq_from_fid(data->currfid);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -997,7 +994,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -1005,7 +1002,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
res = transition_pstate(data, pstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 908dd347c67..3b5f06423e7 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -26,9 +26,10 @@
#include <asm/cpufeature.h>
#define PFX "speedstep-centrino: "
-#define MAINTAINER "cpufreq@lists.linux.org.uk"
+#define MAINTAINER "cpufreq@vger.kernel.org"
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+#define dprintk(msg...) \
+ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
#define INTEL_MSR_RANGE (0xffff)
@@ -66,11 +67,12 @@ struct cpu_model
struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
};
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x);
/* Operating points for current CPU */
-static struct cpu_model *centrino_model[NR_CPUS];
-static const struct cpu_id *centrino_cpu[NR_CPUS];
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
static struct cpufreq_driver centrino_driver;
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
return -ENOENT;
}
- centrino_model[policy->cpu] = model;
+ per_cpu(centrino_model, policy->cpu) = model;
dprintk("found \"%s\": max frequency: %dkHz\n",
model->model_name, model->max_freq);
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
}
#else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+ return -ENODEV;
+}
#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x)
{
if ((c->x86 == x->x86) &&
(c->x86_model == x->x86_model) &&
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
* for centrino, as some DSDTs are buggy.
* Ideally, this can be done using the acpi_data structure.
*/
- if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+ if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
msr = (msr >> 8) & 0xff;
return msr * 100000;
}
- if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+ if ((!per_cpu(centrino_model, cpu)) ||
+ (!per_cpu(centrino_model, cpu)->op_points))
return 0;
msr &= 0xffff;
- for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
- if (msr == centrino_model[cpu]->op_points[i].index)
- return centrino_model[cpu]->op_points[i].frequency;
+ for (i = 0;
+ per_cpu(centrino_model, cpu)->op_points[i].frequency
+ != CPUFREQ_TABLE_END;
+ i++) {
+ if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+ return per_cpu(centrino_model, cpu)->
+ op_points[i].frequency;
}
if (failsafe)
- return centrino_model[cpu]->op_points[i-1].frequency;
+ return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
else
return 0;
}
@@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
int i;
/* Only Intel makes Enhanced Speedstep-capable CPUs */
- if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
+ if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+ !cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
@@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
break;
if (i != N_IDS)
- centrino_cpu[policy->cpu] = &cpu_ids[i];
+ per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
- if (!centrino_cpu[policy->cpu]) {
+ if (!per_cpu(centrino_cpu, policy->cpu)) {
dprintk("found unsupported CPU with "
"Enhanced SpeedStep: send /proc/cpuinfo to "
MAINTAINER "\n");
@@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
if (!(l & (1<<16))) {
- printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
+ printk(KERN_INFO PFX
+ "couldn't enable Enhanced SpeedStep\n");
return -ENODEV;
}
}
freq = get_cur_freq(policy->cpu);
-
- policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
+ policy->cpuinfo.transition_latency = 10000;
+ /* 10uS transition latency */
policy->cur = freq;
dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
- ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
+ ret = cpufreq_frequency_table_cpuinfo(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
if (ret)
return (ret);
- cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
+ cpufreq_frequency_table_get_attr(
+ per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
return 0;
}
@@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
- if (!centrino_model[cpu])
+ if (!per_cpu(centrino_model, cpu))
return -ENODEV;
cpufreq_frequency_table_put_attr(cpu);
- centrino_model[cpu] = NULL;
+ per_cpu(centrino_model, cpu) = NULL;
return 0;
}
@@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
*/
static int centrino_verify (struct cpufreq_policy *policy)
{
- return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
+ return cpufreq_frequency_table_verify(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
}
/**
* centrino_setpolicy - set a new CPUFreq policy
* @policy: new policy
* @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
*
* Sets a new CPUFreq policy.
*/
+struct allmasks {
+ cpumask_t online_policy_cpus;
+ cpumask_t saved_mask;
+ cpumask_t set_mask;
+ cpumask_t covered_cpus;
+};
+
static int centrino_target (struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy,
unsigned int newstate = 0;
unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
struct cpufreq_freqs freqs;
- cpumask_t online_policy_cpus;
- cpumask_t saved_mask;
- cpumask_t set_mask;
- cpumask_t covered_cpus;
int retval = 0;
unsigned int j, k, first_cpu, tmp;
-
- if (unlikely(centrino_model[cpu] == NULL))
- return -ENODEV;
+ CPUMASK_ALLOC(allmasks);
+ CPUMASK_PTR(online_policy_cpus, allmasks);
+ CPUMASK_PTR(saved_mask, allmasks);
+ CPUMASK_PTR(set_mask, allmasks);
+ CPUMASK_PTR(covered_cpus, allmasks);
+
+ if (unlikely(allmasks == NULL))
+ return -ENOMEM;
+
+ if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
+ retval = -ENODEV;
+ goto out;
+ }
if (unlikely(cpufreq_frequency_table_target(policy,
- centrino_model[cpu]->op_points,
+ per_cpu(centrino_model, cpu)->op_points,
target_freq,
relation,
&newstate))) {
- return -EINVAL;
+ retval = -EINVAL;
+ goto out;
}
#ifdef CONFIG_HOTPLUG_CPU
/* cpufreq holds the hotplug lock, so we are safe from here on */
- cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+ cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
#else
- online_policy_cpus = policy->cpus;
+ *online_policy_cpus = policy->cpus;
#endif
- saved_mask = current->cpus_allowed;
+ *saved_mask = current->cpus_allowed;
first_cpu = 1;
- cpus_clear(covered_cpus);
- for_each_cpu_mask(j, online_policy_cpus) {
+ cpus_clear(*covered_cpus);
+ for_each_cpu_mask_nr(j, *online_policy_cpus) {
/*
* Support for SMP systems.
* Make sure we are running on CPU that wants to change freq
*/
- cpus_clear(set_mask);
+ cpus_clear(*set_mask);
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
- cpus_or(set_mask, set_mask, online_policy_cpus);
+ cpus_or(*set_mask, *set_mask, *online_policy_cpus);
else
- cpu_set(j, set_mask);
+ cpu_set(j, *set_mask);
- set_cpus_allowed_ptr(current, &set_mask);
+ set_cpus_allowed_ptr(current, set_mask);
preempt_disable();
- if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+ if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
dprintk("couldn't limit to CPUs in this domain\n");
retval = -EAGAIN;
if (first_cpu) {
@@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy,
break;
}
- msr = centrino_model[cpu]->op_points[newstate].index;
+ msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
if (first_cpu) {
rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
@@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy,
dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
target_freq, freqs.old, freqs.new, msr);
- for_each_cpu_mask(k, online_policy_cpus) {
+ for_each_cpu_mask_nr(k, *online_policy_cpus) {
freqs.cpu = k;
cpufreq_notify_transition(&freqs,
CPUFREQ_PRECHANGE);
@@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy,
break;
}
- cpu_set(j, covered_cpus);
+ cpu_set(j, *covered_cpus);
preempt_enable();
}
- for_each_cpu_mask(k, online_policy_cpus) {
+ for_each_cpu_mask_nr(k, *online_policy_cpus) {
freqs.cpu = k;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy,
* Best effort undo..
*/
- if (!cpus_empty(covered_cpus)) {
- for_each_cpu_mask(j, covered_cpus) {
+ if (!cpus_empty(*covered_cpus))
+ for_each_cpu_mask_nr(j, *covered_cpus) {
set_cpus_allowed_ptr(current,
&cpumask_of_cpu(j));
wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
}
- }
tmp = freqs.new;
freqs.new = freqs.old;
freqs.old = tmp;
- for_each_cpu_mask(j, online_policy_cpus) {
+ for_each_cpu_mask_nr(j, *online_policy_cpus) {
freqs.cpu = j;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
}
- set_cpus_allowed_ptr(current, &saved_mask);
- return 0;
+ set_cpus_allowed_ptr(current, saved_mask);
+ retval = 0;
+ goto out;
migrate_end:
preempt_enable();
- set_cpus_allowed_ptr(current, &saved_mask);
- return 0;
+ set_cpus_allowed_ptr(current, saved_mask);
+out:
+ CPUMASK_FREE(allmasks);
+ return retval;
}
static struct freq_attr* centrino_attr[] = {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 1b50244b1fd..191f7263c61 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
cpus_allowed = current->cpus_allowed;
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
/* allow to be run on all CPUs */
set_cpus_allowed_ptr(current, &cpus_allowed);
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 3fd7a67bb06..ffd0f5ed071 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,13 +15,11 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
- unsigned long flags;
/* we test for DEVID by checking whether CCR3 is writable */
- local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, ccr3 ^ 0x80);
getCx86(0xc0); /* dummy to change bus */
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
*dir0 = getCx86(CX86_DIR0);
*dir1 = getCx86(CX86_DIR1);
}
- local_irq_restore(flags);
}
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __do_cyrix_devid(dir0, dir1);
+ local_irq_restore(flags);
+}
/*
* Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
* order to identify the Cyrix CPU model after we're out of setup.c
@@ -116,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* Load/Store Serialize to mem access disable (=reorder it) */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+ setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
/* set load/store serialize from 1GB to 4GB */
ccr3 |= 0xe0;
setCx86(CX86_CCR3, ccr3);
@@ -127,28 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
-}
-
-static void __cpuinit set_cx86_inc(void)
-{
- unsigned char ccr3;
-
- printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- /* PCR1 -- Performance Control */
- /* Incrementor on, whatever that is */
- setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
- /* PCR0 -- Performance Control */
- /* Incrementor Margin 10 */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
}
/*
@@ -162,23 +150,40 @@ static void __cpuinit geode_configure(void)
local_irq_save(flags);
/* Suspend on halt power saving and enable #SUSP pin */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* FPU fast, DTE cache, Mem bypass */
- setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
+ setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
set_cx86_memwb();
set_cx86_reorder();
- set_cx86_inc();
local_irq_restore(flags);
}
+static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
+{
+ unsigned char dir0, dir0_msn, dir1 = 0;
+
+ __do_cyrix_devid(&dir0, &dir1);
+ dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+
+ switch (dir0_msn) {
+ case 3: /* 6x86/6x86L */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ case 5: /* 6x86MX/M II */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ }
+}
static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
@@ -286,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
/* Enable cxMMX extensions (GX1 Datasheet 54) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+ setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
/*
* GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -296,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
*/
if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
geode_configure();
- get_model_name(c); /* get CPU marketing name */
return;
} else { /* MediaGX */
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
@@ -309,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
if (dir1 > 7) {
dir0_msn++; /* M II */
/* Enable MMX extensions (App note 108) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+ setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
} else {
c->coma_bug = 1; /* 6x86MX, it has the bug. */
}
@@ -424,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */
+ setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
local_irq_restore(flags);
}
@@ -434,16 +438,19 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
+ .c_early_init = early_init_cyrix,
.c_init = init_cyrix,
.c_identify = cyrix_identify,
+ .c_x86_vendor = X86_VENDOR_CYRIX,
};
-cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev);
+cpu_dev_register(cyrix_cpu_dev);
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
.c_init = init_nsc,
+ .c_x86_vendor = X86_VENDOR_NSC,
};
-cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev);
+cpu_dev_register(nsc_cpu_dev);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index e43ad4ad4cb..00000000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Strings for the various x86 capability flags.
- *
- * This file must not contain any executable code.
- */
-
-#include <asm/cpufeature.h>
-
-/*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
-const char * const x86_cap_flags[NCAPINTS*32] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, NULL,
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-};
-
-const char *const x86_power_flags[32] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
-};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d3..99468dbd08d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,11 @@
#include <asm/ds.h>
#include <asm/bugs.h>
+#ifdef CONFIG_X86_64
+#include <asm/topology.h>
+#include <asm/numa_64.h>
+#endif
+
#include "cpu.h"
#ifdef CONFIG_X86_LOCAL_APIC
@@ -23,23 +28,22 @@
#include <mach_apic.h>
#endif
-#ifdef CONFIG_X86_INTEL_USERCOPY
-/*
- * Alignment at which movsl is preferred for bulk memory copies.
- */
-struct movsl_mask movsl_mask __read_mostly;
-#endif
-
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
- /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
- if (c->x86 == 15 && c->x86_cache_alignment == 64)
- c->x86_cache_alignment = 128;
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+#else
+ /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+ if (c->x86 == 15 && c->x86_cache_alignment == 64)
+ c->x86_cache_alignment = 128;
+#endif
}
+#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
*
@@ -59,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void)
return 0;
}
+#ifdef CONFIG_X86_F00F_BUG
+static void __cpuinit trap_init_f00f_bug(void)
+{
+ __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-/*
- * P4 Xeon errata 037 workaround.
- * Hardware prefetcher may cause stale data to be loaded into the cache.
- */
-static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
+ /*
+ * Update the IDT descriptor and reload the IDT so that
+ * it uses the read-only mapped virtual address.
+ */
+ idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+ load_idt(&idt_descr);
+}
+#endif
+
+static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
+#ifdef CONFIG_X86_F00F_BUG
+ /*
+ * All current models of Pentium and Pentium with MMX technology CPUs
+ * have the F0 0F bug, which lets nonprivileged users lock up the system.
+ * Note that the workaround only should be initialized once...
+ */
+ c->f00f_bug = 0;
+ if (!paravirt_enabled() && c->x86 == 5) {
+ static int f00f_workaround_enabled;
+
+ c->f00f_bug = 1;
+ if (!f00f_workaround_enabled) {
+ trap_init_f00f_bug();
+ printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ f00f_workaround_enabled = 1;
+ }
+ }
+#endif
+
+ /*
+ * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
+ * model 3 mask 3
+ */
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ clear_cpu_cap(c, X86_FEATURE_SEP);
+
+ /*
+ * P4 Xeon errata 037 workaround.
+ * Hardware prefetcher may cause stale data to be loaded into the cache.
+ */
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
if ((lo & (1<<9)) == 0) {
@@ -77,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
}
}
+
+ /*
+ * See if we have a good local APIC by checking for buggy Pentia,
+ * i.e. all B steppings and the C2 stepping of P54C when using their
+ * integrated APIC (see 11AP erratum in "Pentium Processor
+ * Specification Update").
+ */
+ if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ set_cpu_cap(c, X86_FEATURE_11AP);
+
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ /*
+ * Set up the preferred alignment for movsl bulk memory moves
+ */
+ switch (c->x86) {
+ case 4: /* 486: untested */
+ break;
+ case 5: /* Old Pentia: untested */
+ break;
+ case 6: /* PII/PIII only like movsl with 8-byte alignment */
+ movsl_mask.mask = 7;
+ break;
+ case 15: /* P4 is OK down to 8-byte alignment */
+ movsl_mask.mask = 7;
+ break;
+ }
+#endif
+
+#ifdef CONFIG_X86_NUMAQ
+ numaq_tsc_disable();
+#endif
+}
+#else
+static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
+{
}
+#endif
+static void __cpuinit srat_detect_node(void)
+{
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+ unsigned node;
+ int cpu = smp_processor_id();
+ int apicid = hard_smp_processor_id();
+
+ /* Don't do the funky fallback heuristics the AMD version employs
+ for now. */
+ node = apicid_to_node[apicid];
+ if (node == NUMA_NO_NODE || !node_online(node))
+ node = first_node(node_online_map);
+ numa_set_node(cpu, node);
+
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+#endif
+}
/*
* find out the number of processor cores on the die
*/
-static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
+static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
@@ -98,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-#ifdef CONFIG_X86_F00F_BUG
-static void __cpuinit trap_init_f00f_bug(void)
+static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-
- /*
- * Update the IDT descriptor and reload the IDT so that
- * it uses the read-only mapped virtual address.
- */
- idt_descr.address = fix_to_virt(FIX_F00F_IDT);
- load_idt(&idt_descr);
+ /* Intel VMX MSR indicated features */
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ clear_cpu_cap(c, X86_FEATURE_VNMI);
+ clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ clear_cpu_cap(c, X86_FEATURE_EPT);
+ clear_cpu_cap(c, X86_FEATURE_VPID);
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
}
-#endif
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
- char *p = NULL;
early_init_intel(c);
-#ifdef CONFIG_X86_F00F_BUG
- /*
- * All current models of Pentium and Pentium with MMX technology CPUs
- * have the F0 0F bug, which lets nonprivileged users lock up the system.
- * Note that the workaround only should be initialized once...
- */
- c->f00f_bug = 0;
- if (!paravirt_enabled() && c->x86 == 5) {
- static int f00f_workaround_enabled;
-
- c->f00f_bug = 1;
- if (!f00f_workaround_enabled) {
- trap_init_f00f_bug();
- printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
- f00f_workaround_enabled = 1;
- }
- }
-#endif
+ intel_workarounds(c);
l2 = init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
@@ -146,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
- clear_cpu_cap(c, X86_FEATURE_SEP);
+ if (cpu_has_xmm2)
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ if (cpu_has_ds) {
+ unsigned int l1;
+ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+ if (!(l1 & (1<<11)))
+ set_cpu_cap(c, X86_FEATURE_BTS);
+ if (!(l1 & (1<<12)))
+ set_cpu_cap(c, X86_FEATURE_PEBS);
+ ds_init_intel(c);
+ }
+#ifdef CONFIG_X86_64
+ if (c->x86 == 15)
+ c->x86_cache_alignment = c->x86_clflush_size * 2;
+ if (c->x86 == 6)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+#else
/*
* Names for the Pentium II/Celeron processors
* detectable only by also checking the cache size.
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
+ char *p = NULL;
+
switch (c->x86_model) {
case 5:
if (c->x86_mask == 0) {
@@ -178,56 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
p = "Celeron (Coppermine)";
break;
}
- }
- if (p)
- strcpy(c->x86_model_id, p);
-
- c->x86_max_cores = num_cpu_cores(c);
-
- detect_ht(c);
-
- /* Work around errata */
- Intel_errata_workarounds(c);
-
-#ifdef CONFIG_X86_INTEL_USERCOPY
- /*
- * Set up the preferred alignment for movsl bulk memory moves
- */
- switch (c->x86) {
- case 4: /* 486: untested */
- break;
- case 5: /* Old Pentia: untested */
- break;
- case 6: /* PII/PIII only like movsl with 8-byte alignment */
- movsl_mask.mask = 7;
- break;
- case 15: /* P4 is OK down to 8-byte alignment */
- movsl_mask.mask = 7;
- break;
+ if (p)
+ strcpy(c->x86_model_id, p);
}
-#endif
- if (cpu_has_xmm2)
- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
- if (c->x86 == 15) {
+ if (c->x86 == 15)
set_cpu_cap(c, X86_FEATURE_P4);
- }
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
- if (cpu_has_ds) {
- unsigned int l1;
- rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
- if (!(l1 & (1<<11)))
- set_cpu_cap(c, X86_FEATURE_BTS);
- if (!(l1 & (1<<12)))
- set_cpu_cap(c, X86_FEATURE_PEBS);
- }
if (cpu_has_bts)
- ds_init_intel(c);
+ ptrace_bts_init_intel(c);
+
+#endif
+
+ detect_extended_topology(c);
+ if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
+ /*
+ * let's use the legacy cpuid vector 0x1 and 0x4 for topology
+ * detection.
+ */
+ c->x86_max_cores = intel_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+ }
+
+ /* Work around errata */
+ srat_detect_node();
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ detect_vmx_virtcap(c);
}
+#ifdef CONFIG_X86_32
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/*
@@ -240,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
size = 256;
return size;
}
+#endif
static struct cpu_dev intel_cpu_dev __cpuinitdata = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
+#ifdef CONFIG_X86_32
.c_models = {
{ .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
{
@@ -293,76 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
}
},
},
+ .c_size_cache = intel_size_cache,
+#endif
.c_early_init = early_init_intel,
.c_init = init_intel,
- .c_size_cache = intel_size_cache,
+ .c_x86_vendor = X86_VENDOR_INTEL,
};
-cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
-
-#ifndef CONFIG_X86_CMPXCHG
-unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
-{
- u8 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u8 *)ptr;
- if (prev == old)
- *(u8 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u8);
-
-unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
-{
- u16 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u16 *)ptr;
- if (prev == old)
- *(u16 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u16);
-
-unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
-{
- u32 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u32 *)ptr;
- if (prev == old)
- *(u32 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u32);
-#endif
-
-#ifndef CONFIG_X86_CMPXCHG64
-unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
-{
- u64 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u64 *)ptr;
- if (prev == old)
- *(u64 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_486_u64);
-#endif
-
-/* arch_initcall(intel_cpu_init); */
+cpu_dev_register(intel_cpu_dev);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 26d615dcb14..3f46afbb1cf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
/*
- * Routines to indentify caches on Intel CPU.
+ * Routines to indentify caches on Intel CPU.
*
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
+ * Changes:
+ * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
@@ -13,6 +13,7 @@
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/smp.h>
@@ -62,6 +63,7 @@ static struct _cache_table cache_table[] __cpuinitdata =
{ 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
{ 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
{ 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
+ { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
{ 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
{ 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
{ 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -129,9 +131,18 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};
+#ifdef CONFIG_PCI
+static struct pci_device_id k8_nb_id[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
+};
+#endif
+
unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -181,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
- union _cpuid4_leaf_ebx *ebx,
- union _cpuid4_leaf_ecx *ecx)
+static void __cpuinit
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
{
unsigned dummy;
unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -250,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
(ebx->split.ways_of_associativity + 1) - 1;
}
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned edx;
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
+
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
this_leaf->eax = eax;
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
- this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
@@ -452,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -488,8 +513,8 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
int sibling;
this_leaf = CPUID4_INFO_IDX(cpu, index);
- for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
- sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+ for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
+ sibling_leaf = CPUID4_INFO_IDX(sibling, index);
cpu_clear(cpu, sibling_leaf->shared_cpu_map);
}
}
@@ -571,7 +596,7 @@ struct _index_kobject {
/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -636,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
}
}
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+#ifdef CONFIG_PCI
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ struct pci_dev *dev = NULL;
+ int i;
+
+ for (i = 0; i <= node; i++) {
+ do {
+ dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+ if (!dev)
+ break;
+ } while (!pci_match_id(&k8_nb_id[0], dev));
+ if (!dev)
+ break;
+ }
+ return dev;
+}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = NULL;
+ ssize_t ret = 0;
+ int i;
+
+ if (!this_leaf->can_disable)
+ return sprintf(buf, "Feature not enabled\n");
+
+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 2; i++) {
+ unsigned int reg;
+
+ pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+ ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+ ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
+ buf,
+ reg & 0x80000000 ? "Disabled" : "Allowed",
+ reg & 0x40000000 ? "Disabled" : "Allowed");
+ ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
+ buf, (reg & 0x30000) >> 16, reg & 0xfff);
+ }
+ return ret;
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = NULL;
+ unsigned int ret, index, val;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ if (strlen(buf) > 15)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x %x", &index, &val);
+ if (ret != 2)
+ return -EINVAL;
+ if (index > 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+ return 1;
+}
+
struct _cache_attr {
struct attribute attr;
ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -656,6 +774,8 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -666,12 +786,10 @@ static struct attribute * default_attrs[] = {
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable.attr,
NULL
};
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
struct _cache_attr *fattr = to_attr(attr);
@@ -681,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
buf) :
- 0;
+ 0;
return ret;
}
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}
static struct sysfs_ops sysfs_ops = {
@@ -779,15 +905,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
- break;
+ return retval;
}
kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
- if (!retval)
- cpu_set(cpu, cache_dev_map);
+ cpu_set(cpu, cache_dev_map);
kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
- return retval;
+ return 0;
}
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index e633c9c2b76..f390c9f6635 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -9,23 +9,23 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For AMD Athlon/Duron */
-static void k7_machine_check(struct pt_regs * regs, long error_code)
+static void k7_machine_check(struct pt_regs *regs, long error_code)
{
- int recover=1;
+ int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
+ recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
}
if (recover&2)
- panic ("CPU context corrupt");
+ panic("CPU context corrupt");
if (recover&1)
- panic ("Unable to continue");
- printk (KERN_EMERG "Attempting to continue.\n");
+ panic("Unable to continue");
+ printk(KERN_EMERG "Attempting to continue.\n");
mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+ wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = k7_machine_check;
wmb();
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
+ printk(KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
/* Clear status for MC index 0 separately, we don't touch CTL,
* as some K7 Athlons cause spurious MCEs when its enabled. */
if (boot_cpu_data.x86 == 6) {
- wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+ wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
i = 1;
} else
i = 0;
- for (; i<nr_mce_banks; i++) {
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+ for (; i < nr_mce_banks; i++) {
+ wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ set_in_cr4(X86_CR4_MCE);
+ printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae..4b031a4ac85 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
@@ -31,7 +32,7 @@
#include <asm/idle.h>
#define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
+#define NR_SYSFS_BANKS 6
atomic_t mce_entry;
@@ -46,7 +47,7 @@ static int mce_dont_init;
*/
static int tolerant = 1;
static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = -1;
@@ -209,7 +210,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
barrier();
for (i = 0; i < banks; i++) {
- if (!bank[i])
+ if (i < NR_SYSFS_BANKS && !bank[i])
continue;
m.misc = 0;
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
static void mcheck_timer(struct work_struct *work)
{
- on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+ on_each_cpu(mcheck_check_cpu, NULL, 1);
/*
* Alert userspace if needed. If we logged an MCE, reduce the
@@ -444,9 +445,10 @@ static void mce_init(void *dummy)
rdmsrl(MSR_IA32_MCG_CAP, cap);
banks = cap & 0xff;
- if (banks > NR_BANKS) {
- printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- banks = NR_BANKS;
+ if (banks > MCE_EXTENDED_BANK) {
+ banks = MCE_EXTENDED_BANK;
+ printk(KERN_INFO "MCE: warning: using only %d banks\n",
+ MCE_EXTENDED_BANK);
}
/* Use accurate RIP reporting if available. */
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +464,11 @@ static void mce_init(void *dummy)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ if (i < NR_SYSFS_BANKS)
+ wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ else
+ wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
+
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
}
@@ -527,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
static int mce_open(struct inode *inode, struct file *file)
{
+ lock_kernel();
spin_lock(&mce_state_lock);
if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return -EBUSY;
}
@@ -539,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
open_count++;
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return nonseekable_open(inode, file);
}
@@ -571,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
char __user *buf = ubuf;
int i, err;
- cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
+ cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
if (!cpu_tsc)
return -ENOMEM;
@@ -612,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
* Collect entries that were still getting written before the
* synchronize.
*/
- on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+ on_each_cpu(collect_tscs, cpu_tsc, 1);
for (i = next; i < MCE_LOG_LEN; i++) {
if (mcelog.entry[i].finished &&
mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -737,7 +746,7 @@ static void mce_restart(void)
if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
- on_each_cpu(mce_init, NULL, 1, 1);
+ on_each_cpu(mce_init, NULL, 1);
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
@@ -750,13 +759,18 @@ static struct sysdev_class mce_sysclass = {
};
DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
- static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
+ static ssize_t show_ ## name(struct sys_device *s, \
+ struct sysdev_attribute *attr, \
+ char *buf) { \
return sprintf(buf, "%lx\n", (unsigned long)var); \
} \
- static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
+ static ssize_t set_ ## name(struct sys_device *s, \
+ struct sysdev_attribute *attr, \
+ const char *buf, size_t siz) { \
char *end; \
unsigned long new = simple_strtoul(buf, &end, 0); \
if (end == buf) return -EINVAL; \
@@ -766,7 +780,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
} \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-/* TBD should generate these dynamically based on number of available banks */
+/*
+ * TBD should generate these dynamically based on number of available banks.
+ * Have only 6 contol banks in /sysfs until then.
+ */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
@@ -774,14 +791,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())
-static ssize_t show_trigger(struct sys_device *s, char *buf)
+static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+ char *buf)
{
strcpy(buf, trigger);
strcat(buf, "\n");
return strlen(trigger) + 1;
}
-static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+ const char *buf,size_t siz)
{
char *p;
int len;
@@ -794,12 +813,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
}
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-ACCESSOR(tolerant,tolerant,)
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
ACCESSOR(check_interval,check_interval,mce_restart())
static struct sysdev_attribute *mce_attributes[] = {
&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
- &attr_tolerant, &attr_check_interval, &attr_trigger,
+ &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
NULL
};
@@ -841,7 +860,7 @@ error:
return err;
}
-static void mce_remove_device(unsigned int cpu)
+static __cpuinit void mce_remove_device(unsigned int cpu)
{
int i;
@@ -865,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
mce_create_device(cpu);
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
mce_remove_device(cpu);
break;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7c9a813e119..5eb390a4b2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out_free;
- for_each_cpu_mask(i, b->cpus) {
+ for_each_cpu_mask_nr(i, b->cpus) {
if (i == cpu)
continue;
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#endif
/* remove all sibling symlinks before unregistering */
- for_each_cpu_mask(i, b->cpus) {
+ for_each_cpu_mask_nr(i, b->cpus) {
if (i == cpu)
continue;
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
deallocate_threshold_block(cpu, bank);
free_out:
+ kobject_del(b->kobj);
kobject_put(b->kobj);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu)
}
/* get notified when a cpu comes on/off */
-static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+ unsigned int cpu)
{
- /* cpu was unsigned int to begin with */
- unsigned int cpu = (unsigned long)hcpu;
-
if (cpu >= NR_CPUS)
- goto out;
+ return;
switch (action) {
case CPU_ONLINE:
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
default:
break;
}
- out:
- return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
- .notifier_call = threshold_cpu_callback,
-};
-
static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void)
if (err)
return err;
}
- register_hotcpu_notifier(&threshold_cpu_notifier);
+ threshold_cpu_callback = amd_64_threshold_cpu_callback;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec..cc1fccdd31e 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
- on_each_cpu(mce_checkregs, NULL, 1, 1);
+ on_each_cpu(mce_checkregs, NULL, 1);
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345554a..9b60fce09f7 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs {
/* u32 *reserved[]; */
};
-static int mce_num_extended_msrs = 0;
+static int mce_num_extended_msrs;
#ifdef CONFIG_X86_MCE_P4THERMAL
static void unexpected_thermal_interrupt(struct pt_regs *regs)
-{
+{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
smp_processor_id());
add_taint(TAINT_MACHINE_CHECK);
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
* be some SMM goo which handles it, so we can't even put a handler
* since it might be delivered via SMI already -zwanem.
*/
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
return; /* -EBUSY */
}
- /* check whether a vector already exists, temporarily masked? */
+ /* check whether a vector already exists, temporarily masked? */
if (h & APIC_VECTOR_MASK) {
printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
"installed\n",
@@ -102,20 +102,20 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
/* The temperature transition interrupt handler setup */
h = THERMAL_APIC_VECTOR; /* our delivery vector */
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
- apic_write_around(APIC_LVTTHMR, h);
+ apic_write(APIC_LVTTHMR, h);
- rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+ rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
/* ok we're good to go... */
vendor_thermal_interrupt = intel_thermal_interrupt;
-
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
- wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
- l = apic_read (APIC_LVTTHMR);
- apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
u32 h;
- rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
- rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
- rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
- rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
- rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
- rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
- rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
- rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
- rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
- rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
+ rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
+ rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
+ rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
+ rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
+ rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
+ rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
+ rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
+ rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
+ rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
+ rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
}
-static void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs *regs, long error_code)
{
- int recover=1;
+ int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
+ recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
}
if (recover & 2)
- panic ("CPU context corrupt");
+ panic("CPU context corrupt");
if (recover & 1)
- panic ("Unable to continue");
+ panic("Unable to continue");
printk(KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable.This will allow BIOS to look at the MSRs
* for errors if the OS could not log the error.
*/
- for (i=0; i<nr_mce_banks; i++) {
+ for (i = 0; i < nr_mce_banks; i++) {
u32 msr;
msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr, low, high);
+ rdmsr(msr, low, high);
if (high&(1<<31)) {
/* Clear it */
wrmsr(msr, 0UL, 0UL);
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
}
}
mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+ wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
-
+
machine_check_vector = intel_machine_check;
wmb();
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
+ printk(KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
- for (i=0; i<nr_mce_banks; i++) {
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+ for (i = 0; i < nr_mce_banks; i++) {
+ wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ set_in_cr4(X86_CR4_MCE);
+ printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
/* Check for P4/Xeon extended MCE MSRs */
- rdmsr (MSR_IA32_MCG_CAP, l, h);
+ rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<9)) {/* MCG_EXT_P */
mce_num_extended_msrs = (l >> 16) & 0xff;
- printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
+ printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
" available\n",
smp_processor_id(), mce_num_extended_msrs);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1f4cc48c14c..d5ae2243f0b 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0);
#define define_therm_throt_sysdev_show_func(name) \
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
char *buf) \
{ \
unsigned int cpu = dev->id; \
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 00000000000..dfea390e160
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+#
+# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
+#
+
+($in, $out) = @ARGV;
+
+open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
+open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
+
+print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+
+while (defined($line = <IN>)) {
+ if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
+ $macro = $1;
+ $feature = $2;
+ $tail = $3;
+ if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
+ $feature = $1;
+ }
+
+ if ($feature ne '') {
+ printf OUT "\t%-32s = \"%s\",\n",
+ "[$macro]", "\L$feature";
+ }
+ }
+}
+print OUT "};\n";
+
+close(IN);
+close(OUT);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a4..4e8d77f01ee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
}
}
- if (tom2) {
- if (start >= (1ULL<<32) && (end < tom2))
+ if (mtrr_tom2) {
+ if (start >= (1ULL<<32) && (end < mtrr_tom2))
return MTRR_TYPE_WRBACK;
}
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
mtrr_state.enabled = (lo & 0xc00) >> 10;
if (amd_special_default_mtrr()) {
- unsigned lo, hi;
+ unsigned low, high;
/* TOP_MEM2 */
- rdmsr(MSR_K8_TOP_MEM2, lo, hi);
- tom2 = hi;
- tom2 <<= 32;
- tom2 |= lo;
- tom2 &= 0xffffff8000000ULL;
+ rdmsr(MSR_K8_TOP_MEM2, low, high);
+ mtrr_tom2 = high;
+ mtrr_tom2 <<= 32;
+ mtrr_tom2 |= low;
+ mtrr_tom2 &= 0xffffff800000ULL;
}
if (mtrr_show) {
int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
else
printk(KERN_INFO "MTRR %u disabled\n", i);
}
- if (tom2) {
+ if (mtrr_tom2) {
printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
- tom2, tom2>>20);
+ mtrr_tom2, mtrr_tom2>>20);
}
}
mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
if (lo != msrwords[0] || hi != msrwords[1]) {
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 15 &&
+ (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
k8_enable_fixed_iorrs();
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
@@ -365,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
unsigned int mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int tmp, hi;
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
@@ -378,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask. */
- mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
- | mask_lo >> PAGE_SHIFT;
+ tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask_lo = size_or_mask | tmp;
+ /* Expand tmp with high bits to all 1s*/
+ hi = fls(tmp);
+ if (hi > 0) {
+ tmp |= ~((1<<(hi - 1)) - 1);
+
+ if (tmp != mask_lo) {
+ WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+ mask_lo = tmp;
+ }
+ }
/* This works correctly if size is a power of two, i.e. a
contiguous range. */
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 84c480bb371..4c4214690dd 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
}
/* RED-PEN: base can be > 32bit */
len += seq_printf(seq,
- "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+ "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_attrib_to_str(type), mtrr_usage_table[i]);
+ mtrr_usage_table[i], mtrr_attrib_to_str(type));
}
}
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d932..c78c04821ea 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -222,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
atomic_set(&data.gate,0);
/* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+ if (smp_call_function(ipi_handler, &data, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
local_irq_save(flags);
@@ -609,6 +610,891 @@ static struct sysdev_driver mtrr_sysdev_driver = {
.resume = mtrr_restore,
};
+/* should be related to MTRR_VAR_RANGES nums */
+#define RANGE_NUM 256
+
+struct res_range {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+ unsigned long end)
+{
+ /* out of slots */
+ if (nr_range >= RANGE_NUM)
+ return nr_range;
+
+ range[nr_range].start = start;
+ range[nr_range].end = end;
+
+ nr_range++;
+
+ return nr_range;
+}
+
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+ unsigned long end)
+{
+ int i;
+
+ /* try to merge it with old one */
+ for (i = 0; i < nr_range; i++) {
+ unsigned long final_start, final_end;
+ unsigned long common_start, common_end;
+
+ if (!range[i].end)
+ continue;
+
+ common_start = max(range[i].start, start);
+ common_end = min(range[i].end, end);
+ if (common_start > common_end + 1)
+ continue;
+
+ final_start = min(range[i].start, start);
+ final_end = max(range[i].end, end);
+
+ range[i].start = final_start;
+ range[i].end = final_end;
+ return nr_range;
+ }
+
+ /* need to add that */
+ return add_range(range, nr_range, start, end);
+}
+
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
+{
+ int i, j;
+
+ for (j = 0; j < RANGE_NUM; j++) {
+ if (!range[j].end)
+ continue;
+
+ if (start <= range[j].start && end >= range[j].end) {
+ range[j].start = 0;
+ range[j].end = 0;
+ continue;
+ }
+
+ if (start <= range[j].start && end < range[j].end &&
+ range[j].start < end + 1) {
+ range[j].start = end + 1;
+ continue;
+ }
+
+
+ if (start > range[j].start && end >= range[j].end &&
+ range[j].end > start - 1) {
+ range[j].end = start - 1;
+ continue;
+ }
+
+ if (start > range[j].start && end < range[j].end) {
+ /* find the new spare */
+ for (i = 0; i < RANGE_NUM; i++) {
+ if (range[i].end == 0)
+ break;
+ }
+ if (i < RANGE_NUM) {
+ range[i].end = range[j].end;
+ range[i].start = end + 1;
+ } else {
+ printk(KERN_ERR "run of slot in ranges\n");
+ }
+ range[j].end = start - 1;
+ continue;
+ }
+ }
+}
+
+static int __init cmp_range(const void *x1, const void *x2)
+{
+ const struct res_range *r1 = x1;
+ const struct res_range *r2 = x2;
+ long start1, start2;
+
+ start1 = r1->start;
+ start2 = r2->start;
+
+ return start1 - start2;
+}
+
+struct var_mtrr_range_state {
+ unsigned long base_pfn;
+ unsigned long size_pfn;
+ mtrr_type type;
+};
+
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static int __initdata debug_print;
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+ unsigned long extra_remove_base,
+ unsigned long extra_remove_size)
+{
+ unsigned long i, base, size;
+ mtrr_type type;
+
+ for (i = 0; i < num_var_ranges; i++) {
+ type = range_state[i].type;
+ if (type != MTRR_TYPE_WRBACK)
+ continue;
+ base = range_state[i].base_pfn;
+ size = range_state[i].size_pfn;
+ nr_range = add_range_with_merge(range, nr_range, base,
+ base + size - 1);
+ }
+ if (debug_print) {
+ printk(KERN_DEBUG "After WB checking\n");
+ for (i = 0; i < nr_range; i++)
+ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ range[i].start, range[i].end + 1);
+ }
+
+ /* take out UC ranges */
+ for (i = 0; i < num_var_ranges; i++) {
+ type = range_state[i].type;
+ if (type != MTRR_TYPE_UNCACHABLE &&
+ type != MTRR_TYPE_WRPROT)
+ continue;
+ size = range_state[i].size_pfn;
+ if (!size)
+ continue;
+ base = range_state[i].base_pfn;
+ subtract_range(range, base, base + size - 1);
+ }
+ if (extra_remove_size)
+ subtract_range(range, extra_remove_base,
+ extra_remove_base + extra_remove_size - 1);
+
+ /* get new range num */
+ nr_range = 0;
+ for (i = 0; i < RANGE_NUM; i++) {
+ if (!range[i].end)
+ continue;
+ nr_range++;
+ }
+ if (debug_print) {
+ printk(KERN_DEBUG "After UC checking\n");
+ for (i = 0; i < nr_range; i++)
+ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ range[i].start, range[i].end + 1);
+ }
+
+ /* sort the ranges */
+ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+ if (debug_print) {
+ printk(KERN_DEBUG "After sorting\n");
+ for (i = 0; i < nr_range; i++)
+ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ range[i].start, range[i].end + 1);
+ }
+
+ /* clear those is not used */
+ for (i = nr_range; i < RANGE_NUM; i++)
+ memset(&range[i], 0, sizeof(range[i]));
+
+ return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+ unsigned long sum;
+ int i;
+
+ sum = 0;
+ for (i = 0; i < nr_range; i++)
+ sum += range[i].end + 1 - range[i].start;
+
+ return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+ CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+ if (enable_mtrr_cleanup != -1)
+ enable_mtrr_cleanup = 0;
+ return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+ if (enable_mtrr_cleanup != -1)
+ enable_mtrr_cleanup = 1;
+ return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+static int __init mtrr_cleanup_debug_setup(char *str)
+{
+ debug_print = 1;
+ return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
+
+struct var_mtrr_state {
+ unsigned long range_startk;
+ unsigned long range_sizek;
+ unsigned long chunk_sizek;
+ unsigned long gran_sizek;
+ unsigned int reg;
+};
+
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+ unsigned char type, unsigned int address_bits)
+{
+ u32 base_lo, base_hi, mask_lo, mask_hi;
+ u64 base, mask;
+
+ if (!sizek) {
+ fill_mtrr_var_range(reg, 0, 0, 0, 0);
+ return;
+ }
+
+ mask = (1ULL << address_bits) - 1;
+ mask &= ~((((u64)sizek) << 10) - 1);
+
+ base = ((u64)basek) << 10;
+
+ base |= type;
+ mask |= 0x800;
+
+ base_lo = base & ((1ULL<<32) - 1);
+ base_hi = base >> 32;
+
+ mask_lo = mask & ((1ULL<<32) - 1);
+ mask_hi = mask >> 32;
+
+ fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+ unsigned char type)
+{
+ range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+ range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+ range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+ unsigned long basek, sizek;
+ unsigned char type;
+ unsigned int reg;
+
+ for (reg = 0; reg < num_var_ranges; reg++) {
+ basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+ sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+ type = range_state[reg].type;
+
+ set_var_mtrr(reg, basek, sizek, type, address_bits);
+ }
+}
+
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+ char factor;
+ unsigned long base = sizek;
+
+ if (base & ((1<<10) - 1)) {
+ /* not MB alignment */
+ factor = 'K';
+ } else if (base & ((1<<20) - 1)){
+ factor = 'M';
+ base >>= 10;
+ } else {
+ factor = 'G';
+ base >>= 20;
+ }
+
+ *factorp = factor;
+
+ return base;
+}
+
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+ unsigned long range_sizek, unsigned char type)
+{
+ if (!range_sizek || (reg >= num_var_ranges))
+ return reg;
+
+ while (range_sizek) {
+ unsigned long max_align, align;
+ unsigned long sizek;
+
+ /* Compute the maximum size I can make a range */
+ if (range_startk)
+ max_align = ffs(range_startk) - 1;
+ else
+ max_align = 32;
+ align = fls(range_sizek) - 1;
+ if (align > max_align)
+ align = max_align;
+
+ sizek = 1 << align;
+ if (debug_print) {
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+
+ start_base = to_size_factor(range_startk, &start_factor),
+ size_base = to_size_factor(sizek, &size_factor),
+
+ printk(KERN_DEBUG "Setting variable MTRR %d, "
+ "base: %ld%cB, range: %ld%cB, type %s\n",
+ reg, start_base, start_factor,
+ size_base, size_factor,
+ (type == MTRR_TYPE_UNCACHABLE)?"UC":
+ ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+ );
+ }
+ save_var_mtrr(reg++, range_startk, sizek, type);
+ range_startk += sizek;
+ range_sizek -= sizek;
+ if (reg >= num_var_ranges)
+ break;
+ }
+ return reg;
+}
+
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+ unsigned long sizek)
+{
+ unsigned long hole_basek, hole_sizek;
+ unsigned long second_basek, second_sizek;
+ unsigned long range0_basek, range0_sizek;
+ unsigned long range_basek, range_sizek;
+ unsigned long chunk_sizek;
+ unsigned long gran_sizek;
+
+ hole_basek = 0;
+ hole_sizek = 0;
+ second_basek = 0;
+ second_sizek = 0;
+ chunk_sizek = state->chunk_sizek;
+ gran_sizek = state->gran_sizek;
+
+ /* align with gran size, prevent small block used up MTRRs */
+ range_basek = ALIGN(state->range_startk, gran_sizek);
+ if ((range_basek > basek) && basek)
+ return second_sizek;
+ state->range_sizek -= (range_basek - state->range_startk);
+ range_sizek = ALIGN(state->range_sizek, gran_sizek);
+
+ while (range_sizek > state->range_sizek) {
+ range_sizek -= gran_sizek;
+ if (!range_sizek)
+ return 0;
+ }
+ state->range_sizek = range_sizek;
+
+ /* try to append some small hole */
+ range0_basek = state->range_startk;
+ range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+ /* no increase */
+ if (range0_sizek == state->range_sizek) {
+ if (debug_print)
+ printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
+ range0_basek<<10,
+ (range0_basek + state->range_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, range0_basek,
+ state->range_sizek, MTRR_TYPE_WRBACK);
+ return 0;
+ }
+
+ /* only cut back, when it is not the last */
+ if (sizek) {
+ while (range0_basek + range0_sizek > (basek + sizek)) {
+ if (range0_sizek >= chunk_sizek)
+ range0_sizek -= chunk_sizek;
+ else
+ range0_sizek = 0;
+
+ if (!range0_sizek)
+ break;
+ }
+ }
+
+second_try:
+ range_basek = range0_basek + range0_sizek;
+
+ /* one hole in the middle */
+ if (range_basek > basek && range_basek <= (basek + sizek))
+ second_sizek = range_basek - basek;
+
+ if (range0_sizek > state->range_sizek) {
+
+ /* one hole in middle or at end */
+ hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+ /* hole size should be less than half of range0 size */
+ if (hole_sizek >= (range0_sizek >> 1) &&
+ range0_sizek >= chunk_sizek) {
+ range0_sizek -= chunk_sizek;
+ second_sizek = 0;
+ hole_sizek = 0;
+
+ goto second_try;
+ }
+ }
+
+ if (range0_sizek) {
+ if (debug_print)
+ printk(KERN_DEBUG "range0: %016lx - %016lx\n",
+ range0_basek<<10,
+ (range0_basek + range0_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, range0_basek,
+ range0_sizek, MTRR_TYPE_WRBACK);
+ }
+
+ if (range0_sizek < state->range_sizek) {
+ /* need to handle left over */
+ range_sizek = state->range_sizek - range0_sizek;
+
+ if (debug_print)
+ printk(KERN_DEBUG "range: %016lx - %016lx\n",
+ range_basek<<10,
+ (range_basek + range_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, range_basek,
+ range_sizek, MTRR_TYPE_WRBACK);
+ }
+
+ if (hole_sizek) {
+ hole_basek = range_basek - hole_sizek - second_sizek;
+ if (debug_print)
+ printk(KERN_DEBUG "hole: %016lx - %016lx\n",
+ hole_basek<<10,
+ (hole_basek + hole_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, hole_basek,
+ hole_sizek, MTRR_TYPE_UNCACHABLE);
+ }
+
+ return second_sizek;
+}
+
+static void __init
+set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+ unsigned long size_pfn)
+{
+ unsigned long basek, sizek;
+ unsigned long second_sizek = 0;
+
+ if (state->reg >= num_var_ranges)
+ return;
+
+ basek = base_pfn << (PAGE_SHIFT - 10);
+ sizek = size_pfn << (PAGE_SHIFT - 10);
+
+ /* See if I can merge with the last range */
+ if ((basek <= 1024) ||
+ (state->range_startk + state->range_sizek == basek)) {
+ unsigned long endk = basek + sizek;
+ state->range_sizek = endk - state->range_startk;
+ return;
+ }
+ /* Write the range mtrrs */
+ if (state->range_sizek != 0)
+ second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
+
+ /* Allocate an msr */
+ state->range_startk = basek + second_sizek;
+ state->range_sizek = sizek - second_sizek;
+}
+
+/* mininum size of mtrr block that can take hole */
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+ if (!p)
+ return -EINVAL;
+ mtrr_chunk_size = memparse(p, &p);
+ return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+/* granity of mtrr of block */
+static u64 mtrr_gran_size __initdata;
+
+static int __init parse_mtrr_gran_size_opt(char *p)
+{
+ if (!p)
+ return -EINVAL;
+ mtrr_gran_size = memparse(p, &p);
+ return 0;
+}
+early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+
+static int nr_mtrr_spare_reg __initdata =
+ CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+ if (arg)
+ nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+ return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
+x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+ u64 chunk_size, u64 gran_size)
+{
+ struct var_mtrr_state var_state;
+ int i;
+ int num_reg;
+
+ var_state.range_startk = 0;
+ var_state.range_sizek = 0;
+ var_state.reg = 0;
+ var_state.chunk_sizek = chunk_size >> 10;
+ var_state.gran_sizek = gran_size >> 10;
+
+ memset(range_state, 0, sizeof(range_state));
+
+ /* Write the range etc */
+ for (i = 0; i < nr_range; i++)
+ set_var_mtrr_range(&var_state, range[i].start,
+ range[i].end - range[i].start + 1);
+
+ /* Write the last range */
+ if (var_state.range_sizek != 0)
+ range_to_mtrr_with_hole(&var_state, 0, 0);
+
+ num_reg = var_state.reg;
+ /* Clear out the extra MTRR's */
+ while (var_state.reg < num_var_ranges) {
+ save_var_mtrr(var_state.reg, 0, 0, 0);
+ var_state.reg++;
+ }
+
+ return num_reg;
+}
+
+struct mtrr_cleanup_result {
+ unsigned long gran_sizek;
+ unsigned long chunk_sizek;
+ unsigned long lose_cover_sizek;
+ unsigned int num_reg;
+ int bad;
+};
+
+/*
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
+ */
+#define NUM_RESULT 136
+#define PSHIFT (PAGE_SHIFT - 10)
+
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+ unsigned long extra_remove_base, extra_remove_size;
+ unsigned long base, size, def, dummy;
+ mtrr_type type;
+ int nr_range, nr_range_new;
+ u64 chunk_size, gran_size;
+ unsigned long range_sums, range_sums_new;
+ int index_good;
+ int num_reg_good;
+ int i;
+
+ /* extra one for all 0 */
+ int num[MTRR_NUM_TYPES + 1];
+
+ if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+ return 0;
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (def != MTRR_TYPE_UNCACHABLE)
+ return 0;
+
+ /* get it and store it aside */
+ memset(range_state, 0, sizeof(range_state));
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ range_state[i].base_pfn = base;
+ range_state[i].size_pfn = size;
+ range_state[i].type = type;
+ }
+
+ /* check entries number */
+ memset(num, 0, sizeof(num));
+ for (i = 0; i < num_var_ranges; i++) {
+ type = range_state[i].type;
+ size = range_state[i].size_pfn;
+ if (type >= MTRR_NUM_TYPES)
+ continue;
+ if (!size)
+ type = MTRR_NUM_TYPES;
+ if (type == MTRR_TYPE_WRPROT)
+ type = MTRR_TYPE_UNCACHABLE;
+ num[type]++;
+ }
+
+ /* check if we got UC entries */
+ if (!num[MTRR_TYPE_UNCACHABLE])
+ return 0;
+
+ /* check if we only had WB and UC */
+ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+ num_var_ranges - num[MTRR_NUM_TYPES])
+ return 0;
+
+ /* print original var MTRRs at first, for debugging: */
+ printk(KERN_DEBUG "original variable MTRRs\n");
+ for (i = 0; i < num_var_ranges; i++) {
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+
+ size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+ if (!size_base)
+ continue;
+
+ size_base = to_size_factor(size_base, &size_factor),
+ start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+ start_base = to_size_factor(start_base, &start_factor),
+ type = range_state[i].type;
+
+ printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ i, start_base, start_factor,
+ size_base, size_factor,
+ (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+ ((type == MTRR_TYPE_WRPROT) ? "WP" :
+ ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+ );
+ }
+
+ memset(range, 0, sizeof(range));
+ extra_remove_size = 0;
+ extra_remove_base = 1 << (32 - PAGE_SHIFT);
+ if (mtrr_tom2)
+ extra_remove_size =
+ (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
+ nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+ extra_remove_size);
+ /*
+ * [0, 1M) should always be coverred by var mtrr with WB
+ * and fixed mtrrs should take effective before var mtrr for it
+ */
+ nr_range = add_range_with_merge(range, nr_range, 0,
+ (1ULL<<(20 - PAGE_SHIFT)) - 1);
+ /* sort the ranges */
+ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
+ range_sums = sum_ranges(range, nr_range);
+ printk(KERN_INFO "total RAM coverred: %ldM\n",
+ range_sums >> (20 - PAGE_SHIFT));
+
+ if (mtrr_chunk_size && mtrr_gran_size) {
+ int num_reg;
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ debug_print++;
+ /* convert ranges to var ranges state */
+ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+ mtrr_gran_size);
+
+ /* we got new setting in range_state, check it */
+ memset(range_new, 0, sizeof(range_new));
+ nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ extra_remove_base,
+ extra_remove_size);
+ range_sums_new = sum_ranges(range_new, nr_range_new);
+
+ i = 0;
+ result[i].chunk_sizek = mtrr_chunk_size >> 10;
+ result[i].gran_sizek = mtrr_gran_size >> 10;
+ result[i].num_reg = num_reg;
+ if (range_sums < range_sums_new) {
+ result[i].lose_cover_sizek =
+ (range_sums_new - range_sums) << PSHIFT;
+ result[i].bad = 1;
+ } else
+ result[i].lose_cover_sizek =
+ (range_sums - range_sums_new) << PSHIFT;
+
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad?"*BAD*":" ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad?"-":"",
+ lose_base, lose_factor);
+ if (!result[i].bad) {
+ set_var_mtrr_all(address_bits);
+ return 1;
+ }
+ printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
+ "will find optimal one\n");
+ debug_print--;
+ memset(result, 0, sizeof(result[0]));
+ }
+
+ i = 0;
+ memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+ memset(result, 0, sizeof(result));
+ for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+ char gran_factor;
+ unsigned long gran_base;
+
+ if (debug_print)
+ gran_base = to_size_factor(gran_size >> 10, &gran_factor);
+
+ for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+ chunk_size <<= 1) {
+ int num_reg;
+
+ if (debug_print) {
+ char chunk_factor;
+ unsigned long chunk_base;
+
+ chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
+ printk(KERN_INFO "\n");
+ printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ }
+ if (i >= NUM_RESULT)
+ continue;
+
+ /* convert ranges to var ranges state */
+ num_reg = x86_setup_var_mtrrs(range, nr_range,
+ chunk_size, gran_size);
+
+ /* we got new setting in range_state, check it */
+ memset(range_new, 0, sizeof(range_new));
+ nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ extra_remove_base, extra_remove_size);
+ range_sums_new = sum_ranges(range_new, nr_range_new);
+
+ result[i].chunk_sizek = chunk_size >> 10;
+ result[i].gran_sizek = gran_size >> 10;
+ result[i].num_reg = num_reg;
+ if (range_sums < range_sums_new) {
+ result[i].lose_cover_sizek =
+ (range_sums_new - range_sums) << PSHIFT;
+ result[i].bad = 1;
+ } else
+ result[i].lose_cover_sizek =
+ (range_sums - range_sums_new) << PSHIFT;
+
+ /* double check it */
+ if (!result[i].bad && !result[i].lose_cover_sizek) {
+ if (nr_range_new != nr_range ||
+ memcmp(range, range_new, sizeof(range)))
+ result[i].bad = 1;
+ }
+
+ if (!result[i].bad && (range_sums - range_sums_new <
+ min_loss_pfn[num_reg])) {
+ min_loss_pfn[num_reg] =
+ range_sums - range_sums_new;
+ }
+ i++;
+ }
+ }
+
+ /* print out all */
+ for (i = 0; i < NUM_RESULT; i++) {
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad?"*BAD*":" ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad?"-":"",
+ lose_base, lose_factor);
+ }
+
+ /* try to find the optimal index */
+ if (nr_mtrr_spare_reg >= num_var_ranges)
+ nr_mtrr_spare_reg = num_var_ranges - 1;
+ num_reg_good = -1;
+ for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+ if (!min_loss_pfn[i])
+ num_reg_good = i;
+ }
+
+ index_good = -1;
+ if (num_reg_good != -1) {
+ for (i = 0; i < NUM_RESULT; i++) {
+ if (!result[i].bad &&
+ result[i].num_reg == num_reg_good &&
+ !result[i].lose_cover_sizek) {
+ index_good = i;
+ break;
+ }
+ }
+ }
+
+ if (index_good != -1) {
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+ i = index_good;
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
+ result[i].num_reg, lose_base, lose_factor);
+ /* convert ranges to var ranges state */
+ chunk_size = result[i].chunk_sizek;
+ chunk_size <<= 10;
+ gran_size = result[i].gran_sizek;
+ gran_size <<= 10;
+ debug_print++;
+ x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+ debug_print--;
+ set_var_mtrr_all(address_bits);
+ return 1;
+ }
+
+ printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+ printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+ return 0;
+}
+#else
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+ return 0;
+}
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
+
static int disable_mtrr_trim;
static int __init disable_mtrr_trim_setup(char *str)
@@ -648,6 +1534,19 @@ int __init amd_special_default_mtrr(void)
return 0;
}
+static u64 __init real_trim_memory(unsigned long start_pfn,
+ unsigned long limit_pfn)
+{
+ u64 trim_start, trim_size;
+ trim_start = start_pfn;
+ trim_start <<= PAGE_SHIFT;
+ trim_size = limit_pfn;
+ trim_size <<= PAGE_SHIFT;
+ trim_size -= trim_start;
+
+ return e820_update_range(trim_start, trim_size, E820_RAM,
+ E820_RESERVED);
+}
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
* @end_pfn: ending page frame number
@@ -663,8 +1562,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
- u64 trim_start, trim_size;
+ int nr_range;
+ u64 total_trim_size;
+ /* extra one for all 0 */
+ int num[MTRR_NUM_TYPES + 1];
/*
* Make sure we only trim uncachable memory on machines that
* support the Intel MTRR architecture:
@@ -676,44 +1578,92 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
- if (amd_special_default_mtrr())
- return 0;
+ /* get it and store it aside */
+ memset(range_state, 0, sizeof(range_state));
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ range_state[i].base_pfn = base;
+ range_state[i].size_pfn = size;
+ range_state[i].type = type;
+ }
/* Find highest cached pfn */
for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i, &base, &size, &type);
+ type = range_state[i].type;
if (type != MTRR_TYPE_WRBACK)
continue;
+ base = range_state[i].base_pfn;
+ size = range_state[i].size_pfn;
if (highest_pfn < base + size)
highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- if (!kvm_para_available()) {
- printk(KERN_WARNING
+ WARN(!kvm_para_available(), KERN_WARNING
"WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
- }
return 0;
}
- if (highest_pfn < end_pfn) {
+ /* check entries number */
+ memset(num, 0, sizeof(num));
+ for (i = 0; i < num_var_ranges; i++) {
+ type = range_state[i].type;
+ if (type >= MTRR_NUM_TYPES)
+ continue;
+ size = range_state[i].size_pfn;
+ if (!size)
+ type = MTRR_NUM_TYPES;
+ num[type]++;
+ }
+
+ /* no entry for WB? */
+ if (!num[MTRR_TYPE_WRBACK])
+ return 0;
+
+ /* check if we only had WB and UC */
+ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+ num_var_ranges - num[MTRR_NUM_TYPES])
+ return 0;
+
+ memset(range, 0, sizeof(range));
+ nr_range = 0;
+ if (mtrr_tom2) {
+ range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+ range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+ if (highest_pfn < range[nr_range].end + 1)
+ highest_pfn = range[nr_range].end + 1;
+ nr_range++;
+ }
+ nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+
+ total_trim_size = 0;
+ /* check the head */
+ if (range[0].start)
+ total_trim_size += real_trim_memory(0, range[0].start);
+ /* check the holes */
+ for (i = 0; i < nr_range - 1; i++) {
+ if (range[i].end + 1 < range[i+1].start)
+ total_trim_size += real_trim_memory(range[i].end + 1,
+ range[i+1].start);
+ }
+ /* check the top */
+ i = nr_range - 1;
+ if (range[i].end + 1 < end_pfn)
+ total_trim_size += real_trim_memory(range[i].end + 1,
+ end_pfn);
+
+ if (total_trim_size) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %luMB of RAM.\n",
- (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
+ " all of memory, losing %lluMB of RAM.\n",
+ total_trim_size >> 20);
- WARN_ON(1);
+ if (!changed_by_mtrr_cleanup)
+ WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
- trim_start = highest_pfn;
- trim_start <<= PAGE_SHIFT;
- trim_size = end_pfn;
- trim_size <<= PAGE_SHIFT;
- trim_size -= trim_start;
- update_memory_range(trim_start, trim_size, E820_RAM,
- E820_RESERVED);
update_e820();
+
return 1;
}
@@ -729,18 +1679,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
*/
void __init mtrr_bp_init(void)
{
+ u32 phys_addr;
init_ifs();
+ phys_addr = 32;
+
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = 0xff000000; /* 36 bits */
size_and_mask = 0x00f00000;
+ phys_addr = 36;
/* This is an AMD specific MSR, but we assume(hope?) that
Intel will implement it to when they extend the address
bus of the Xeon. */
if (cpuid_eax(0x80000000) >= 0x80000008) {
- u32 phys_addr;
phys_addr = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1711,7 @@ void __init mtrr_bp_init(void)
don't support PAE */
size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
+ phys_addr = 32;
}
} else {
switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1745,15 @@ void __init mtrr_bp_init(void)
if (mtrr_if) {
set_num_var_ranges();
init_table();
- if (use_intel())
+ if (use_intel()) {
get_mtrr_state();
+
+ if (mtrr_cleanup(phys_addr)) {
+ changed_by_mtrr_cleanup = 1;
+ mtrr_if->set_all();
+ }
+
+ }
}
}
@@ -822,16 +1783,17 @@ void mtrr_ap_init(void)
*/
void mtrr_save_state(void)
{
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
}
static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
return 0;
- if (use_intel())
- mtrr_state_warn();
- else {
+ if (use_intel()) {
+ if (!changed_by_mtrr_cleanup)
+ mtrr_state_warn();
+ } else {
/* The CPUs haven't MTRR and seem to not support SMP. They have
* specific drivers, we use a tricky method to support
* suspend/resume for them.
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2cc77eb6fea..2dc4ec656b2 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);
extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
extern unsigned int num_var_ranges;
+extern u64 mtrr_tom2;
void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f9ae93adffe..6bff382094f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -1,11 +1,15 @@
-/* local apic based NMI watchdog for various CPUs.
- This file also handles reservation of performance counters for coordination
- with other users (like oprofile).
-
- Note that these events normally don't tick when the CPU idles. This means
- the frequency varies with CPU load.
-
- Original code for K7/P6 written by Keith Owens */
+/*
+ * local apic based NMI watchdog for various CPUs.
+ *
+ * This file also handles reservation of performance counters for coordination
+ * with other users (like oprofile).
+ *
+ * Note that these events normally don't tick when the CPU idles. This means
+ * the frequency varies with CPU load.
+ *
+ * Original code for K7/P6 written by Keith Owens
+ *
+ */
#include <linux/percpu.h>
#include <linux/module.h>
@@ -36,12 +40,16 @@ struct wd_ops {
static const struct wd_ops *wd_ops;
-/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
- * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
+/*
+ * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0.
+ *
+ * It will be the max for all platforms (for now)
*/
#define NMI_MAX_COUNTER_BITS 66
-/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+/*
+ * perfctr_nmi_owner tracks the ownership of the perfctr registers:
* evtsel_nmi_owner tracks the ownership of the event selection
* - different performance counters/ event selection may be reserved for
* different subsystems this reservation system just tries to coordinate
@@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
return 0;
}
-/* converts an msr to an appropriate reservation bit */
-/* returns the bit offset of the event selection register */
+/*
+ * converts an msr to an appropriate reservation bit
+ * returns the bit offset of the event selection register
+ */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
/* returns the bit offset of the event selection register */
@@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
return (!test_bit(counter, perfctr_nmi_owner));
}
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
int reserve_perfctr_nmi(unsigned int msr)
{
@@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr)
return 1;
return 0;
}
+EXPORT_SYMBOL(reserve_perfctr_nmi);
void release_perfctr_nmi(unsigned int msr)
{
@@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr)
clear_bit(counter, perfctr_nmi_owner);
}
+EXPORT_SYMBOL(release_perfctr_nmi);
int reserve_evntsel_nmi(unsigned int msr)
{
@@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr)
return 1;
return 0;
}
+EXPORT_SYMBOL(reserve_evntsel_nmi);
void release_evntsel_nmi(unsigned int msr)
{
@@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr)
clear_bit(counter, evntsel_nmi_owner);
}
-
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
-EXPORT_SYMBOL(reserve_perfctr_nmi);
-EXPORT_SYMBOL(release_perfctr_nmi);
-EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
void disable_lapic_nmi_watchdog(void)
@@ -180,8 +189,10 @@ void disable_lapic_nmi_watchdog(void)
if (atomic_read(&nmi_active) <= 0)
return;
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
- wd_ops->unreserve();
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
+
+ if (wd_ops)
+ wd_ops->unreserve();
BUG_ON(atomic_read(&nmi_active) != 0);
}
@@ -202,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
return;
}
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
touch_nmi_watchdog();
}
@@ -232,31 +243,32 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
return retval;
}
-static void
-write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
+static void write_watchdog_counter(unsigned int perfctr_msr,
+ const char *descr, unsigned nmi_hz)
{
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsrl(perfctr_msr, 0 - count);
}
static void write_watchdog_counter32(unsigned int perfctr_msr,
- const char *descr, unsigned nmi_hz)
+ const char *descr, unsigned nmi_hz)
{
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsr(perfctr_msr, (u32)(-count), 0);
}
-/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
- nicely stable so there is not much variety */
-
+/*
+ * AMD K7/K8/Family10h/Family11h support.
+ * AMD keeps this interface nicely stable so there is not much variety
+ */
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
#define K7_EVNTSEL_OS (1 << 17)
@@ -283,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+
+ /* initialize the wd struct before enabling */
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(evntsel_msr, evntsel, 0);
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
return 1;
}
@@ -325,18 +343,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
}
static const struct wd_ops k7_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_k7_watchdog,
- .rearm = single_msr_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_K7_PERFCTR0,
- .evntsel = MSR_K7_EVNTSEL0,
- .checkbit = 1ULL<<47,
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_k7_watchdog,
+ .rearm = single_msr_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_K7_PERFCTR0,
+ .evntsel = MSR_K7_EVNTSEL0,
+ .checkbit = 1ULL << 47,
};
-/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
-
+/*
+ * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
+ */
#define P6_EVNTSEL0_ENABLE (1 << 22)
#define P6_EVNTSEL_INT (1 << 20)
#define P6_EVNTSEL_OS (1 << 17)
@@ -366,58 +385,91 @@ static int setup_p6_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+
+ /* initialize the wd struct before enabling */
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
wrmsr(evntsel_msr, evntsel, 0);
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
return 1;
}
static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
- /* P6 based Pentium M need to re-unmask
+ /*
+ * P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant.
- * ArchPerfom/Core Duo also needs this */
+ * ArchPerfom/Core Duo also needs this
+ */
apic_write(APIC_LVTPC, APIC_DM_NMI);
+
/* P6/ARCH_PERFMON has 32 bit counter write */
write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
}
static const struct wd_ops p6_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_p6_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_P6_PERFCTR0,
- .evntsel = MSR_P6_EVNTSEL0,
- .checkbit = 1ULL<<39,
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_p6_watchdog,
+ .rearm = p6_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_P6_PERFCTR0,
+ .evntsel = MSR_P6_EVNTSEL0,
+ .checkbit = 1ULL << 39,
};
-/* Intel P4 performance counters. By far the most complicated of all. */
-
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-#define P4_CCCR_OVF (1<<31)
-
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-
+/*
+ * Intel P4 performance counters.
+ * By far the most complicated of all.
+ */
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
+#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
+#define P4_ESCR_OS (1 << 3)
+#define P4_ESCR_USR (1 << 2)
+#define P4_CCCR_OVF_PMI0 (1 << 26)
+#define P4_CCCR_OVF_PMI1 (1 << 27)
+#define P4_CCCR_THRESHOLD(N) ((N) << 20)
+#define P4_CCCR_COMPLEMENT (1 << 19)
+#define P4_CCCR_COMPARE (1 << 18)
+#define P4_CCCR_REQUIRED (3 << 16)
+#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
+#define P4_CCCR_ENABLE (1 << 12)
+#define P4_CCCR_OVF (1 << 31)
+
+#define P4_CONTROLS 18
+static unsigned int p4_controls[18] = {
+ MSR_P4_BPU_CCCR0,
+ MSR_P4_BPU_CCCR1,
+ MSR_P4_BPU_CCCR2,
+ MSR_P4_BPU_CCCR3,
+ MSR_P4_MS_CCCR0,
+ MSR_P4_MS_CCCR1,
+ MSR_P4_MS_CCCR2,
+ MSR_P4_MS_CCCR3,
+ MSR_P4_FLAME_CCCR0,
+ MSR_P4_FLAME_CCCR1,
+ MSR_P4_FLAME_CCCR2,
+ MSR_P4_FLAME_CCCR3,
+ MSR_P4_IQ_CCCR0,
+ MSR_P4_IQ_CCCR1,
+ MSR_P4_IQ_CCCR2,
+ MSR_P4_IQ_CCCR3,
+ MSR_P4_IQ_CCCR4,
+ MSR_P4_IQ_CCCR5,
+};
+/*
+ * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+ * CRU_ESCR0 (with any non-null event selector) through a complemented
+ * max threshold. [IA32-Vol3, Section 14.9.9]
+ */
static int setup_p4_watchdog(unsigned nmi_hz)
{
unsigned int perfctr_msr, evntsel_msr, cccr_msr;
@@ -442,7 +494,8 @@ static int setup_p4_watchdog(unsigned nmi_hz)
#endif
ht_num = 0;
- /* performance counters are shared resources
+ /*
+ * performance counters are shared resources
* assign each hyperthread its own set
* (re-use the ESCR0 register, seems safe
* and keeps the cccr_val the same)
@@ -453,12 +506,38 @@ static int setup_p4_watchdog(unsigned nmi_hz)
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR0;
cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+ /*
+ * If we're on the kdump kernel or other situation, we may
+ * still have other performance counter registers set to
+ * interrupt and they'll keep interrupting forever because
+ * of the P4_CCCR_OVF quirk. So we need to ACK all the
+ * pending interrupts and disable all the registers here,
+ * before reenabling the NMI delivery. Refer to p4_rearm()
+ * about the P4_CCCR_OVF quirk.
+ */
+ if (reset_devices) {
+ unsigned int low, high;
+ int i;
+
+ for (i = 0; i < P4_CONTROLS; i++) {
+ rdmsr(p4_controls[i], low, high);
+ low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+ wrmsr(p4_controls[i], low, high);
+ }
+ }
} else {
/* logical cpu 1 */
perfctr_msr = MSR_P4_IQ_PERFCTR1;
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+ /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+ if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+ cccr_val = P4_CCCR_OVF_PMI0;
+ else
+ cccr_val = P4_CCCR_OVF_PMI1;
+ cccr_val |= P4_CCCR_ESCR_SELECT(4);
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -473,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
wrmsr(cccr_msr, cccr_val, 0);
write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
+
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = cccr_msr;
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ cccr_val |= P4_CCCR_ENABLE;
+ wrmsr(cccr_msr, cccr_val, 0);
return 1;
}
@@ -540,20 +624,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
}
static const struct wd_ops p4_wd_ops = {
- .reserve = p4_reserve,
- .unreserve = p4_unreserve,
- .setup = setup_p4_watchdog,
- .rearm = p4_rearm,
- .stop = stop_p4_watchdog,
+ .reserve = p4_reserve,
+ .unreserve = p4_unreserve,
+ .setup = setup_p4_watchdog,
+ .rearm = p4_rearm,
+ .stop = stop_p4_watchdog,
/* RED-PEN this is wrong for the other sibling */
- .perfctr = MSR_P4_BPU_PERFCTR0,
- .evntsel = MSR_P4_BSU_ESCR0,
- .checkbit = 1ULL<<39,
+ .perfctr = MSR_P4_BPU_PERFCTR0,
+ .evntsel = MSR_P4_BSU_ESCR0,
+ .checkbit = 1ULL << 39,
};
-/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
- all future Intel CPUs. */
-
+/*
+ * Watchdog using the Intel architected PerfMon.
+ * Used for Core2 and hopefully all future Intel CPUs.
+ */
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
@@ -593,25 +678,29 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
+ wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
return 1;
}
static struct wd_ops intel_arch_wd_ops __read_mostly = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_intel_arch_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_intel_arch_watchdog,
+ .rearm = p6_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
+ .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
static void probe_nmi_watchdog(void)
@@ -624,8 +713,10 @@ static void probe_nmi_watchdog(void)
wd_ops = &k7_wd_ops;
break;
case X86_VENDOR_INTEL:
- /* Work around Core Duo (Yonah) errata AE49 where perfctr1
- doesn't have a working enable bit. */
+ /*
+ * Work around Core Duo (Yonah) errata AE49 where perfctr1
+ * doesn't have a working enable bit.
+ */
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
@@ -636,7 +727,7 @@ static void probe_nmi_watchdog(void)
}
switch (boot_cpu_data.x86) {
case 6:
- if (boot_cpu_data.x86_model > 0xd)
+ if (boot_cpu_data.x86_model > 13)
return;
wd_ops = &p6_wd_ops;
@@ -697,10 +788,11 @@ int lapic_wd_event(unsigned nmi_hz)
{
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 ctr;
+
rdmsrl(wd->perfctr_msr, ctr);
- if (ctr & wd_ops->checkbit) { /* perfctr still running? */
+ if (ctr & wd_ops->checkbit) /* perfctr still running? */
return 0;
- }
+
wd_ops->rearm(wd, nmi_hz);
return 1;
}
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 00000000000..5abbea297e0
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
+/*
+ * Strings for the various x86 power flags
+ *
+ * This file must not contain any executable code.
+ */
+
+#include <asm/cpufeature.h>
+
+const char *const x86_power_flags[32] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
+};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0d0d9057e7c..a26c480b949 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0) /* just in case, cpu 0 is not the first */
*pos = first_cpu(cpu_online_map);
- if ((*pos) < NR_CPUS && cpu_online(*pos))
+ if ((*pos) < nr_cpu_ids && cpu_online(*pos))
return &cpu_data(*pos);
return NULL;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index b911a2c61b8..52b3fefbd5a 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,6 +5,18 @@
#include <asm/msr.h>
#include "cpu.h"
+static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
+{
+ u32 xlvl;
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ((xlvl & 0xffff0000) == 0x80860000) {
+ if (xlvl >= 0x80860001)
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+}
+
static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
{
unsigned int cap_mask, uk, max, dummy;
@@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
char cpu_info[65];
- get_model_name(c); /* Same as AMD/Cyrix */
+ early_init_transmeta(c);
+
display_cacheinfo(c);
/* Print CMS and CPU revision */
@@ -85,23 +98,12 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
#endif
}
-static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
-{
- u32 xlvl;
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ((xlvl & 0xffff0000) == 0x80860000) {
- if (xlvl >= 0x80860001)
- c->x86_capability[2] = cpuid_edx(0x80860001);
- }
-}
-
static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
.c_vendor = "Transmeta",
.c_ident = { "GenuineTMx86", "TransmetaCPU" },
+ .c_early_init = early_init_transmeta,
.c_init = init_transmeta,
- .c_identify = transmeta_identify,
+ .c_x86_vendor = X86_VENDOR_TRANSMETA,
};
-cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev);
+cpu_dev_register(transmeta_cpu_dev);
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index b1fc90989d7..e777f79e096 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = {
}
},
},
+ .c_x86_vendor = X86_VENDOR_UMC,
};
-cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev);
+cpu_dev_register(umc_cpu_dev);