48 files changed, 4340 insertions, 2464 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2d..4e242f9a06e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,11 +14,12 @@ obj-y			+= vmware.o hypervisor.o
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
 
+obj-$(CONFIG_X86_CPU_DEBUG)		+= cpu_debug.o
+
 obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
-obj-$(CONFIG_CPU_SUP_CENTAUR_32)	+= centaur.o
-obj-$(CONFIG_CPU_SUP_CENTAUR_64)	+= centaur_64.o
+obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d..8220ae69849 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
 #include <asm/pat.h>
 #include <asm/processor.h>
 
-#include <mach_apic.h>
+#include <asm/apic.h>
 
 struct cpuid_bit {
 	u16 feature;
@@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	u32 regs[4];
 	const struct cpuid_bit *cb;
 
-	static const struct cpuid_bit cpuid_bits[] = {
+	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
 		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
 		{ 0, 0, 0, 0 }
 	};
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
  */
 void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
 	unsigned int eax, ebx, ecx, edx, sub_index;
 	unsigned int ht_mask_width, core_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 
 	core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
 
-#ifdef CONFIG_X86_32
-	c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
+	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
 						 & core_select_mask;
-	c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
 	/*
 	 * Reinit the apicid, now that we have extended initial_apicid.
 	 */
-	c->apicid = phys_pkg_id(c->initial_apicid, 0);
-#else
-	c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
-	c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
-	/*
-	 * Reinit the apicid, now that we have extended initial_apicid.
-	 */
-	c->apicid = phys_pkg_id(0);
-#endif
+	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
 
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 	return;
 #endif
 }
-
-#ifdef CONFIG_X86_PAT
-void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
-{
-	if (!cpu_has_pat)
-		pat_disable("PAT not supported by CPU.");
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_INTEL:
-		/*
-		 * There is a known erratum on Pentium III and Core Solo
-		 * and Core Duo CPUs.
-		 * " Page with PAT set to WC while associated MTRR is UC
-		 *   may consolidate to UC "
-		 * Because of this erratum, it is better to stick with
-		 * setting WC in MTRR rather than using PAT on these CPUs.
-		 *
-		 * Enable PAT WC only on P4, Core 2 or later CPUs.
-		 */
-		if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
-			return;
-
-		pat_disable("PAT WC disabled due to known CPU erratum.");
-		return;
-
-	case X86_VENDOR_AMD:
-	case X86_VENDOR_CENTAUR:
-	case X86_VENDOR_TRANSMETA:
-		return;
-	}
-
-	pat_disable("PAT disabled. Not yet verified on this CPU type.");
-}
-#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa91..7e4a459daa6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -12,8 +13,6 @@
 # include <asm/cacheflush.h>
 #endif
 
-#include <mach_apic.h>
-
 #include "cpu.h"
 
 #ifdef CONFIG_X86_32
@@ -143,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	/* calling is from identify_secondary_cpu() ? */
+	if (c->cpu_index == boot_cpu_id)
+		return;
+
+	/*
+	 * Certain Athlons might work (for various values of 'work') in SMP
+	 * but they are not certified as MP capable.
+	 */
+	/* Athlon 660/661 is valid. */
+	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
+	    (c->x86_mask == 1)))
+		goto valid_k7;
+
+	/* Duron 670 is valid */
+	if ((c->x86_model == 7) && (c->x86_mask == 0))
+		goto valid_k7;
+
+	/*
+	 * Athlon 662, Duron 671, and Athlon >model 7 have capability
+	 * bit. It's worth noting that the A5 stepping (662) of some
+	 * Athlon XP's have the MP bit set.
+	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
+	 * more.
+	 */
+	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
+	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+	     (c->x86_model > 7))
+		if (cpu_has_mp)
+			goto valid_k7;
+
+	/* If we get here, not a certified SMP capable AMD system. */
+
+	/*
+	 * Don't taint if we are running SMP kernel on a single non-MP
+	 * approved Athlon
+	 */
+	WARN_ONCE(1, "WARNING: This combination of AMD"
+		"processors is not suitable for SMP.\n");
+	if (!test_taint(TAINT_UNSAFE_SMP))
+		add_taint(TAINT_UNSAFE_SMP);
+
+valid_k7:
+	;
+#endif
+}
+
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -177,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 	}
 
 	set_cpu_cap(c, X86_FEATURE_K7);
+
+	amd_k7_smp_check(c);
 }
 #endif
 
@@ -452,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
 }
 #endif
 
-static struct cpu_dev amd_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_vendor	= "AMD",
 	.c_ident	= { "AuthenticAMD" },
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 89bfdd9cacc..c95e831bb09 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,11 +1,11 @@
+#include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/bitops.h>
 
 #include <asm/processor.h>
-#include <asm/msr.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
+#include <asm/msr.h>
 
 #include "cpu.h"
 
@@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 		 */
 		c->x86_capability[5] = cpuid_edx(0xC0000001);
 	}
-
+#ifdef CONFIG_X86_32
 	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
 	if (c->x86_model >= 6 && c->x86_model <= 9) {
 		rdmsr(MSR_VIA_FCR, lo, hi);
@@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 	/* Before Nehemiah, the C3's had 3dNOW! */
 	if (c->x86_model >= 6 && c->x86_model < 9)
 		set_cpu_cap(c, X86_FEATURE_3DNOW);
+#endif
+	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
+		c->x86_cache_alignment = c->x86_clflush_size * 2;
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+	}
 
 	display_cacheinfo(c);
 }
@@ -316,16 +321,25 @@ enum {
 static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
 {
 	switch (c->x86) {
+#ifdef CONFIG_X86_32
 	case 5:
 		/* Emulate MTRRs using Centaur's MCR. */
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
 		break;
+#endif
+	case 6:
+		if (c->x86_model >= 0xf)
+			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		break;
 	}
+#ifdef CONFIG_X86_64
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+#endif
 }
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
-
+#ifdef CONFIG_X86_32
 	char *name;
 	u32  fcr_set = 0;
 	u32  fcr_clr = 0;
@@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
 	 */
 	clear_cpu_cap(c, 0*32+31);
-
+#endif
+	early_init_centaur(c);
 	switch (c->x86) {
+#ifdef CONFIG_X86_32
 	case 5:
 		switch (c->x86_model) {
 		case 4:
@@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 		}
 		sprintf(c->x86_model_id, "WinChip %s", name);
 		break;
-
+#endif
 	case 6:
 		init_c3(c);
 		break;
 	}
+#ifdef CONFIG_X86_64
+	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+#endif
 }
 
 static unsigned int __cpuinit
 centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
+#ifdef CONFIG_X86_32
 	/* VIA C3 CPUs (670-68F) need further shifting. */
 	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
 		size >>= 8;
@@ -464,11 +484,11 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 	if ((c->x86 == 6) && (c->x86_model == 9) &&
 				(c->x86_mask == 1) && (size == 65))
 		size -= 1;
-
+#endif
 	return size;
 }
 
-static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
 	.c_vendor	= "Centaur",
 	.c_ident	= { "CentaurHauls" },
 	.c_early_init	= early_init_centaur,
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
deleted file mode 100644
index a1625f5a1e7..00000000000
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
-{
-	if (c->x86 == 0x6 && c->x86_model >= 0xf)
-		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-
-	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
-}
-
-static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
-{
-	early_init_centaur(c);
-
-	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
-		c->x86_cache_alignment = c->x86_clflush_size * 2;
-		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-	}
-	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
-}
-
-static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
-	.c_vendor	= "Centaur",
-	.c_ident	= { "CentaurHauls" },
-	.c_early_init	= early_init_centaur,
-	.c_init		= init_centaur,
-	.c_x86_vendor	= X86_VENDOR_CENTAUR,
-};
-
-cpu_dev_register(centaur_cpu_dev);
-
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b..e2962cc1e27 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,118 +1,129 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
 #include <linux/bootmem.h>
+#include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/kgdb.h>
-#include <linux/topology.h>
+#include <linux/percpu.h>
+#include <linux/string.h>
 #include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kgdb.h>
 #include <linux/smp.h>
-#include <linux/percpu.h>
-#include <asm/i387.h>
-#include <asm/msr.h>
-#include <asm/io.h>
-#include <asm/linkage.h>
+#include <linux/io.h>
+
+#include <asm/stackprotector.h>
 #include <asm/mmu_context.h>
+#include <asm/hypervisor.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/topology.h>
+#include <asm/cpumask.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+#include <asm/proto.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
 #include <asm/mtrr.h>
+#include <asm/numa.h>
+#include <asm/asm.h>
+#include <asm/cpu.h>
 #include <asm/mce.h>
+#include <asm/msr.h>
 #include <asm/pat.h>
-#include <asm/asm.h>
-#include <asm/numa.h>
 #include <asm/smp.h>
+
 #ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <mach_apic.h>
-#include <asm/genapic.h>
+#include <asm/uv/uv.h>
 #endif
 
-#include <asm/pda.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/proto.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/hypervisor.h>
-
 #include "cpu.h"
 
 #ifdef CONFIG_X86_64
 
 /* all of these masks are initialized in setup_cpu_local_masks() */
-cpumask_var_t cpu_callin_mask;
-cpumask_var_t cpu_callout_mask;
 cpumask_var_t cpu_initialized_mask;
+cpumask_var_t cpu_callout_mask;
+cpumask_var_t cpu_callin_mask;
 
 /* representing cpus for which sibling maps can be computed */
 cpumask_var_t cpu_sibling_setup_mask;
 
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
 #else /* CONFIG_X86_32 */
 
-cpumask_t cpu_callin_map;
+cpumask_t cpu_sibling_setup_map;
 cpumask_t cpu_callout_map;
 cpumask_t cpu_initialized;
-cpumask_t cpu_sibling_setup_map;
+cpumask_t cpu_callin_map;
 
 #endif /* CONFIG_X86_32 */
 
 
-static struct cpu_dev *this_cpu __cpuinitdata;
+static const struct cpu_dev *this_cpu __cpuinitdata;
 
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types  kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
-	[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
-	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
-	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
-	[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
+	/*
+	 * We need valid kernel segments for data and code in long mode too
+	 * IRET will check the segment types  kkeil 2000/10/28
+	 * Also sysret mandates a special GDT layout
+	 *
+	 * TLS descriptors are currently at a different place compared to i386.
+	 * Hopefully nobody expects them at a fixed place (Wine?)
+	 */
+	[GDT_ENTRY_KERNEL32_CS]		= { { { 0x0000ffff, 0x00cf9b00 } } },
+	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00af9b00 } } },
+	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9300 } } },
+	[GDT_ENTRY_DEFAULT_USER32_CS]	= { { { 0x0000ffff, 0x00cffb00 } } },
+	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff300 } } },
+	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00affb00 } } },
 #else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
-	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
-	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
+	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00cf9a00 } } },
+	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00cffa00 } } },
+	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff200 } } },
 	/*
 	 * Segments used for calling PnP BIOS have byte granularity.
 	 * They code segments and data segments have fixed 64k limits,
 	 * the transfer segment sizes are set at run time.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS32]	= { { { 0x0000ffff, 0x00409a00 } } },
 	/* 16-bit code */
-	[GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS16]	= { { { 0x0000ffff, 0x00009a00 } } },
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_DS]		= { { { 0x0000ffff, 0x00009200 } } },
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS1]		= { { { 0x00000000, 0x00009200 } } },
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS2]		= { { { 0x00000000, 0x00009200 } } },
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * are set at run time.  All have 64k limits.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE]	= { { { 0x0000ffff, 0x00409a00 } } },
 	/* 16-bit code */
-	[GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE+1]	= { { { 0x0000ffff, 0x00009a00 } } },
 	/* data */
-	[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
+	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
 
-	[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-	[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x00000000, 0x00c09200 } } },
+	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	GDT_STACK_CANARY_INIT
 #endif
+} };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 #ifdef CONFIG_X86_32
@@ -153,16 +164,17 @@ static inline int flag_is_changeable_p(u32 flag)
 	 * the CPUID. Add "volatile" to not allow gcc to
 	 * optimize the subsequent calls to this function.
 	 */
-	asm volatile ("pushfl\n\t"
-		      "pushfl\n\t"
-		      "popl %0\n\t"
-		      "movl %0,%1\n\t"
-		      "xorl %2,%0\n\t"
-		      "pushl %0\n\t"
-		      "popfl\n\t"
-		      "pushfl\n\t"
-		      "popl %0\n\t"
-		      "popfl\n\t"
+	asm volatile ("pushfl		\n\t"
+		      "pushfl		\n\t"
+		      "popl %0		\n\t"
+		      "movl %0, %1	\n\t"
+		      "xorl %2, %0	\n\t"
+		      "pushl %0		\n\t"
+		      "popfl		\n\t"
+		      "pushfl		\n\t"
+		      "popl %0		\n\t"
+		      "popfl		\n\t"
+
 		      : "=&r" (f1), "=&r" (f2)
 		      : "ir" (flag));
 
@@ -177,18 +189,22 @@ static int __cpuinit have_cpuid_p(void)
 
 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
-		/* Disable processor serial number */
-		unsigned long lo, hi;
-		rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-		lo |= 0x200000;
-		wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-		printk(KERN_NOTICE "CPU serial number disabled.\n");
-		clear_cpu_cap(c, X86_FEATURE_PN);
-
-		/* Disabling the serial number may affect the cpuid level */
-		c->cpuid_level = cpuid_eax(0);
-	}
+	unsigned long lo, hi;
+
+	if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
+		return;
+
+	/* Disable processor serial number: */
+
+	rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+	lo |= 0x200000;
+	wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+
+	printk(KERN_NOTICE "CPU serial number disabled.\n");
+	clear_cpu_cap(c, X86_FEATURE_PN);
+
+	/* Disabling the serial number may affect the cpuid level */
+	c->cpuid_level = cpuid_eax(0);
 }
 
 static int __init x86_serial_nr_setup(char *s)
@@ -213,16 +229,64 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 #endif
 
 /*
+ * Some CPU features depend on higher CPUID levels, which may not always
+ * be available due to CPUID level capping or broken virtualization
+ * software.  Add those features to this table to auto-disable them.
+ */
+struct cpuid_dependent_feature {
+	u32 feature;
+	u32 level;
+};
+
+static const struct cpuid_dependent_feature __cpuinitconst
+cpuid_dependent_features[] = {
+	{ X86_FEATURE_MWAIT,		0x00000005 },
+	{ X86_FEATURE_DCA,		0x00000009 },
+	{ X86_FEATURE_XSAVE,		0x0000000d },
+	{ 0, 0 }
+};
+
+static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
+{
+	const struct cpuid_dependent_feature *df;
+
+	for (df = cpuid_dependent_features; df->feature; df++) {
+
+		if (!cpu_has(c, df->feature))
+			continue;
+		/*
+		 * Note: cpuid_level is set to -1 if unavailable, but
+		 * extended_extended_level is set to 0 if unavailable
+		 * and the legitimate extended levels are all negative
+		 * when signed; hence the weird messing around with
+		 * signs here...
+		 */
+		if (!((s32)df->level < 0 ?
+		     (u32)df->level > (u32)c->extended_cpuid_level :
+		     (s32)df->level > (s32)c->cpuid_level))
+			continue;
+
+		clear_cpu_cap(c, df->feature);
+		if (!warn)
+			continue;
+
+		printk(KERN_WARNING
+		       "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
+				x86_cap_flags[df->feature], df->level);
+	}
+}
+
+/*
  * Naming convention should be: <Name> [(<Codename>)]
  * This table only is used unless init_<vendor>() below doesn't set it;
- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
- *
+ * in particular, if CPUID levels 0x80000002..4 are supported, this
+ * isn't used
  */
 
 /* Look up CPU names by table lookup. */
-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
 {
-	struct cpu_model_info *info;
+	const struct cpu_model_info *info;
 
 	if (c->x86_model >= 16)
 		return NULL;	/* Range check */
@@ -242,21 +306,34 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
 
 __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 
-/* Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one. */
-void switch_to_new_gdt(void)
+void load_percpu_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+	loadsegment(fs, __KERNEL_PERCPU);
+#else
+	loadsegment(gs, 0);
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+#endif
+	load_stack_canary_segment();
+}
+
+/*
+ * Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one.
+ */
+void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
-#ifdef CONFIG_X86_32
-	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
-#endif
+	/* Reload the per-cpu base */
+
+	load_percpu_segment(cpu);
 }
 
-static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
 
 static void __cpuinit default_init(struct cpuinfo_x86 *c)
 {
@@ -275,7 +352,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
 #endif
 }
 
-static struct cpu_dev __cpuinitdata default_cpu = {
+static const struct cpu_dev __cpuinitconst default_cpu = {
 	.c_init	= default_init,
 	.c_vendor = "Unknown",
 	.c_x86_vendor = X86_VENDOR_UNKNOWN,
@@ -289,22 +366,24 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
 	if (c->extended_cpuid_level < 0x80000004)
 		return;
 
-	v = (unsigned int *) c->x86_model_id;
+	v = (unsigned int *)c->x86_model_id;
 	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
 	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
 	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
 	c->x86_model_id[48] = 0;
 
-	/* Intel chips right-justify this string for some dumb reason;
-	   undo that brain damage */
+	/*
+	 * Intel chips right-justify this string for some dumb reason;
+	 * undo that brain damage:
+	 */
 	p = q = &c->x86_model_id[0];
 	while (*p == ' ')
-	     p++;
+		p++;
 	if (p != q) {
-	     while (*p)
-		  *q++ = *p++;
-	     while (q <= &c->x86_model_id[48])
-		  *q++ = '\0';	/* Zero-pad the rest */
+		while (*p)
+			*q++ = *p++;
+		while (q <= &c->x86_model_id[48])
+			*q++ = '\0';	/* Zero-pad the rest */
 	}
 }
 
@@ -373,36 +452,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1) {
+		goto out;
+	}
 
-		if (smp_num_siblings > nr_cpu_ids) {
-			printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
-					smp_num_siblings);
-			smp_num_siblings = 1;
-			return;
-		}
+	if (smp_num_siblings <= 1)
+		goto out;
 
-		index_msb = get_count_order(smp_num_siblings);
-#ifdef CONFIG_X86_64
-		c->phys_proc_id = phys_pkg_id(index_msb);
-#else
-		c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
-#endif
+	if (smp_num_siblings > nr_cpu_ids) {
+		pr_warning("CPU: Unsupported number of siblings %d",
+			   smp_num_siblings);
+		smp_num_siblings = 1;
+		return;
+	}
 
-		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+	index_msb = get_count_order(smp_num_siblings);
+	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
 
-		index_msb = get_count_order(smp_num_siblings);
+	smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		core_bits = get_count_order(c->x86_max_cores);
+	index_msb = get_count_order(smp_num_siblings);
 
-#ifdef CONFIG_X86_64
-		c->cpu_core_id = phys_pkg_id(index_msb) &
-					       ((1 << core_bits) - 1);
-#else
-		c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
-					       ((1 << core_bits) - 1);
-#endif
-	}
+	core_bits = get_count_order(c->x86_max_cores);
+
+	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
+				       ((1 << core_bits) - 1);
 
 out:
 	if ((c->x86_max_cores * smp_num_siblings) > 1) {
@@ -417,8 +490,8 @@ out:
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
-	int i;
 	static int printed;
+	int i;
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
 		if (!cpu_devs[i])
@@ -427,6 +500,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 		if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
 		    (cpu_devs[i]->c_ident[1] &&
 		     !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+
 			this_cpu = cpu_devs[i];
 			c->x86_vendor = this_cpu->c_x86_vendor;
 			return;
@@ -435,7 +509,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 
 	if (!printed) {
 		printed++;
-		printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
+		printk(KERN_ERR
+		    "CPU: vendor_id '%s' unknown, using generic init.\n", v);
+
 		printk(KERN_ERR "CPU: Your system may be unstable.\n");
 	}
 
@@ -455,14 +531,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
 	/* Intel-defined flags: level 0x00000001 */
 	if (c->cpuid_level >= 0x00000001) {
 		u32 junk, tfms, cap0, misc;
+
 		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
 		c->x86 = (tfms >> 8) & 0xf;
 		c->x86_model = (tfms >> 4) & 0xf;
 		c->x86_mask = tfms & 0xf;
+
 		if (c->x86 == 0xf)
 			c->x86 += (tfms >> 20) & 0xff;
 		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xf) << 4;
+
 		if (cap0 & (1<<19)) {
 			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
 			c->x86_cache_alignment = c->x86_clflush_size;
@@ -478,6 +557,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 	/* Intel-defined flags: level 0x00000001 */
 	if (c->cpuid_level >= 0x00000001) {
 		u32 capability, excap;
+
 		cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
 		c->x86_capability[0] = capability;
 		c->x86_capability[4] = excap;
@@ -486,6 +566,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
 	c->extended_cpuid_level = xlvl;
+
 	if ((xlvl & 0xffff0000) == 0x80000000) {
 		if (xlvl >= 0x80000001) {
 			c->x86_capability[1] = cpuid_edx(0x80000001);
@@ -493,13 +574,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 		}
 	}
 
-#ifdef CONFIG_X86_64
 	if (c->extended_cpuid_level >= 0x80000008) {
 		u32 eax = cpuid_eax(0x80000008);
 
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
 	}
+#ifdef CONFIG_X86_32
+	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
+		c->x86_phys_bits = 36;
 #endif
 
 	if (c->extended_cpuid_level >= 0x80000007)
@@ -546,8 +629,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_64
 	c->x86_clflush_size = 64;
+	c->x86_phys_bits = 36;
+	c->x86_virt_bits = 48;
 #else
 	c->x86_clflush_size = 32;
+	c->x86_phys_bits = 32;
+	c->x86_virt_bits = 32;
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 
@@ -570,21 +657,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
 
-	validate_pat_support(c);
-
 #ifdef CONFIG_SMP
 	c->cpu_index = boot_cpu_id;
 #endif
+	filter_cpuid_features(c, false);
 }
 
 void __init early_cpu_init(void)
 {
-	struct cpu_dev **cdev;
+	const struct cpu_dev *const *cdev;
 	int count = 0;
 
-	printk("KERNEL supported cpus:\n");
+	printk(KERN_INFO "KERNEL supported cpus:\n");
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
-		struct cpu_dev *cpudev = *cdev;
+		const struct cpu_dev *cpudev = *cdev;
 		unsigned int j;
 
 		if (count >= X86_VENDOR_NUM)
@@ -595,7 +681,7 @@ void __init early_cpu_init(void)
 		for (j = 0; j < 2; j++) {
 			if (!cpudev->c_ident[j])
 				continue;
-			printk("  %s %s\n", cpudev->c_vendor,
+			printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
 				cpudev->c_ident[j]);
 		}
 	}
@@ -637,7 +723,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
 #ifdef CONFIG_X86_32
 # ifdef CONFIG_X86_HT
-		c->apicid = phys_pkg_id(c->initial_apicid, 0);
+		c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 # else
 		c->apicid = c->initial_apicid;
 # endif
@@ -671,9 +757,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_coreid_bits = 0;
 #ifdef CONFIG_X86_64
 	c->x86_clflush_size = 64;
+	c->x86_phys_bits = 36;
+	c->x86_virt_bits = 48;
 #else
 	c->cpuid_level = -1;	/* CPUID not detected */
 	c->x86_clflush_size = 32;
+	c->x86_phys_bits = 32;
+	c->x86_virt_bits = 32;
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@ -684,7 +774,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		this_cpu->c_identify(c);
 
 #ifdef CONFIG_X86_64
-	c->apicid = phys_pkg_id(0);
+	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
 
 	/*
@@ -704,13 +794,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	squash_the_stupid_serial_number(c);
 
 	/*
-	 * The vendor-specific functions might have changed features.  Now
-	 * we do "generic changes."
+	 * The vendor-specific functions might have changed features.
+	 * Now we do "generic changes."
 	 */
 
+	/* Filter out anything that depends on CPUID levels we don't have */
+	filter_cpuid_features(c, true);
+
 	/* If the model name is still unset, do table lookup. */
 	if (!c->x86_model_id[0]) {
-		char *p;
+		const char *p;
 		p = table_lookup_model(c);
 		if (p)
 			strcpy(c->x86_model_id, p);
@@ -785,11 +878,11 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
 }
 
 struct msr_range {
-	unsigned min;
-	unsigned max;
+	unsigned	min;
+	unsigned	max;
 };
 
-static struct msr_range msr_range_array[] __cpuinitdata = {
+static const struct msr_range msr_range_array[] __cpuinitconst = {
 	{ 0x00000000, 0x00000418},
 	{ 0xc0000000, 0xc000040b},
 	{ 0xc0010000, 0xc0010142},
@@ -798,14 +891,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = {
 
 static void __cpuinit print_cpu_msr(void)
 {
+	unsigned index_min, index_max;
 	unsigned index;
 	u64 val;
 	int i;
-	unsigned index_min, index_max;
 
 	for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
 		index_min = msr_range_array[i].min;
 		index_max = msr_range_array[i].max;
+
 		for (index = index_min; index < index_max; index++) {
 			if (rdmsrl_amd_safe(index, &val))
 				continue;
@@ -815,6 +909,7 @@ static void __cpuinit print_cpu_msr(void)
 }
 
 static int show_msr __cpuinitdata;
+
 static __init int setup_show_msr(char *arg)
 {
 	int num;
@@ -836,12 +931,14 @@ __setup("noclflush", setup_noclflush);
 
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
-	char *vendor = NULL;
+	const char *vendor = NULL;
 
-	if (c->x86_vendor < X86_VENDOR_NUM)
+	if (c->x86_vendor < X86_VENDOR_NUM) {
 		vendor = this_cpu->c_vendor;
-	else if (c->cpuid_level >= 0)
-		vendor = c->x86_vendor_id;
+	} else {
+		if (c->cpuid_level >= 0)
+			vendor = c->x86_vendor_id;
+	}
 
 	if (vendor && !strstr(c->x86_model_id, vendor))
 		printk(KERN_CONT "%s ", vendor);
@@ -868,65 +965,45 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 static __init int setup_disablecpuid(char *arg)
 {
 	int bit;
+
 	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
 		setup_clear_cpu_cap(bit);
 	else
 		return 0;
+
 	return 1;
 }
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+		     irq_stack_union) __aligned(PAGE_SIZE);
 
-void __cpuinit pda_init(int cpu)
-{
-	struct x8664_pda *pda = cpu_pda(cpu);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 
-	/* Setup up data that may be needed in __get_free_pages early */
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-	/* Memory clobbers used to order PDA accessed */
-	mb();
-	wrmsrl(MSR_GS_BASE, pda);
-	mb();
-
-	pda->cpunumber = cpu;
-	pda->irqcount = -1;
-	pda->kernelstack = (unsigned long)stack_thread_info() -
-				 PDA_STACKOFFSET + THREAD_SIZE;
-	pda->active_mm = &init_mm;
-	pda->mmu_state = 0;
-
-	if (cpu == 0) {
-		/* others are initialized in smpboot.c */
-		pda->pcurrent = &init_task;
-		pda->irqstackptr = boot_cpu_stack;
-		pda->irqstackptr += IRQSTACKSIZE - 64;
-	} else {
-		if (!pda->irqstackptr) {
-			pda->irqstackptr = (char *)
-				__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-			if (!pda->irqstackptr)
-				panic("cannot allocate irqstack for cpu %d",
-				      cpu);
-			pda->irqstackptr += IRQSTACKSIZE - 64;
-		}
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
-		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
-			pda->nodenumber = cpu_to_node(cpu);
-	}
-}
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-				  DEBUG_STKSZ] __page_aligned_bss;
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+};
 
-extern asmlinkage void ignore_sysret(void);
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+	__aligned(PAGE_SIZE);
 
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
@@ -957,16 +1034,38 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
-#else
+#else	/* CONFIG_X86_64 */
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+DEFINE_PER_CPU(unsigned long, stack_canary);
+#endif
 
-/* Make sure %fs is initialized properly in idle threads */
+/* Make sure %fs and %gs are initialized properly in idle threads */
 struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
 	regs->fs = __KERNEL_PERCPU;
+	regs->gs = __KERNEL_STACK_CANARY;
+
 	return regs;
 }
-#endif
+#endif	/* CONFIG_X86_64 */
+
+/*
+ * Clear all 6 debug registers:
+ */
+static void clear_all_debug_regs(void)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		/* Ignore db4, db5 */
+		if ((i == 4) || (i == 5))
+			continue;
+
+		set_debugreg(0, i);
+	}
+}
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -976,21 +1075,25 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
  * A lot of state is already set up in PDA init for 64 bit
  */
 #ifdef CONFIG_X86_64
+
 void __cpuinit cpu_init(void)
 {
-	int cpu = stack_smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-	struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
-	unsigned long v;
-	char *estacks = NULL;
+	struct orig_ist *orig_ist;
 	struct task_struct *me;
+	struct tss_struct *t;
+	unsigned long v;
+	int cpu;
 	int i;
 
-	/* CPU 0 is initialised in head64.c */
-	if (cpu != 0)
-		pda_init(cpu);
-	else
-		estacks = boot_exception_stacks;
+	cpu = stack_smp_processor_id();
+	t = &per_cpu(init_tss, cpu);
+	orig_ist = &per_cpu(orig_ist, cpu);
+
+#ifdef CONFIG_NUMA
+	if (cpu != 0 && percpu_read(node_number) == 0 &&
+	    cpu_to_node(cpu) != NUMA_NO_NODE)
+		percpu_write(node_number, cpu_to_node(cpu));
+#endif
 
 	me = current;
 
@@ -1006,7 +1109,9 @@ void __cpuinit cpu_init(void)
 	 * and set up the GDT descriptor:
 	 */
 
-	switch_to_new_gdt();
+	switch_to_new_gdt(cpu);
+	loadsegment(fs, 0);
+
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,31 +1122,24 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	check_efer();
-	if (cpu != 0 && x2apic)
+	if (cpu != 0)
 		enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
 	 */
 	if (!orig_ist->ist[0]) {
-		static const unsigned int order[N_EXCEPTION_STACKS] = {
-		  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
-		  [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
-		};
+		char *estacks = per_cpu(exception_stacks, cpu);
+
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-			if (cpu) {
-				estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
-				if (!estacks)
-					panic("Cannot allocate exception "
-					      "stack %ld %d\n", v, cpu);
-			}
-			estacks += PAGE_SIZE << order[v];
+			estacks += exception_stack_sizes[v];
 			orig_ist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
 		}
 	}
 
 	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
 	/*
 	 * <= is required because the CPU will access up to
 	 * 8 bits beyond the end of the IO permission bitmap.
@@ -1051,8 +1149,7 @@ void __cpuinit cpu_init(void)
 
 	atomic_inc(&init_mm.mm_count);
 	me->active_mm = &init_mm;
-	if (me->mm)
-		BUG();
+	BUG_ON(me->mm);
 	enter_lazy_tlb(&init_mm, me);
 
 	load_sp0(t, &current->thread);
@@ -1069,22 +1166,9 @@ void __cpuinit cpu_init(void)
 	 */
 	if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
 		arch_kgdb_ops.correct_hw_break();
-	else {
-#endif
-	/*
-	 * Clear all 6 debug registers:
-	 */
-
-	set_debugreg(0UL, 0);
-	set_debugreg(0UL, 1);
-	set_debugreg(0UL, 2);
-	set_debugreg(0UL, 3);
-	set_debugreg(0UL, 6);
-	set_debugreg(0UL, 7);
-#ifdef CONFIG_KGDB
-	/* If the kgdb is connected no debug regs should be altered. */
-	}
+	else
 #endif
+		clear_all_debug_regs();
 
 	fpu_init();
 
@@ -1105,7 +1189,8 @@ void __cpuinit cpu_init(void)
 
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
-		for (;;) local_irq_enable();
+		for (;;)
+			local_irq_enable();
 	}
 
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1114,15 +1199,14 @@ void __cpuinit cpu_init(void)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	load_idt(&idt_descr);
-	switch_to_new_gdt();
+	switch_to_new_gdt(cpu);
 
 	/*
 	 * Set up and load the per-CPU TSS and LDT
 	 */
 	atomic_inc(&init_mm.mm_count);
 	curr->active_mm = &init_mm;
-	if (curr->mm)
-		BUG();
+	BUG_ON(curr->mm);
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
@@ -1135,16 +1219,7 @@ void __cpuinit cpu_init(void)
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
-	/* Clear %gs. */
-	asm volatile ("mov %0, %%gs" : : "r" (0));
-
-	/* Clear all 6 debug registers: */
-	set_debugreg(0, 0);
-	set_debugreg(0, 1);
-	set_debugreg(0, 2);
-	set_debugreg(0, 3);
-	set_debugreg(0, 6);
-	set_debugreg(0, 7);
+	clear_all_debug_regs();
 
 	/*
 	 * Force FPU initialization:
@@ -1164,6 +1239,4 @@ void __cpuinit cpu_init(void)
 
 	xsave_init();
 }
-
-
 #endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index de4094a3921..6de9a908e40 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -3,33 +3,34 @@
 #define ARCH_X86_CPU_H
 
 struct cpu_model_info {
-	int vendor;
-	int family;
-	char *model_names[16];
+	int		vendor;
+	int		family;
+	const char	*model_names[16];
 };
 
 /* attempt to consolidate cpu attributes */
 struct cpu_dev {
-	char	* c_vendor;
+	const char	*c_vendor;
 
 	/* some have two possibilities for cpuid string */
-	char	* c_ident[2];	
+	const char	*c_ident[2];
 
 	struct		cpu_model_info c_models[4];
 
-	void            (*c_early_init)(struct cpuinfo_x86 *c);
-	void		(*c_init)(struct cpuinfo_x86 * c);
-	void		(*c_identify)(struct cpuinfo_x86 * c);
-	unsigned int	(*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
-	int	c_x86_vendor;
+	void            (*c_early_init)(struct cpuinfo_x86 *);
+	void		(*c_init)(struct cpuinfo_x86 *);
+	void		(*c_identify)(struct cpuinfo_x86 *);
+	unsigned int	(*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
+	int		c_x86_vendor;
 };
 
 #define cpu_dev_register(cpu_devX) \
-	static struct cpu_dev *__cpu_dev_##cpu_devX __used \
+	static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
 	__attribute__((__section__(".x86_cpu_dev.init"))) = \
 	&cpu_devX;
 
-extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
+extern const struct cpu_dev *const __x86_cpu_dev_start[],
+			    *const __x86_cpu_dev_end[];
 
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
 
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 00000000000..46e29ab96c6
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,901 @@
+/*
+ * CPU x86 architecture debug code
+ *
+ * Copyright(C) 2009 Jaswinder Singh Rajput
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+
+#include <asm/cpu_debug.h>
+#include <asm/paravirt.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
+static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
+static DEFINE_PER_CPU(unsigned, cpu_modelflag);
+static DEFINE_PER_CPU(int, cpu_priv_count);
+static DEFINE_PER_CPU(unsigned, cpu_model);
+
+static DEFINE_MUTEX(cpu_debug_lock);
+
+static struct dentry *cpu_debugfs_dir;
+
+static struct cpu_debug_base cpu_base[] = {
+	{ "mc",		CPU_MC,		0	},
+	{ "monitor",	CPU_MONITOR,	0	},
+	{ "time",	CPU_TIME,	0	},
+	{ "pmc",	CPU_PMC,	1	},
+	{ "platform",	CPU_PLATFORM,	0	},
+	{ "apic",	CPU_APIC,	0	},
+	{ "poweron",	CPU_POWERON,	0	},
+	{ "control",	CPU_CONTROL,	0	},
+	{ "features",	CPU_FEATURES,	0	},
+	{ "lastbranch",	CPU_LBRANCH,	0	},
+	{ "bios",	CPU_BIOS,	0	},
+	{ "freq",	CPU_FREQ,	0	},
+	{ "mtrr",	CPU_MTRR,	0	},
+	{ "perf",	CPU_PERF,	0	},
+	{ "cache",	CPU_CACHE,	0	},
+	{ "sysenter",	CPU_SYSENTER,	0	},
+	{ "therm",	CPU_THERM,	0	},
+	{ "misc",	CPU_MISC,	0	},
+	{ "debug",	CPU_DEBUG,	0	},
+	{ "pat",	CPU_PAT,	0	},
+	{ "vmx",	CPU_VMX,	0	},
+	{ "call",	CPU_CALL,	0	},
+	{ "base",	CPU_BASE,	0	},
+	{ "ver",	CPU_VER,	0	},
+	{ "conf",	CPU_CONF,	0	},
+	{ "smm",	CPU_SMM,	0	},
+	{ "svm",	CPU_SVM,	0	},
+	{ "osvm",	CPU_OSVM,	0	},
+	{ "tss",	CPU_TSS,	0	},
+	{ "cr",		CPU_CR,		0	},
+	{ "dt",		CPU_DT,		0	},
+	{ "registers",	CPU_REG_ALL,	0	},
+};
+
+static struct cpu_file_base cpu_file[] = {
+	{ "index",	CPU_REG_ALL,	0	},
+	{ "value",	CPU_REG_ALL,	1	},
+};
+
+/* Intel Registers Range */
+static struct cpu_debug_range cpu_intel_range[] = {
+	{ 0x00000000, 0x00000001, CPU_MC,	CPU_INTEL_ALL		},
+	{ 0x00000006, 0x00000007, CPU_MONITOR,	CPU_CX_AT_XE		},
+	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_INTEL_ALL		},
+	{ 0x00000011, 0x00000013, CPU_PMC,	CPU_INTEL_PENTIUM	},
+	{ 0x00000017, 0x00000017, CPU_PLATFORM,	CPU_PX_CX_AT_XE		},
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_P6_CX_AT_XE		},
+
+	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_PX_CX_AT_XE		},
+	{ 0x0000002B, 0x0000002B, CPU_POWERON,	CPU_INTEL_XEON		},
+	{ 0x0000002C, 0x0000002C, CPU_FREQ,	CPU_INTEL_XEON		},
+	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	CPU_CX_AT_XE		},
+
+	{ 0x00000040, 0x00000043, CPU_LBRANCH,	CPU_PM_CX_AT_XE		},
+	{ 0x00000044, 0x00000047, CPU_LBRANCH,	CPU_PM_CO_AT		},
+	{ 0x00000060, 0x00000063, CPU_LBRANCH,	CPU_C2_AT		},
+	{ 0x00000064, 0x00000067, CPU_LBRANCH,	CPU_INTEL_ATOM		},
+
+	{ 0x00000079, 0x00000079, CPU_BIOS,	CPU_P6_CX_AT_XE		},
+	{ 0x00000088, 0x0000008A, CPU_CACHE,	CPU_INTEL_P6		},
+	{ 0x0000008B, 0x0000008B, CPU_BIOS,	CPU_P6_CX_AT_XE		},
+	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	CPU_INTEL_XEON		},
+
+	{ 0x000000C1, 0x000000C2, CPU_PMC,	CPU_P6_CX_AT		},
+	{ 0x000000CD, 0x000000CD, CPU_FREQ,	CPU_CX_AT		},
+	{ 0x000000E7, 0x000000E8, CPU_PERF,	CPU_CX_AT		},
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_P6_CX_XE		},
+
+	{ 0x00000116, 0x00000116, CPU_CACHE,	CPU_INTEL_P6		},
+	{ 0x00000118, 0x00000118, CPU_CACHE,	CPU_INTEL_P6		},
+	{ 0x00000119, 0x00000119, CPU_CACHE,	CPU_INTEL_PX		},
+	{ 0x0000011A, 0x0000011B, CPU_CACHE,	CPU_INTEL_P6		},
+	{ 0x0000011E, 0x0000011E, CPU_CACHE,	CPU_PX_CX_AT		},
+
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_P6_CX_AT_XE		},
+	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_PX_CX_AT_XE		},
+	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_P6_XE		},
+	{ 0x00000186, 0x00000187, CPU_PMC,	CPU_P6_CX_AT		},
+	{ 0x00000198, 0x00000199, CPU_PERF,	CPU_PM_CX_AT_XE		},
+	{ 0x0000019A, 0x0000019A, CPU_TIME,	CPU_PM_CX_AT_XE		},
+	{ 0x0000019B, 0x0000019D, CPU_THERM,	CPU_PM_CX_AT_XE		},
+	{ 0x000001A0, 0x000001A0, CPU_MISC,	CPU_PM_CX_AT_XE		},
+
+	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	CPU_PM_CX_AT		},
+	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	CPU_INTEL_XEON		},
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_CX_AT_XE		},
+	{ 0x000001DA, 0x000001DA, CPU_LBRANCH,	CPU_INTEL_XEON		},
+	{ 0x000001DB, 0x000001DB, CPU_LBRANCH,	CPU_P6_XE		},
+	{ 0x000001DC, 0x000001DC, CPU_LBRANCH,	CPU_INTEL_P6		},
+	{ 0x000001DD, 0x000001DE, CPU_LBRANCH,	CPU_PX_CX_AT_XE		},
+	{ 0x000001E0, 0x000001E0, CPU_LBRANCH,	CPU_INTEL_P6		},
+
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_P6_CX_XE		},
+	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_P6_CX_XE		},
+	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_P6_CX_XE		},
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_P6_CX_XE		},
+	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_C2_AT_XE		},
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_P6_CX_XE		},
+
+	{ 0x00000300, 0x00000308, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x00000309, 0x0000030B, CPU_PMC,	CPU_C2_AT_XE		},
+	{ 0x0000030C, 0x00000311, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x00000345, 0x00000345, CPU_PMC,	CPU_C2_AT		},
+	{ 0x00000360, 0x00000371, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x0000038D, 0x00000390, CPU_PMC,	CPU_C2_AT		},
+	{ 0x000003A0, 0x000003BE, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x000003C0, 0x000003CD, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x000003E0, 0x000003E1, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x000003F0, 0x000003F0, CPU_PMC,	CPU_INTEL_XEON		},
+	{ 0x000003F1, 0x000003F1, CPU_PMC,	CPU_C2_AT_XE		},
+	{ 0x000003F2, 0x000003F2, CPU_PMC,	CPU_INTEL_XEON		},
+
+	{ 0x00000400, 0x00000402, CPU_MC,	CPU_PM_CX_AT_XE		},
+	{ 0x00000403, 0x00000403, CPU_MC,	CPU_INTEL_XEON		},
+	{ 0x00000404, 0x00000406, CPU_MC,	CPU_PM_CX_AT_XE		},
+	{ 0x00000407, 0x00000407, CPU_MC,	CPU_INTEL_XEON		},
+	{ 0x00000408, 0x0000040A, CPU_MC,	CPU_PM_CX_AT_XE		},
+	{ 0x0000040B, 0x0000040B, CPU_MC,	CPU_INTEL_XEON		},
+	{ 0x0000040C, 0x0000040E, CPU_MC,	CPU_PM_CX_XE		},
+	{ 0x0000040F, 0x0000040F, CPU_MC,	CPU_INTEL_XEON		},
+	{ 0x00000410, 0x00000412, CPU_MC,	CPU_PM_CX_AT_XE		},
+	{ 0x00000413, 0x00000417, CPU_MC,	CPU_CX_AT_XE		},
+	{ 0x00000480, 0x0000048B, CPU_VMX,	CPU_CX_AT_XE		},
+
+	{ 0x00000600, 0x00000600, CPU_DEBUG,	CPU_PM_CX_AT_XE		},
+	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	CPU_INTEL_XEON		},
+	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	CPU_INTEL_XEON		},
+
+	{ 0x000107CC, 0x000107D3, CPU_PMC,	CPU_INTEL_XEON_MP	},
+
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_INTEL_XEON		},
+	{ 0xC0000081, 0xC0000082, CPU_CALL,	CPU_INTEL_XEON		},
+	{ 0xC0000084, 0xC0000084, CPU_CALL,	CPU_INTEL_XEON		},
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_INTEL_XEON		},
+};
+
+/* AMD Registers Range */
+static struct cpu_debug_range cpu_amd_range[] = {
+	{ 0x00000000, 0x00000001, CPU_MC,	CPU_K10_PLUS,		},
+	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_K8_PLUS,		},
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_K8_PLUS,		},
+	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_K7_PLUS		},
+	{ 0x0000008B, 0x0000008B, CPU_VER,	CPU_K8_PLUS		},
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_K8_PLUS,		},
+
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_K8_PLUS,		},
+	{ 0x00000179, 0x0000017B, CPU_MC,	CPU_K8_PLUS,		},
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_K8_PLUS,		},
+	{ 0x000001DB, 0x000001DE, CPU_LBRANCH,	CPU_K8_PLUS,		},
+
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_K8_PLUS,		},
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_K8_PLUS,		},
+
+	{ 0x00000400, 0x00000413, CPU_MC,	CPU_K8_PLUS,		},
+
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_AMD_ALL,		},
+	{ 0xC0000081, 0xC0000084, CPU_CALL,	CPU_K8_PLUS,		},
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_K8_PLUS,		},
+	{ 0xC0000103, 0xC0000103, CPU_TIME,	CPU_K10_PLUS,		},
+
+	{ 0xC0010000, 0xC0010007, CPU_PMC,	CPU_K8_PLUS,		},
+	{ 0xC0010010, 0xC0010010, CPU_CONF,	CPU_K7_PLUS,		},
+	{ 0xC0010015, 0xC0010015, CPU_CONF,	CPU_K7_PLUS,		},
+	{ 0xC0010016, 0xC001001A, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0xC001001D, 0xC001001D, CPU_MTRR,	CPU_K8_PLUS,		},
+	{ 0xC001001F, 0xC001001F, CPU_CONF,	CPU_K8_PLUS,		},
+	{ 0xC0010030, 0xC0010035, CPU_BIOS,	CPU_K8_PLUS,		},
+	{ 0xC0010044, 0xC0010048, CPU_MC,	CPU_K8_PLUS,		},
+	{ 0xC0010050, 0xC0010056, CPU_SMM,	CPU_K0F_PLUS,		},
+	{ 0xC0010058, 0xC0010058, CPU_CONF,	CPU_K10_PLUS,		},
+	{ 0xC0010060, 0xC0010060, CPU_CACHE,	CPU_AMD_11,		},
+	{ 0xC0010061, 0xC0010068, CPU_SMM,	CPU_K10_PLUS,		},
+	{ 0xC0010069, 0xC001006B, CPU_SMM,	CPU_AMD_11,		},
+	{ 0xC0010070, 0xC0010071, CPU_SMM,	CPU_K10_PLUS,		},
+	{ 0xC0010111, 0xC0010113, CPU_SMM,	CPU_K8_PLUS,		},
+	{ 0xC0010114, 0xC0010118, CPU_SVM,	CPU_K10_PLUS,		},
+	{ 0xC0010140, 0xC0010141, CPU_OSVM,	CPU_K10_PLUS,		},
+	{ 0xC0011022, 0xC0011023, CPU_CONF,	CPU_K10_PLUS,		},
+};
+
+
+/* Intel */
+static int get_intel_modelflag(unsigned model)
+{
+	int flag;
+
+	switch (model) {
+	case 0x0501:
+	case 0x0502:
+	case 0x0504:
+		flag = CPU_INTEL_PENTIUM;
+		break;
+	case 0x0601:
+	case 0x0603:
+	case 0x0605:
+	case 0x0607:
+	case 0x0608:
+	case 0x060A:
+	case 0x060B:
+		flag = CPU_INTEL_P6;
+		break;
+	case 0x0609:
+	case 0x060D:
+		flag = CPU_INTEL_PENTIUM_M;
+		break;
+	case 0x060E:
+		flag = CPU_INTEL_CORE;
+		break;
+	case 0x060F:
+	case 0x0617:
+		flag = CPU_INTEL_CORE2;
+		break;
+	case 0x061C:
+		flag = CPU_INTEL_ATOM;
+		break;
+	case 0x0F00:
+	case 0x0F01:
+	case 0x0F02:
+	case 0x0F03:
+	case 0x0F04:
+		flag = CPU_INTEL_XEON_P4;
+		break;
+	case 0x0F06:
+		flag = CPU_INTEL_XEON_MP;
+		break;
+	default:
+		flag = CPU_NONE;
+		break;
+	}
+
+	return flag;
+}
+
+/* AMD */
+static int get_amd_modelflag(unsigned model)
+{
+	int flag;
+
+	switch (model >> 8) {
+	case 0x6:
+		flag = CPU_AMD_K6;
+		break;
+	case 0x7:
+		flag = CPU_AMD_K7;
+		break;
+	case 0x8:
+		flag = CPU_AMD_K8;
+		break;
+	case 0xf:
+		flag = CPU_AMD_0F;
+		break;
+	case 0x10:
+		flag = CPU_AMD_10;
+		break;
+	case 0x11:
+		flag = CPU_AMD_11;
+		break;
+	default:
+		flag = CPU_NONE;
+		break;
+	}
+
+	return flag;
+}
+
+static int get_cpu_modelflag(unsigned cpu)
+{
+	int flag;
+
+	flag = per_cpu(cpu_model, cpu);
+
+	switch (flag >> 16) {
+	case X86_VENDOR_INTEL:
+		flag = get_intel_modelflag(flag);
+		break;
+	case X86_VENDOR_AMD:
+		flag = get_amd_modelflag(flag & 0xffff);
+		break;
+	default:
+		flag = CPU_NONE;
+		break;
+	}
+
+	return flag;
+}
+
+static int get_cpu_range_count(unsigned cpu)
+{
+	int index;
+
+	switch (per_cpu(cpu_model, cpu) >> 16) {
+	case X86_VENDOR_INTEL:
+		index = ARRAY_SIZE(cpu_intel_range);
+		break;
+	case X86_VENDOR_AMD:
+		index = ARRAY_SIZE(cpu_amd_range);
+		break;
+	default:
+		index = 0;
+		break;
+	}
+
+	return index;
+}
+
+static int is_typeflag_valid(unsigned cpu, unsigned flag)
+{
+	unsigned vendor, modelflag;
+	int i, index;
+
+	/* Standard Registers should be always valid */
+	if (flag >= CPU_TSS)
+		return 1;
+
+	modelflag = per_cpu(cpu_modelflag, cpu);
+	vendor = per_cpu(cpu_model, cpu) >> 16;
+	index = get_cpu_range_count(cpu);
+
+	for (i = 0; i < index; i++) {
+		switch (vendor) {
+		case X86_VENDOR_INTEL:
+			if ((cpu_intel_range[i].model & modelflag) &&
+			    (cpu_intel_range[i].flag & flag))
+				return 1;
+			break;
+		case X86_VENDOR_AMD:
+			if ((cpu_amd_range[i].model & modelflag) &&
+			    (cpu_amd_range[i].flag & flag))
+				return 1;
+			break;
+		}
+	}
+
+	/* Invalid */
+	return 0;
+}
+
+static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
+			      int index, unsigned flag)
+{
+	unsigned modelflag;
+
+	modelflag = per_cpu(cpu_modelflag, cpu);
+	*max = 0;
+	switch (per_cpu(cpu_model, cpu) >> 16) {
+	case X86_VENDOR_INTEL:
+		if ((cpu_intel_range[index].model & modelflag) &&
+		    (cpu_intel_range[index].flag & flag)) {
+			*min = cpu_intel_range[index].min;
+			*max = cpu_intel_range[index].max;
+		}
+		break;
+	case X86_VENDOR_AMD:
+		if ((cpu_amd_range[index].model & modelflag) &&
+		    (cpu_amd_range[index].flag & flag)) {
+			*min = cpu_amd_range[index].min;
+			*max = cpu_amd_range[index].max;
+		}
+		break;
+	}
+
+	return *max;
+}
+
+/* This function can also be called with seq = NULL for printk */
+static void print_cpu_data(struct seq_file *seq, unsigned type,
+			   u32 low, u32 high)
+{
+	struct cpu_private *priv;
+	u64 val = high;
+
+	if (seq) {
+		priv = seq->private;
+		if (priv->file) {
+			val = (val << 32) | low;
+			seq_printf(seq, "0x%llx\n", val);
+		} else
+			seq_printf(seq, " %08x: %08x_%08x\n",
+				   type, high, low);
+	} else
+		printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
+}
+
+/* This function can also be called with seq = NULL for printk */
+static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
+{
+	unsigned msr, msr_min, msr_max;
+	struct cpu_private *priv;
+	u32 low, high;
+	int i, range;
+
+	if (seq) {
+		priv = seq->private;
+		if (priv->file) {
+			if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
+					       &low, &high))
+				print_cpu_data(seq, priv->reg, low, high);
+			return;
+		}
+	}
+
+	range = get_cpu_range_count(cpu);
+
+	for (i = 0; i < range; i++) {
+		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
+			continue;
+
+		for (msr = msr_min; msr <= msr_max; msr++) {
+			if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
+				continue;
+			print_cpu_data(seq, msr, low, high);
+		}
+	}
+}
+
+static void print_tss(void *arg)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct seq_file *seq = arg;
+	unsigned int seg;
+
+	seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
+	seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
+	seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
+	seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
+
+	seq_printf(seq, " RSI\t: %016lx\n", regs->si);
+	seq_printf(seq, " RDI\t: %016lx\n", regs->di);
+	seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
+	seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
+
+#ifdef CONFIG_X86_64
+	seq_printf(seq, " R08\t: %016lx\n", regs->r8);
+	seq_printf(seq, " R09\t: %016lx\n", regs->r9);
+	seq_printf(seq, " R10\t: %016lx\n", regs->r10);
+	seq_printf(seq, " R11\t: %016lx\n", regs->r11);
+	seq_printf(seq, " R12\t: %016lx\n", regs->r12);
+	seq_printf(seq, " R13\t: %016lx\n", regs->r13);
+	seq_printf(seq, " R14\t: %016lx\n", regs->r14);
+	seq_printf(seq, " R15\t: %016lx\n", regs->r15);
+#endif
+
+	asm("movl %%cs,%0" : "=r" (seg));
+	seq_printf(seq, " CS\t:             %04x\n", seg);
+	asm("movl %%ds,%0" : "=r" (seg));
+	seq_printf(seq, " DS\t:             %04x\n", seg);
+	seq_printf(seq, " SS\t:             %04lx\n", regs->ss & 0xffff);
+	asm("movl %%es,%0" : "=r" (seg));
+	seq_printf(seq, " ES\t:             %04x\n", seg);
+	asm("movl %%fs,%0" : "=r" (seg));
+	seq_printf(seq, " FS\t:             %04x\n", seg);
+	asm("movl %%gs,%0" : "=r" (seg));
+	seq_printf(seq, " GS\t:             %04x\n", seg);
+
+	seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
+
+	seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
+}
+
+static void print_cr(void *arg)
+{
+	struct seq_file *seq = arg;
+
+	seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
+	seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
+	seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
+	seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
+#ifdef CONFIG_X86_64
+	seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
+#endif
+}
+
+static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
+{
+	seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
+}
+
+static void print_dt(void *seq)
+{
+	struct desc_ptr dt;
+	unsigned long ldt;
+
+	/* IDT */
+	store_idt((struct desc_ptr *)&dt);
+	print_desc_ptr("IDT", seq, dt);
+
+	/* GDT */
+	store_gdt((struct desc_ptr *)&dt);
+	print_desc_ptr("GDT", seq, dt);
+
+	/* LDT */
+	store_ldt(ldt);
+	seq_printf(seq, " LDT\t: %016lx\n", ldt);
+
+	/* TR */
+	store_tr(ldt);
+	seq_printf(seq, " TR\t: %016lx\n", ldt);
+}
+
+static void print_dr(void *arg)
+{
+	struct seq_file *seq = arg;
+	unsigned long dr;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		/* Ignore db4, db5 */
+		if ((i == 4) || (i == 5))
+			continue;
+		get_debugreg(dr, i);
+		seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
+	}
+
+	seq_printf(seq, "\n MSR\t:\n");
+}
+
+static void print_apic(void *arg)
+{
+	struct seq_file *seq = arg;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(seq, " LAPIC\t:\n");
+	seq_printf(seq, " ID\t\t: %08x\n",  apic_read(APIC_ID) >> 24);
+	seq_printf(seq, " LVR\t\t: %08x\n",  apic_read(APIC_LVR));
+	seq_printf(seq, " TASKPRI\t: %08x\n",  apic_read(APIC_TASKPRI));
+	seq_printf(seq, " ARBPRI\t\t: %08x\n",  apic_read(APIC_ARBPRI));
+	seq_printf(seq, " PROCPRI\t: %08x\n",  apic_read(APIC_PROCPRI));
+	seq_printf(seq, " LDR\t\t: %08x\n",  apic_read(APIC_LDR));
+	seq_printf(seq, " DFR\t\t: %08x\n",  apic_read(APIC_DFR));
+	seq_printf(seq, " SPIV\t\t: %08x\n",  apic_read(APIC_SPIV));
+	seq_printf(seq, " ISR\t\t: %08x\n",  apic_read(APIC_ISR));
+	seq_printf(seq, " ESR\t\t: %08x\n",  apic_read(APIC_ESR));
+	seq_printf(seq, " ICR\t\t: %08x\n",  apic_read(APIC_ICR));
+	seq_printf(seq, " ICR2\t\t: %08x\n",  apic_read(APIC_ICR2));
+	seq_printf(seq, " LVTT\t\t: %08x\n",  apic_read(APIC_LVTT));
+	seq_printf(seq, " LVTTHMR\t: %08x\n",  apic_read(APIC_LVTTHMR));
+	seq_printf(seq, " LVTPC\t\t: %08x\n",  apic_read(APIC_LVTPC));
+	seq_printf(seq, " LVT0\t\t: %08x\n",  apic_read(APIC_LVT0));
+	seq_printf(seq, " LVT1\t\t: %08x\n",  apic_read(APIC_LVT1));
+	seq_printf(seq, " LVTERR\t\t: %08x\n",  apic_read(APIC_LVTERR));
+	seq_printf(seq, " TMICT\t\t: %08x\n",  apic_read(APIC_TMICT));
+	seq_printf(seq, " TMCCT\t\t: %08x\n",  apic_read(APIC_TMCCT));
+	seq_printf(seq, " TDCR\t\t: %08x\n",  apic_read(APIC_TDCR));
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+	seq_printf(seq, "\n MSR\t:\n");
+}
+
+static int cpu_seq_show(struct seq_file *seq, void *v)
+{
+	struct cpu_private *priv = seq->private;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	switch (cpu_base[priv->type].flag) {
+	case CPU_TSS:
+		smp_call_function_single(priv->cpu, print_tss, seq, 1);
+		break;
+	case CPU_CR:
+		smp_call_function_single(priv->cpu, print_cr, seq, 1);
+		break;
+	case CPU_DT:
+		smp_call_function_single(priv->cpu, print_dt, seq, 1);
+		break;
+	case CPU_DEBUG:
+		if (priv->file == CPU_INDEX_BIT)
+			smp_call_function_single(priv->cpu, print_dr, seq, 1);
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+	case CPU_APIC:
+		if (priv->file == CPU_INDEX_BIT)
+			smp_call_function_single(priv->cpu, print_apic, seq, 1);
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+
+	default:
+		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
+		break;
+	}
+	seq_printf(seq, "\n");
+
+	return 0;
+}
+
+static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos == 0) /* One time is enough ;-) */
+		return seq;
+
+	return NULL;
+}
+
+static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	(*pos)++;
+
+	return cpu_seq_start(seq, pos);
+}
+
+static void cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations cpu_seq_ops = {
+	.start		= cpu_seq_start,
+	.next		= cpu_seq_next,
+	.stop		= cpu_seq_stop,
+	.show		= cpu_seq_show,
+};
+
+static int cpu_seq_open(struct inode *inode, struct file *file)
+{
+	struct cpu_private *priv = inode->i_private;
+	struct seq_file *seq;
+	int err;
+
+	err = seq_open(file, &cpu_seq_ops);
+	if (!err) {
+		seq = file->private_data;
+		seq->private = priv;
+	}
+
+	return err;
+}
+
+static int write_msr(struct cpu_private *priv, u64 val)
+{
+	u32 low, high;
+
+	high = (val >> 32) & 0xffffffff;
+	low = val & 0xffffffff;
+
+	if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
+		return 0;
+
+	return -EPERM;
+}
+
+static int write_cpu_register(struct cpu_private *priv, const char *buf)
+{
+	int ret = -EPERM;
+	u64 val;
+
+	ret = strict_strtoull(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	/* Supporting only MSRs */
+	if (priv->type < CPU_TSS_BIT)
+		return write_msr(priv, val);
+
+	return ret;
+}
+
+static ssize_t cpu_write(struct file *file, const char __user *ubuf,
+			     size_t count, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	struct cpu_private *priv = seq->private;
+	char buf[19];
+
+	if ((priv == NULL) || (count >= sizeof(buf)))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
+		if (!write_cpu_register(priv, buf))
+			return count;
+
+	return -EACCES;
+}
+
+static const struct file_operations cpu_fops = {
+	.owner		= THIS_MODULE,
+	.open		= cpu_seq_open,
+	.read		= seq_read,
+	.write		= cpu_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
+			   unsigned file, struct dentry *dentry)
+{
+	struct cpu_private *priv = NULL;
+
+	/* Already intialized */
+	if (file == CPU_INDEX_BIT)
+		if (per_cpu(cpu_arr[type].init, cpu))
+			return 0;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	priv->cpu = cpu;
+	priv->type = type;
+	priv->reg = reg;
+	priv->file = file;
+	mutex_lock(&cpu_debug_lock);
+	per_cpu(priv_arr[type], cpu) = priv;
+	per_cpu(cpu_priv_count, cpu)++;
+	mutex_unlock(&cpu_debug_lock);
+
+	if (file)
+		debugfs_create_file(cpu_file[file].name, S_IRUGO,
+				    dentry, (void *)priv, &cpu_fops);
+	else {
+		debugfs_create_file(cpu_base[type].name, S_IRUGO,
+				    per_cpu(cpu_arr[type].dentry, cpu),
+				    (void *)priv, &cpu_fops);
+		mutex_lock(&cpu_debug_lock);
+		per_cpu(cpu_arr[type].init, cpu) = 1;
+		mutex_unlock(&cpu_debug_lock);
+	}
+
+	return 0;
+}
+
+static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
+			     struct dentry *dentry)
+{
+	unsigned file;
+	int err = 0;
+
+	for (file = 0; file <  ARRAY_SIZE(cpu_file); file++) {
+		err = cpu_create_file(cpu, type, reg, file, dentry);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
+{
+	struct dentry *cpu_dentry = NULL;
+	unsigned reg, reg_min, reg_max;
+	int i, range, err = 0;
+	char reg_dir[12];
+	u32 low, high;
+
+	range = get_cpu_range_count(cpu);
+
+	for (i = 0; i < range; i++) {
+		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
+				   cpu_base[type].flag))
+			continue;
+
+		for (reg = reg_min; reg <= reg_max; reg++) {
+			if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
+				continue;
+
+			sprintf(reg_dir, "0x%x", reg);
+			cpu_dentry = debugfs_create_dir(reg_dir, dentry);
+			err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
+			if (err)
+				return err;
+		}
+	}
+
+	return err;
+}
+
+static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
+{
+	struct dentry *cpu_dentry = NULL;
+	unsigned type;
+	int err = 0;
+
+	for (type = 0; type <  ARRAY_SIZE(cpu_base) - 1; type++) {
+		if (!is_typeflag_valid(cpu, cpu_base[type].flag))
+			continue;
+		cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
+		per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
+
+		if (type < CPU_TSS_BIT)
+			err = cpu_init_msr(cpu, type, cpu_dentry);
+		else
+			err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
+					      cpu_dentry);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int cpu_init_cpu(void)
+{
+	struct dentry *cpu_dentry = NULL;
+	struct cpuinfo_x86 *cpui;
+	char cpu_dir[12];
+	unsigned cpu;
+	int err = 0;
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+		cpui = &cpu_data(cpu);
+		if (!cpu_has(cpui, X86_FEATURE_MSR))
+			continue;
+		per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
+					   (cpui->x86 << 8) |
+					   (cpui->x86_model));
+		per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
+
+		sprintf(cpu_dir, "cpu%d", cpu);
+		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
+		err = cpu_init_allreg(cpu, cpu_dentry);
+
+		pr_info("cpu%d(%d) debug files %d\n",
+			cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
+		if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
+			pr_err("Register files count %d exceeds limit %d\n",
+				per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
+			per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
+			err = -ENFILE;
+		}
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int __init cpu_debug_init(void)
+{
+	cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
+
+	return cpu_init_cpu();
+}
+
+static void __exit cpu_debug_exit(void)
+{
+	int i, cpu;
+
+	if (cpu_debugfs_dir)
+		debugfs_remove_recursive(cpu_debugfs_dir);
+
+	for (cpu = 0; cpu <  nr_cpu_ids; cpu++)
+		for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
+			kfree(per_cpu(priv_arr[i], cpu));
+}
+
+module_init(cpu_debug_init);
+module_exit(cpu_debug_exit);
+
+MODULE_AUTHOR("Jaswinder Singh Rajput");
+MODULE_DESCRIPTION("CPU Debug module");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 65792c2cc46..52c83987547 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI
 config X86_POWERNOW_K8
 	tristate "AMD Opteron/Athlon64 PowerNow!"
 	select CPU_FREQ_TABLE
+	depends on ACPI && ACPI_PROCESSOR
 	help
-	  This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
+	  This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called powernow-k8.
 
 	  For details, take a look at <file:Documentation/cpu-freq/>.
 
-	  If in doubt, say N.
-
-config X86_POWERNOW_K8_ACPI
-	bool
-	prompt "ACPI Support" if X86_32
-	depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR
-	depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
-	default y
-	help
-	  This provides access to the K8s Processor Performance States via ACPI.
-	  This driver is probably required for CPUFreq to work with multi-socket and
-	  SMP systems.  It is not required on at least some single-socket yet
-	  multi-core systems, even if SMP is enabled.
-
-	  It is safe to say Y here.
-
 config X86_GX_SUSPMOD
 	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
 	depends on X86_32 && PCI
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 560f7760dae..509296df294 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -1,6 +1,11 @@
+# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
+# K8 systems. ACPI is preferred to all other hardware-specific drivers.
+# speedstep-* is preferred over p4-clockmod.
+
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
 obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
 obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
 obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
 obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
@@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
 obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
 obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
 obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
 obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
 obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
 obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c..23da96e57b1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -1,5 +1,5 @@
 /*
- * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
+ * acpi-cpufreq.c - ACPI Processor P-States Driver
  *
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -36,16 +36,18 @@
 #include <linux/ftrace.h>
 
 #include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+
 #include <acpi/processor.h>
 
-#include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
-#include <asm/delay.h>
-#include <asm/uaccess.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"acpi-cpufreq", msg)
 
 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@ -95,7 +97,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
 
 	perf = data->acpi_data;
 
-	for (i=0; i<perf->state_count; i++) {
+	for (i = 0; i < perf->state_count; i++) {
 		if (value == perf->states[i].status)
 			return data->freq_table[i].frequency;
 	}
@@ -110,7 +112,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 	msr &= INTEL_MSR_RANGE;
 	perf = data->acpi_data;
 
-	for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
 		if (msr == perf->states[data->freq_table[i].index].status)
 			return data->freq_table[i].frequency;
 	}
@@ -138,15 +140,13 @@ struct io_addr {
 	u8 bit_width;
 };
 
-typedef union {
-	struct msr_addr msr;
-	struct io_addr io;
-} drv_addr_union;
-
 struct drv_cmd {
 	unsigned int type;
 	const struct cpumask *mask;
-	drv_addr_union addr;
+	union {
+		struct msr_addr msr;
+		struct io_addr io;
+	} addr;
 	u32 val;
 };
 
@@ -369,7 +369,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 	unsigned int cur_freq;
 	unsigned int i;
 
-	for (i=0; i<100; i++) {
+	for (i = 0; i < 100; i++) {
 		cur_freq = extract_freq(get_cur_val(mask), data);
 		if (cur_freq == freq)
 			return 1;
@@ -494,7 +494,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 		unsigned long freq;
 		unsigned long freqn = perf->states[0].core_frequency * 1000;
 
-		for (i=0; i<(perf->state_count-1); i++) {
+		for (i = 0; i < (perf->state_count-1); i++) {
 			freq = freqn;
 			freqn = perf->states[i+1].core_frequency * 1000;
 			if ((2 * cpu_khz) > (freqn + freq)) {
@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (!data)
 		return -ENOMEM;
 
-	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
 	per_cpu(drv_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
@@ -673,7 +673,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	/* detect transition latency */
 	policy->cpuinfo.transition_latency = 0;
-	for (i=0; i<perf->state_count; i++) {
+	for (i = 0; i < perf->state_count; i++) {
 		if ((perf->states[i].transition_latency * 1000) >
 		    policy->cpuinfo.transition_latency)
 			policy->cpuinfo.transition_latency =
@@ -682,8 +682,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	data->max_freq = perf->states[0].core_frequency * 1000;
 	/* table init */
-	for (i=0; i<perf->state_count; i++) {
-		if (i>0 && perf->states[i].core_frequency >=
+	for (i = 0; i < perf->state_count; i++) {
+		if (i > 0 && perf->states[i].core_frequency >=
 		    data->freq_table[valid_states-1].frequency / 1000)
 			continue;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 965ea52767a..733093d6043 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -32,7 +32,7 @@
  * nforce2_chipset:
  * FSB is changed using the chipset
  */
-static struct pci_dev *nforce2_chipset_dev;
+static struct pci_dev *nforce2_dev;
 
 /* fid:
  * multiplier * 10
@@ -56,7 +56,9 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
 MODULE_PARM_DESC(min_fsb,
 		"Minimum FSB to use, if not defined: current FSB - 50");
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
+#define PFX "cpufreq-nforce2: "
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"cpufreq-nforce2", msg)
 
 /**
  * nforce2_calc_fsb - calculate FSB
@@ -118,11 +120,11 @@ static void nforce2_write_pll(int pll)
 	int temp;
 
 	/* Set the pll addr. to 0x00 */
-	pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0);
+	pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
 
 	/* Now write the value in all 64 registers */
 	for (temp = 0; temp <= 0x3f; temp++)
-		pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll);
+		pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
 
 	return;
 }
@@ -139,8 +141,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
 	u32 fsb, temp = 0;
 
 	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
-	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
-						0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
+	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
+				PCI_ANY_ID, PCI_ANY_ID, NULL);
 	if (!nforce2_sub5)
 		return 0;
 
@@ -148,13 +150,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
 	fsb /= 1000000;
 
 	/* Check if PLL register is already set */
-	pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
 
 	if (bootfsb || !temp)
 		return fsb;
 
 	/* Use PLL register FSB value */
-	pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
+	pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
 	fsb = nforce2_calc_fsb(temp);
 
 	return fsb;
@@ -174,18 +176,18 @@ static int nforce2_set_fsb(unsigned int fsb)
 	int pll = 0;
 
 	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
+		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
 		return -EINVAL;
 	}
 
 	tfsb = nforce2_fsb_read(0);
 	if (!tfsb) {
-		printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
+		printk(KERN_ERR PFX "Error while reading the FSB\n");
 		return -EINVAL;
 	}
 
 	/* First write? Then set actual value */
-	pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
 	if (!temp) {
 		pll = nforce2_calc_pll(tfsb);
 
@@ -197,7 +199,7 @@ static int nforce2_set_fsb(unsigned int fsb)
 
 	/* Enable write access */
 	temp = 0x01;
-	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
+	pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
 
 	diff = tfsb - fsb;
 
@@ -222,7 +224,7 @@ static int nforce2_set_fsb(unsigned int fsb)
 	}
 
 	temp = 0x40;
-	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
+	pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
 
 	return 0;
 }
@@ -244,7 +246,8 @@ static unsigned int nforce2_get(unsigned int cpu)
  * nforce2_target - set a new CPUFreq policy
  * @policy: new policy
  * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
  *
  * Sets a new CPUFreq policy.
  */
@@ -276,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 	/* local_irq_save(flags); */
 
 	if (nforce2_set_fsb(target_fsb) < 0)
-		printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
+		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
 			target_fsb);
 	else
 		dprintk("Changed FSB successfully to %d\n",
@@ -327,8 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 	/* FIX: Get FID from CPU */
 	if (!fid) {
 		if (!cpu_khz) {
-			printk(KERN_WARNING
-			       "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
+			printk(KERN_WARNING PFX
+			"cpu_khz not set, can't calculate multiplier!\n");
 			return -ENODEV;
 		}
 
@@ -343,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 		}
 	}
 
-	printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
+	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
 	       fid / 10, fid % 10);
 
 	/* Set maximum FSB to FSB at boot time */
@@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = {
  */
 static unsigned int nforce2_detect_chipset(void)
 {
-	nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+	nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
 					PCI_DEVICE_ID_NVIDIA_NFORCE2,
 					PCI_ANY_ID, PCI_ANY_ID, NULL);
 
-	if (nforce2_chipset_dev == NULL)
+	if (nforce2_dev == NULL)
 		return -ENODEV;
 
-	printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
-	       nforce2_chipset_dev->revision);
-	printk(KERN_INFO
-	       "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
+	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
+	       nforce2_dev->revision);
+	printk(KERN_INFO PFX
+	       "FSB changing is maybe unstable and can lead to "
+	       "crashes and data loss.\n");
 
 	return 0;
 }
@@ -420,7 +424,7 @@ static int __init nforce2_init(void)
 
 	/* detect chipset */
 	if (nforce2_detect_chipset()) {
-		printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
+		printk(KERN_INFO PFX "No nForce2 chipset.\n");
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d8664..35a257dd4bb 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -12,12 +12,12 @@
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
 #include <asm/msr.h>
 #include <asm/tsc.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <asm/delay.h>
 
 #define EPS_BRAND_C7M	0
 #define EPS_BRAND_C7	1
@@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		break;
 	}
 
-	switch(brand) {
+	switch (brand) {
 	case EPS_BRAND_C7M:
 		printk(KERN_CONT "C7-M\n");
 		break;
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	}
 	/* Enable Enhanced PowerSaver */
 	rdmsrl(MSR_IA32_MISC_ENABLE, val);
-	if (!(val & 1 << 16)) {
-		val |= 1 << 16;
+	if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
 		wrmsrl(MSR_IA32_MISC_ENABLE, val);
 		/* Can be locked at 0 */
 		rdmsrl(MSR_IA32_MISC_ENABLE, val);
-		if (!(val & 1 << 16)) {
+		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
 			return -ENODEV;
 		}
@@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+	printk(KERN_INFO "eps: Current voltage = %dmV\n",
+			current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
 	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
 
 	/* Print limits */
 	max_voltage = hi & 0xff;
-	printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
+			max_voltage * 16 + 700);
 	max_multiplier = (hi >> 8) & 0xff;
 	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
 	min_voltage = (hi >> 16) & 0xff;
-	printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
+			min_voltage * 16 + 700);
 	min_multiplier = (hi >> 24) & 0xff;
 	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
 
@@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static struct freq_attr* eps_attr[] = {
+static struct freq_attr *eps_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -356,7 +359,7 @@ static void __exit eps_exit(void)
 	cpufreq_unregister_driver(&eps_driver);
 }
 
-MODULE_AUTHOR("Rafa� Bilski <rafalbilski@interia.pl>");
+MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
 MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index fe613c93b36..006b278b0d5 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy,
 {
 	unsigned int newstate = 0;
 
-	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	elanfreq_set_cpu_state(newstate);
@@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void)
 module_param(max_freq, int, 0444);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
+		"Sven Geggus <sven@geggus.net>");
 MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
 
 module_init(elanfreq_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 9d9eae82e60..ac27ec2264d 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -79,8 +79,9 @@
 #include <linux/smp.h>
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
+#include <linux/errno.h>
+
 #include <asm/processor-cyrix.h>
-#include <asm/errno.h>
 
 /* PCI config registers, all at F0 */
 #define PCI_PMER1	0x80	/* power management enable register 1 */
@@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params;
 static int stock_freq;
 
 /* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
-static int pci_busclk = 0;
-module_param (pci_busclk, int, 0444);
+static int pci_busclk;
+module_param(pci_busclk, int, 0444);
 
 /* maximum duration for which the cpu may be suspended
  * (32us * MAX_DURATION). If no parameter is given, this defaults
@@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444);
  * is suspended -- processing power is just 0.39% of what it used to be,
  * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
 static int max_duration = 255;
-module_param (max_duration, int, 0444);
+module_param(max_duration, int, 0444);
 
 /* For the default policy, we want at least some processing power
  * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
@@ -140,7 +141,8 @@ module_param (max_duration, int, 0444);
 #define POLICY_MIN_DIV 20
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"gx-suspmod", msg)
 
 /**
  * we can detect a core multipiler from dir0_lsb
@@ -166,12 +168,20 @@ static int gx_freq_mult[16] = {
  *	Low Level chipset interface				*
  ****************************************************************/
 static struct pci_device_id gx_chipset_tbl[] __initdata = {
-	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510,
+		PCI_ANY_ID, PCI_ANY_ID },
 	{ 0, },
 };
 
+static void gx_write_byte(int reg, int value)
+{
+	pci_write_config_byte(gx_params->cs55x0, reg, value);
+}
+
 /**
  * gx_detect_chipset:
  *
@@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
 /**
  * gx_get_cpuspeed:
  *
- * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
+ * Geode CPU runs.
  */
 static unsigned int gx_get_cpuspeed(unsigned int cpu)
 {
@@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu)
  *
  **/
 
-static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
+		u8 *off_duration)
 {
 	unsigned int i;
 	u8 tmp_on, tmp_off;
 	int old_tmp_freq = stock_freq;
 	int tmp_freq;
 
-	*off_duration=1;
-	*on_duration=0;
+	*off_duration = 1;
+	*on_duration = 0;
 
-	for (i=max_duration; i>0; i--) {
+	for (i = max_duration; i > 0; i--) {
 		tmp_off = ((khz * i) / stock_freq) & 0xff;
 		tmp_on = i - tmp_off;
 		tmp_freq = (stock_freq * tmp_off) / i;
@@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz)
 	freqs.cpu = 0;
 	freqs.old = gx_get_cpuspeed(0);
 
-	new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
+	new_khz = gx_validate_speed(khz, &gx_params->on_duration,
+			&gx_params->off_duration);
 
 	freqs.new = new_khz;
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	local_irq_save(flags);
 
-	if (new_khz != stock_freq) {  /* if new khz == 100% of CPU speed, it is special case */
+
+
+	if (new_khz != stock_freq) {
+		/* if new khz == 100% of CPU speed, it is special case */
 		switch (gx_params->cs55x0->device) {
 		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
 			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
 			/* FIXME: need to test other values -- Zwane,Miura */
-			pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
-			pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
-			pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
-
-			if (gx_params->cs55x0->revision < 0x10) {   /* CS5530(rev 1.2, 1.3) */
-				suscfg = gx_params->pci_suscfg | SUSMOD;
-			} else {                           /* CS5530A,B.. */
-				suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
+			/* typical 2 to 4ms */
+			gx_write_byte(PCI_IRQTC, 4);
+			/* typical 50 to 100ms */
+			gx_write_byte(PCI_VIDTC, 100);
+			gx_write_byte(PCI_PMER1, pmer1);
+
+			if (gx_params->cs55x0->revision < 0x10) {
+				/* CS5530(rev 1.2, 1.3) */
+				suscfg = gx_params->pci_suscfg|SUSMOD;
+			} else {
+				/* CS5530A,B.. */
+				suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
 			}
 			break;
 		case PCI_DEVICE_ID_CYRIX_5520:
@@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz)
 		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
 		gx_params->off_duration = 0;
 		gx_params->on_duration = 0;
-		dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n");
+		dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
 	}
 
-	pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
-	pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
+	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
+	gx_write_byte(PCI_MODON, gx_params->on_duration);
 
-	pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
+	gx_write_byte(PCI_SUSCFG, suscfg);
 	pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
 
 	local_irq_restore(flags);
@@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
 		return -EINVAL;
 
 	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+			stock_freq);
 
 	/* it needs to be assured that at least one supported frequency is
 	 * within policy->min and policy->max. If it is not, policy->max
@@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
 	policy->max = tmp_freq;
 	if (policy->max < policy->min)
 		policy->max = policy->min;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+			stock_freq);
 
 	return 0;
 }
@@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 
 	/* determine maximum frequency */
-	if (pci_busclk) {
+	if (pci_busclk)
 		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-	} else if (cpu_khz) {
+	else if (cpu_khz)
 		maxfreq = cpu_khz;
-	} else {
+	else
 		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-	}
+
 	stock_freq = maxfreq;
 	curfreq = gx_get_cpuspeed(0);
 
 	dprintk("cpu max frequency is %d.\n", maxfreq);
-	dprintk("cpu current frequency is %dkHz.\n",curfreq);
+	dprintk("cpu current frequency is %dkHz.\n", curfreq);
 
 	/* setup basic struct for cpufreq API */
 	policy->cpu = 0;
@@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void)
 	struct pci_dev *gx_pci;
 
 	/* Test if we have the right hardware */
-	if ((gx_pci = gx_detect_chipset()) == NULL)
+	gx_pci = gx_detect_chipset();
+	if (gx_pci == NULL)
 		return -ENODEV;
 
 	/* check whether module parameters are sane */
@@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void)
 	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
 	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
 	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
-	pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
+	pci_read_config_byte(params->cs55x0, PCI_MODOFF,
+			&(params->off_duration));
 
-	if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
+	ret = cpufreq_register_driver(&gx_suspmod_driver);
+	if (ret) {
 		kfree(params);
 		return ret;                   /* register error! */
 	}
@@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void)
 	kfree(gx_params);
 }
 
-MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
-MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE("GPL");
 
 module_init(cpufreq_gx_init);
 module_exit(cpufreq_gx_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index a4cff5d6e38..f1c51aea064 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -30,12 +30,12 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/kernel.h>
 
 #include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <asm/acpi.h>
-#include <linux/acpi.h>
 #include <acpi/processor.h>
 
 #include "longhaul.h"
@@ -58,7 +58,7 @@
 #define USE_NORTHBRIDGE		(1 << 2)
 
 static int cpu_model;
-static unsigned int numscales=16;
+static unsigned int numscales = 16;
 static unsigned int fsb;
 
 static const struct mV_pos *vrm_mV_table;
@@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table;
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static struct acpi_processor *pr = NULL;
-static struct acpi_processor_cx *cx = NULL;
+static struct acpi_processor *pr;
+static struct acpi_processor_cx *cx;
 static u32 acpi_regs_addr;
 static u8 longhaul_flags;
 static unsigned int longhaul_index;
@@ -78,12 +78,13 @@ static int scale_voltage;
 static int disable_acpi_c3;
 static int revid_errata;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"longhaul", msg)
 
 
 /* Clock ratios multiplied by 10 */
-static int clock_ratio[32];
-static int eblcr_table[32];
+static int mults[32];
+static int eblcr[32];
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
 
@@ -93,7 +94,7 @@ static char speedbuffer[8];
 static char *print_speed(int speed)
 {
 	if (speed < 1000) {
-		snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed);
+		snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
 		return speedbuffer;
 	}
 
@@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult)
 
 static int longhaul_get_cpu_mult(void)
 {
-	unsigned long invalue=0,lo, hi;
+	unsigned long invalue = 0, lo, hi;
 
-	rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
-	invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
-	if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
+	rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
+	invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
+	if (longhaul_version == TYPE_LONGHAUL_V2 ||
+	    longhaul_version == TYPE_POWERSAVER) {
 		if (lo & (1<<27))
-			invalue+=16;
+			invalue += 16;
 	}
-	return eblcr_table[invalue];
+	return eblcr[invalue];
 }
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int mults_index)
 {
 	union msr_bcr2 bcr2;
 
 	rdmsrl(MSR_VIA_BCR2, bcr2.val);
 	/* Enable software clock multiplier */
 	bcr2.bits.ESOFTBF = 1;
-	bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff;
+	bcr2.bits.CLOCKMUL = mults_index & 0xff;
 
 	/* Sync to timer tick */
 	safe_halt();
@@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
 
 /* For processor with Longhaul MSR */
 
-static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
+static void do_powersaver(int cx_address, unsigned int mults_index,
 			  unsigned int dir)
 {
 	union msr_longhaul longhaul;
@@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
 		longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
 	else
 		longhaul.bits.RevisionKey = 0;
-	longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf;
-	longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
+	longhaul.bits.SoftBusRatio = mults_index & 0xf;
+	longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
 	/* Setup new voltage */
 	if (can_scale_voltage)
-		longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f;
+		longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
 	/* Sync to timer tick */
 	safe_halt();
 	/* Raise voltage if necessary */
@@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
 
 /**
  * longhaul_set_cpu_frequency()
- * @clock_ratio_index : bitpattern of the new multiplier.
+ * @mults_index : bitpattern of the new multiplier.
  *
  * Sets a new clock ratio.
  */
 
 static void longhaul_setstate(unsigned int table_index)
 {
-	unsigned int clock_ratio_index;
+	unsigned int mults_index;
 	int speed, mult;
 	struct cpufreq_freqs freqs;
 	unsigned long flags;
@@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index)
 	u32 bm_timeout = 1000;
 	unsigned int dir = 0;
 
-	clock_ratio_index = longhaul_table[table_index].index;
+	mults_index = longhaul_table[table_index].index;
 	/* Safety precautions */
-	mult = clock_ratio[clock_ratio_index & 0x1f];
+	mult = mults[mults_index & 0x1f];
 	if (mult == -1)
 		return;
 	speed = calc_speed(mult);
@@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-	dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+	dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
 			fsb, mult/10, mult%10, print_speed(speed/1000));
 retry_loop:
 	preempt_disable();
@@ -282,8 +284,8 @@ retry_loop:
 
 	pic2_mask = inb(0xA1);
 	pic1_mask = inb(0x21);	/* works on C3. save mask. */
-	outb(0xFF,0xA1);	/* Overkill */
-	outb(0xFE,0x21);	/* TMR0 only */
+	outb(0xFF, 0xA1);	/* Overkill */
+	outb(0xFE, 0x21);	/* TMR0 only */
 
 	/* Wait while PCI bus is busy. */
 	if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
@@ -312,7 +314,7 @@ retry_loop:
 	 * Software controlled multipliers only.
 	 */
 	case TYPE_LONGHAUL_V1:
-		do_longhaul1(clock_ratio_index);
+		do_longhaul1(mults_index);
 		break;
 
 	/*
@@ -327,9 +329,9 @@ retry_loop:
 		if (longhaul_flags & USE_ACPI_C3) {
 			/* Don't allow wakeup */
 			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
-			do_powersaver(cx->address, clock_ratio_index, dir);
+			do_powersaver(cx->address, mults_index, dir);
 		} else {
-			do_powersaver(0, clock_ratio_index, dir);
+			do_powersaver(0, mults_index, dir);
 		}
 		break;
 	}
@@ -341,8 +343,8 @@ retry_loop:
 		/* Enable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
 	}
-	outb(pic2_mask,0xA1);	/* restore mask */
-	outb(pic1_mask,0x21);
+	outb(pic2_mask, 0xA1);	/* restore mask */
+	outb(pic1_mask, 0x21);
 
 	local_irq_restore(flags);
 	preempt_enable();
@@ -392,7 +394,8 @@ retry_loop:
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
 	if (!bm_timeout)
-		printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n");
+		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
+				"idle PCI bus.\n");
 }
 
 /*
@@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void)
 		break;
 	}
 
-	dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
+	dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
 		 minmult/10, minmult%10, maxmult/10, maxmult%10);
 
 	highest_speed = calc_speed(maxmult);
 	lowest_speed = calc_speed(minmult);
-	dprintk ("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+	dprintk("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
 		 print_speed(lowest_speed/1000),
 		 print_speed(highest_speed/1000));
 
 	if (lowest_speed == highest_speed) {
-		printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
 		return -EINVAL;
 	}
 	if (lowest_speed > highest_speed) {
-		printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
 			lowest_speed, highest_speed);
 		return -EINVAL;
 	}
 
-	longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
-	if(!longhaul_table)
+	longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
+			GFP_KERNEL);
+	if (!longhaul_table)
 		return -ENOMEM;
 
 	for (j = 0; j < numscales; j++) {
-		ratio = clock_ratio[j];
+		ratio = mults[j];
 		if (ratio == -1)
 			continue;
 		if (ratio > maxmult || ratio < minmult)
@@ -507,13 +511,10 @@ static int __init longhaul_get_ranges(void)
 			}
 		}
 		if (min_i != j) {
-			unsigned int temp;
-			temp = longhaul_table[j].frequency;
-			longhaul_table[j].frequency = longhaul_table[min_i].frequency;
-			longhaul_table[min_i].frequency = temp;
-			temp = longhaul_table[j].index;
-			longhaul_table[j].index = longhaul_table[min_i].index;
-			longhaul_table[min_i].index = temp;
+			swap(longhaul_table[j].frequency,
+			     longhaul_table[min_i].frequency);
+			swap(longhaul_table[j].index,
+			     longhaul_table[min_i].index);
 		}
 	}
 
@@ -521,7 +522,7 @@ static int __init longhaul_get_ranges(void)
 
 	/* Find index we are running on */
 	for (j = 0; j < k; j++) {
-		if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) {
+		if (mults[longhaul_table[j].index & 0x1f] == mult) {
 			longhaul_index = j;
 			break;
 		}
@@ -559,20 +560,22 @@ static void __init longhaul_setup_voltagescaling(void)
 	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
 
 	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
 					"Voltage scaling disabled.\n",
-					minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
+					minvid.mV/1000, minvid.mV%1000,
+					maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	if (minvid.mV == maxvid.mV) {
-		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
-				"both %d.%03d. Voltage scaling disabled\n",
+		printk(KERN_INFO PFX "Claims to support voltage scaling but "
+				"min & max are both %d.%03d. "
+				"Voltage scaling disabled\n",
 				maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	/* How many voltage steps */
+	/* How many voltage steps*/
 	numvscales = maxvid.pos - minvid.pos + 1;
 	printk(KERN_INFO PFX
 		"Max VID=%d.%03d  "
@@ -586,7 +589,7 @@ static void __init longhaul_setup_voltagescaling(void)
 	j = longhaul.bits.MinMHzBR;
 	if (longhaul.bits.MinMHzBR4)
 		j += 16;
-	min_vid_speed = eblcr_table[j];
+	min_vid_speed = eblcr[j];
 	if (min_vid_speed == -1)
 		return;
 	switch (longhaul.bits.MinMHzFSB) {
@@ -617,7 +620,8 @@ static void __init longhaul_setup_voltagescaling(void)
 			pos = minvid.pos;
 		longhaul_table[j].index |= mV_vrm_table[pos] << 8;
 		vid = vrm_mV_table[mV_vrm_table[pos]];
-		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV);
+		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+				speed, j, vid.mV);
 		j++;
 	}
 
@@ -640,7 +644,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
 	unsigned int dir = 0;
 	u8 vid, current_vid;
 
-	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
+	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
+				relation, &table_index))
 		return -EINVAL;
 
 	/* Don't set same frequency again */
@@ -656,7 +661,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
 		 * this in hardware, C3 is old and we need to do this
 		 * in software. */
 		i = longhaul_index;
-		current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f;
+		current_vid = (longhaul_table[longhaul_index].index >> 8);
+		current_vid &= 0x1f;
 		if (table_index > longhaul_index)
 			dir = 1;
 		while (i != table_index) {
@@ -691,9 +697,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 {
 	struct acpi_device *d;
 
-	if ( acpi_bus_get_device(obj_handle, &d) ) {
+	if (acpi_bus_get_device(obj_handle, &d))
 		return 0;
-	}
+
 	*return_value = acpi_driver_data(d);
 	return 1;
 }
@@ -750,7 +756,7 @@ static int longhaul_setup_southbridge(void)
 	/* Find VT8235 southbridge */
 	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
 	if (dev == NULL)
-	/* Find VT8237 southbridge */
+		/* Find VT8237 southbridge */
 		dev = pci_get_device(PCI_VENDOR_ID_VIA,
 				     PCI_DEVICE_ID_VIA_8237, NULL);
 	if (dev != NULL) {
@@ -769,7 +775,8 @@ static int longhaul_setup_southbridge(void)
 		if (pci_cmd & 1 << 7) {
 			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
 			acpi_regs_addr &= 0xff00;
-			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
+			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
+					acpi_regs_addr);
 		}
 
 		pci_dev_put(dev);
@@ -781,7 +788,7 @@ static int longhaul_setup_southbridge(void)
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
-	char *cpuname=NULL;
+	char *cpuname = NULL;
 	int ret;
 	u32 lo, hi;
 
@@ -791,8 +798,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 		cpu_model = CPU_SAMUEL;
 		cpuname = "C3 'Samuel' [C5A]";
 		longhaul_version = TYPE_LONGHAUL_V1;
-		memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
-		memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
+		memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+		memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
 		break;
 
 	case 7:
@@ -803,10 +810,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 			cpuname = "C3 'Samuel 2' [C5B]";
 			/* Note, this is not a typo, early Samuel2's had
 			 * Samuel1 ratios. */
-			memcpy(clock_ratio, samuel1_clock_ratio,
-				sizeof(samuel1_clock_ratio));
-			memcpy(eblcr_table, samuel2_eblcr,
-				sizeof(samuel2_eblcr));
+			memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+			memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
 			break;
 		case 1 ... 15:
 			longhaul_version = TYPE_LONGHAUL_V1;
@@ -817,10 +822,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 				cpu_model = CPU_EZRA;
 				cpuname = "C3 'Ezra' [C5C]";
 			}
-			memcpy(clock_ratio, ezra_clock_ratio,
-				sizeof(ezra_clock_ratio));
-			memcpy(eblcr_table, ezra_eblcr,
-				sizeof(ezra_eblcr));
+			memcpy(mults, ezra_mults, sizeof(ezra_mults));
+			memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
 			break;
 		}
 		break;
@@ -829,18 +832,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 		cpu_model = CPU_EZRA_T;
 		cpuname = "C3 'Ezra-T' [C5M]";
 		longhaul_version = TYPE_POWERSAVER;
-		numscales=32;
-		memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
-		memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
+		numscales = 32;
+		memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
+		memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
 		break;
 
 	case 9:
 		longhaul_version = TYPE_POWERSAVER;
 		numscales = 32;
-		memcpy(clock_ratio,
-		       nehemiah_clock_ratio,
-		       sizeof(nehemiah_clock_ratio));
-		memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
+		memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
+		memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
 		switch (c->x86_mask) {
 		case 0 ... 1:
 			cpu_model = CPU_NEHEMIAH;
@@ -869,14 +870,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 			longhaul_version = TYPE_LONGHAUL_V1;
 	}
 
-	printk (KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
 	switch (longhaul_version) {
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		printk ("Longhaul v%d supported.\n", longhaul_version);
+		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
 		break;
 	case TYPE_POWERSAVER:
-		printk ("Powersaver supported.\n");
+		printk(KERN_CONT "Powersaver supported.\n");
 		break;
 	};
 
@@ -940,7 +941,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static struct freq_attr* longhaul_attr[] = {
+static struct freq_attr *longhaul_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -966,13 +967,15 @@ static int __init longhaul_init(void)
 
 #ifdef CONFIG_SMP
 	if (num_online_cpus() > 1) {
-		printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n");
+		printk(KERN_ERR PFX "More than 1 CPU detected, "
+				"longhaul disabled.\n");
 		return -ENODEV;
 	}
 #endif
 #ifdef CONFIG_X86_IO_APIC
 	if (cpu_has_apic) {
-		printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n");
+		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
+				"broken in this configuration.\n");
 		return -ENODEV;
 	}
 #endif
@@ -993,8 +996,8 @@ static void __exit longhaul_exit(void)
 {
 	int i;
 
-	for (i=0; i < numscales; i++) {
-		if (clock_ratio[i] == maxmult) {
+	for (i = 0; i < numscales; i++) {
+		if (mults[i] == maxmult) {
 			longhaul_setstate(i);
 			break;
 		}
@@ -1007,11 +1010,11 @@ static void __exit longhaul_exit(void)
 /* Even if BIOS is exporting ACPI C3 state, and it is used
  * with success when CPU is idle, this state doesn't
  * trigger frequency transition in some cases. */
-module_param (disable_acpi_c3, int, 0644);
+module_param(disable_acpi_c3, int, 0644);
 MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
 /* Change CPU voltage with frequency. Very usefull to save
  * power, but most VIA C3 processors aren't supporting it. */
-module_param (scale_voltage, int, 0644);
+module_param(scale_voltage, int, 0644);
 MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 /* Force revision key to 0 for processors which doesn't
  * support voltage scaling, but are introducing itself as
@@ -1019,9 +1022,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE("GPL");
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index 4fcc320997d..e2360a469f7 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -49,14 +49,14 @@ union msr_longhaul {
 
 /*
  * Clock ratio tables. Div/Mod by 10 to get ratio.
- * The eblcr ones specify the ratio read from the CPU.
- * The clock_ratio ones specify what to write to the CPU.
+ * The eblcr values specify the ratio read from the CPU.
+ * The mults values specify what to write to the CPU.
  */
 
 /*
  * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
  */
-static const int __initdata samuel1_clock_ratio[16] = {
+static const int __initdata samuel1_mults[16] = {
 	-1, /* 0000 -> RESERVED */
 	30, /* 0001 ->  3.0x */
 	40, /* 0010 ->  4.0x */
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {
 /*
  * VIA C3 Ezra
  */
-static const int __initdata ezra_clock_ratio[16] = {
+static const int __initdata ezra_mults[16] = {
 	100, /* 0000 -> 10.0x */
 	30,  /* 0001 ->  3.0x */
 	40,  /* 0010 ->  4.0x */
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {
 /*
  * VIA C3 (Ezra-T) [C5M].
  */
-static const int __initdata ezrat_clock_ratio[32] = {
+static const int __initdata ezrat_mults[32] = {
 	100, /* 0000 -> 10.0x */
 	30,  /* 0001 ->  3.0x */
 	40,  /* 0010 ->  4.0x */
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {
 /*
  * VIA C3 Nehemiah */
 
-static const int __initdata  nehemiah_clock_ratio[32] = {
+static const int __initdata  nehemiah_mults[32] = {
 	100, /* 0000 -> 10.0x */
 	-1, /* 0001 -> 16.0x */
 	40,  /* 0010 ->  4.0x */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index 777a7ff075d..da5f70fcb76 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -11,12 +11,13 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/cpufreq.h>
+#include <linux/timex.h>
 
 #include <asm/msr.h>
 #include <asm/processor.h>
-#include <asm/timex.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"longrun", msg)
 
 static struct cpufreq_driver	longrun_driver;
 
@@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy)
 	msr_lo &= 0x0000007F;
 	msr_hi &= 0x0000007F;
 
-	if ( longrun_high_freq <= longrun_low_freq ) {
+	if (longrun_high_freq <= longrun_low_freq) {
 		/* Assume degenerate Longrun table */
 		policy->min = policy->max = longrun_high_freq;
 	} else {
@@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy)
 	if (!policy)
 		return -EINVAL;
 
-	if ( longrun_high_freq <= longrun_low_freq ) {
+	if (longrun_high_freq <= longrun_low_freq) {
 		/* Assume degenerate Longrun table */
 		pctg_lo = pctg_hi = 100;
 	} else {
@@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu)
 	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
 	dprintk("cpuid eax is %u\n", eax);
 
-	return (eax * 1000);
+	return eax * 1000;
 }
 
 /**
@@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
 		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
 		*high_freq = msr_lo * 1000; /* to kHz */
 
-		dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
+		dprintk("longrun table interface told %u - %u kHz\n",
+				*low_freq, *high_freq);
 
 		if (*low_freq > *high_freq)
 			*low_freq = *high_freq;
@@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
 	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
 	/* try decreasing in 10% steps, some processors react only
 	 * on some barrier values */
-	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
+	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
 		/* set to 0 to try_hi perf_pctg */
 		msr_lo &= 0xFFFFFF80;
 		msr_hi &= 0xFFFFFF80;
@@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
 
 	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
 	 * eqals
-	 * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+	 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
 	 *
 	 * high_freq * perf_pctg is stored tempoarily into "ebx".
 	 */
@@ -317,9 +319,10 @@ static void __exit longrun_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
+		"Efficeon processors.");
+MODULE_LICENSE("GPL");
 
 module_init(longrun_init);
 module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 3178c3acd97..41ed94915f9 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -27,15 +27,17 @@
 #include <linux/cpufreq.h>
 #include <linux/slab.h>
 #include <linux/cpumask.h>
+#include <linux/timex.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/timex.h>
+#include <asm/timer.h>
 
 #include "speedstep-lib.h"
 
 #define PFX	"p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"p4-clockmod", msg)
 
 /*
  * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -58,7 +60,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 {
 	u32 l, h;
 
-	if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+	if (!cpu_online(cpu) ||
+	    (newstate > DC_DISABLE) || (newstate == DC_RESV))
 		return -EINVAL;
 
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@ -66,7 +69,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 	if (l & 0x01)
 		dprintk("CPU#%d currently thermal throttled\n", cpu);
 
-	if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+	if (has_N44_O17_errata[cpu] &&
+	    (newstate == DC_25PT || newstate == DC_DFLT))
 		newstate = DC_38PT;
 
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@ -112,7 +116,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 	struct cpufreq_freqs freqs;
 	int i;
 
-	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	freqs.old = cpufreq_p4_get(policy->cpu);
@@ -127,7 +132,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
 
-	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+	/* run on each logical CPU,
+	 * see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3
 	 */
 	for_each_cpu(i, policy->cpus)
@@ -153,28 +159,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. "
-			       "The acpi-cpufreq module offers voltage scaling"
-			       " in addition of frequency scaling. You should use "
-			       "that instead of p4-clockmod, if possible.\n");
+			printk(KERN_WARNING PFX "Warning: EST-capable CPU "
+			       "detected. The acpi-cpufreq module offers "
+			       "voltage scaling in addition of frequency "
+			       "scaling. You should use that instead of "
+			       "p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
 		case 0x16: /* Celeron Core */
 			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
+			return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
 		case 0x0D: /* Pentium M (Dothan) */
 			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 			/* fall through */
 		case 0x09: /* Pentium M (Banias) */
-			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+			return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
 		}
 	}
 
 	if (c->x86 != 0xF) {
 		if (!cpu_has(c, X86_FEATURE_EST))
-			printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
-				"Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+			printk(KERN_WARNING PFX "Unknown CPU. "
+				"Please send an e-mail to "
+				"<cpufreq@vger.kernel.org>\n");
 		return 0;
 	}
 
@@ -182,16 +190,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	 * throttling is active or not. */
 	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 
-	if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
 		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
 		       "The speedstep-ich or acpi cpufreq modules offer "
 		       "voltage scaling in addition of frequency scaling. "
 		       "You should use either one instead of p4-clockmod, "
 		       "if possible.\n");
-		return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
 	}
 
-	return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+	return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
 }
 
 
@@ -217,14 +225,20 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 		dprintk("has errata -- disabling low frequencies\n");
 	}
 
+	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+	    c->x86_model < 2) {
+		/* switch to maximum frequency and measure result */
+		cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+		recalibrate_cpu_khz();
+	}
 	/* get max frequency */
 	stock_freq = cpufreq_p4_get_frequency(c);
 	if (!stock_freq)
 		return -EINVAL;
 
 	/* table init */
-	for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+	for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
 			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 		else
 			p4clockmod_table[i].frequency = (stock_freq * i)/8;
@@ -232,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
 
 	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = 1000000; /* assumed */
+
+	/* the transition latency is set to be 1 higher than the maximum
+	 * transition latency of the ondemand governor */
+	policy->cpuinfo.transition_latency = 10000001;
 	policy->cur = stock_freq;
 
 	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@ -258,12 +275,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu)
 		l = DC_DISABLE;
 
 	if (l != DC_DISABLE)
-		return (stock_freq * l / 8);
+		return stock_freq * l / 8;
 
 	return stock_freq;
 }
 
-static struct freq_attr* p4clockmod_attr[] = {
+static struct freq_attr *p4clockmod_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -298,9 +315,10 @@ static int __init cpufreq_p4_init(void)
 
 	ret = cpufreq_register_driver(&p4clockmod_driver);
 	if (!ret)
-		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+				"Modulation available\n");
 
-	return (ret);
+	return ret;
 }
 
 
@@ -310,9 +328,9 @@ static void __exit cpufreq_p4_exit(void)
 }
 
 
-MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
-MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE("GPL");
 
 late_initcall(cpufreq_p4_init);
 module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index c1ac5790c63..f10dea409f4 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -1,6 +1,7 @@
 /*
  *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
+ *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne Pänkälä,
+ *                 Dominik Brodowski.
  *
  *  Licensed under the terms of the GNU GPL License version 2.
  *
@@ -13,14 +14,15 @@
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
-
-#include <asm/msr.h>
 #include <linux/timex.h>
 #include <linux/io.h>
 
+#include <asm/msr.h>
+
 #define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
 					   as it is unused */
 
+#define PFX "powernow-k6: "
 static unsigned int                     busfreq;   /* FSB, in 10 kHz */
 static unsigned int                     max_multiplier;
 
@@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = {
  */
 static int powernow_k6_get_cpu_multiplier(void)
 {
-	u64             invalue = 0;
-	u32             msrval;
+	u64 invalue = 0;
+	u32 msrval;
 
 	msrval = POWERNOW_IOPORT + 0x1;
 	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
@@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void)
  */
 static void powernow_k6_set_state(unsigned int best_i)
 {
-	unsigned long           outvalue = 0, invalue = 0;
-	unsigned long           msrval;
-	struct cpufreq_freqs    freqs;
+	unsigned long outvalue = 0, invalue = 0;
+	unsigned long msrval;
+	struct cpufreq_freqs freqs;
 
 	if (clock_ratio[best_i].index > max_multiplier) {
-		printk(KERN_ERR "cpufreq: invalid target frequency\n");
+		printk(KERN_ERR PFX "invalid target frequency\n");
 		return;
 	}
 
@@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
  * powernow_k6_setpolicy - sets a new CPUFreq policy
  * @policy: new policy
  * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
  *
  * sets a new CPUFreq policy
  */
@@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
 			       unsigned int target_freq,
 			       unsigned int relation)
 {
-	unsigned int    newstate = 0;
+	unsigned int newstate = 0;
 
-	if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	powernow_k6_set_state(newstate);
@@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
 
 static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
 {
-	unsigned int i;
+	unsigned int i, f;
 	int result;
 
 	if (policy->cpu != 0)
@@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
 
 	/* table init */
 	for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if (clock_ratio[i].index > max_multiplier)
+		f = clock_ratio[i].index;
+		if (f > max_multiplier)
 			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
 		else
-			clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
+			clock_ratio[i].frequency = busfreq * f;
 	}
 
 	/* cpuinfo and default policy values */
@@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
 
 static unsigned int powernow_k6_get(unsigned int cpu)
 {
-	return busfreq * powernow_k6_get_cpu_multiplier();
+	unsigned int ret;
+	ret = (busfreq * powernow_k6_get_cpu_multiplier());
+	return ret;
 }
 
 static struct freq_attr *powernow_k6_attr[] = {
@@ -221,7 +228,7 @@ static int __init powernow_k6_init(void)
 		return -ENODEV;
 
 	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		printk("cpufreq: PowerNow IOPORT region already used.\n");
+		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
 		return -EIO;
 	}
 
@@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+		"Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 1b446d79a8f..3c28ccd4974 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -6,10 +6,12 @@
  *  Licensed under the terms of the GNU GPL License version 2.
  *  Based upon datasheets & sample CPUs kindly provided by AMD.
  *
- * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
- * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
- * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
- * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ * Errata 5:
+ *  CPU may fail to execute a FID/VID change in presence of interrupt.
+ *  - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15:
+ *  CPU with half frequency multipliers may hang upon wakeup from disconnect.
+ *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
  */
 
 #include <linux/kernel.h>
@@ -20,11 +22,11 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/dmi.h>
+#include <linux/timex.h>
+#include <linux/io.h>
 
+#include <asm/timer.h>		/* Needed for recalibrate_cpu_khz() */
 #include <asm/msr.h>
-#include <asm/timer.h>
-#include <asm/timex.h>
-#include <asm/io.h>
 #include <asm/system.h>
 
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -58,9 +60,9 @@ struct pst_s {
 union powernow_acpi_control_t {
 	struct {
 		unsigned long fid:5,
-		vid:5,
-		sgtc:20,
-		res1:2;
+			vid:5,
+			sgtc:20,
+			res1:2;
 	} bits;
 	unsigned long val;
 };
@@ -94,14 +96,15 @@ static struct cpufreq_frequency_table *powernow_table;
 
 static unsigned int can_scale_bus;
 static unsigned int can_scale_vid;
-static unsigned int minimum_speed=-1;
+static unsigned int minimum_speed = -1;
 static unsigned int maximum_speed;
 static unsigned int number_scales;
 static unsigned int fsb;
 static unsigned int latency;
 static char have_a0;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"powernow-k7", msg)
 
 static int check_fsb(unsigned int fsbspeed)
 {
@@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed)
 	unsigned int f = fsb / 1000;
 
 	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
-	return (delta < 5);
+	return delta < 5;
 }
 
 static int check_powernow(void)
@@ -117,24 +120,26 @@ static int check_powernow(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	unsigned int maxei, eax, ebx, ecx, edx;
 
-	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
 #ifdef MODULE
-		printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
+		printk(KERN_INFO PFX "This module only works with "
+				"AMD K7 CPUs\n");
 #endif
 		return 0;
 	}
 
 	/* Get maximum capabilities */
-	maxei = cpuid_eax (0x80000000);
+	maxei = cpuid_eax(0x80000000);
 	if (maxei < 0x80000007) {	/* Any powernow info ? */
 #ifdef MODULE
-		printk (KERN_INFO PFX "No powernow capabilities detected\n");
+		printk(KERN_INFO PFX "No powernow capabilities detected\n");
 #endif
 		return 0;
 	}
 
 	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
+		printk(KERN_INFO PFX "K7 660[A0] core detected, "
+				"enabling errata workarounds\n");
 		have_a0 = 1;
 	}
 
@@ -144,37 +149,42 @@ static int check_powernow(void)
 	if (!(edx & (1 << 1 | 1 << 2)))
 		return 0;
 
-	printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
 
 	if (edx & 1 << 1) {
-		printk ("frequency");
-		can_scale_bus=1;
+		printk("frequency");
+		can_scale_bus = 1;
 	}
 
 	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
-		printk (" and ");
+		printk(" and ");
 
 	if (edx & 1 << 2) {
-		printk ("voltage");
-		can_scale_vid=1;
+		printk("voltage");
+		can_scale_vid = 1;
 	}
 
-	printk (".\n");
+	printk(".\n");
 	return 1;
 }
 
+static void invalidate_entry(unsigned int entry)
+{
+	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
 
-static int get_ranges (unsigned char *pst)
+static int get_ranges(unsigned char *pst)
 {
 	unsigned int j;
 	unsigned int speed;
 	u8 fid, vid;
 
-	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
+	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+				(number_scales + 1)), GFP_KERNEL);
 	if (!powernow_table)
 		return -ENOMEM;
 
-	for (j=0 ; j < number_scales; j++) {
+	for (j = 0 ; j < number_scales; j++) {
 		fid = *pst++;
 
 		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
@@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst)
 
 		speed = powernow_table[j].frequency;
 
-		if ((fid_codes[fid] % 10)==5) {
+		if ((fid_codes[fid] % 10) == 5) {
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 			if (have_a0 == 1)
-				powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
+				invalidate_entry(j);
 #endif
 		}
 
@@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst)
 		vid = *pst++;
 		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
 
-		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed/1000, vid,
 			 mobile_vid_table[vid]/1000,
@@ -214,13 +224,13 @@ static void change_FID(int fid)
 {
 	union msr_fidvidctl fidvidctl;
 
-	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
 	if (fidvidctl.bits.FID != fid) {
 		fidvidctl.bits.SGTC = latency;
 		fidvidctl.bits.FID = fid;
 		fidvidctl.bits.VIDC = 0;
 		fidvidctl.bits.FIDC = 1;
-		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
 	}
 }
 
@@ -229,18 +239,18 @@ static void change_VID(int vid)
 {
 	union msr_fidvidctl fidvidctl;
 
-	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
 	if (fidvidctl.bits.VID != vid) {
 		fidvidctl.bits.SGTC = latency;
 		fidvidctl.bits.VID = vid;
 		fidvidctl.bits.FIDC = 0;
 		fidvidctl.bits.VIDC = 1;
-		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
 	}
 }
 
 
-static void change_speed (unsigned int index)
+static void change_speed(unsigned int index)
 {
 	u8 fid, vid;
 	struct cpufreq_freqs freqs;
@@ -257,7 +267,7 @@ static void change_speed (unsigned int index)
 
 	freqs.cpu = 0;
 
-	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
 	cfid = fidvidstatus.bits.CFID;
 	freqs.old = fsb * fid_codes[cfid] / 10;
 
@@ -321,12 +331,14 @@ static int powernow_acpi_init(void)
 		goto err1;
 	}
 
-	if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+	if (acpi_processor_perf->control_register.space_id !=
+			ACPI_ADR_SPACE_FIXED_HARDWARE) {
 		retval = -ENODEV;
 		goto err2;
 	}
 
-	if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+	if (acpi_processor_perf->status_register.space_id !=
+			ACPI_ADR_SPACE_FIXED_HARDWARE) {
 		retval = -ENODEV;
 		goto err2;
 	}
@@ -338,7 +350,8 @@ static int powernow_acpi_init(void)
 		goto err2;
 	}
 
-	powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
+	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+				(number_scales + 1)), GFP_KERNEL);
 	if (!powernow_table) {
 		retval = -ENOMEM;
 		goto err2;
@@ -352,7 +365,7 @@ static int powernow_acpi_init(void)
 		unsigned int speed, speed_mhz;
 
 		pc.val = (unsigned long) state->control;
-		dprintk ("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+		dprintk("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
 			 i,
 			 (u32) state->core_frequency,
 			 (u32) state->power,
@@ -381,12 +394,12 @@ static int powernow_acpi_init(void)
 		if (speed % 1000 > 0)
 			speed_mhz++;
 
-		if ((fid_codes[fid] % 10)==5) {
+		if ((fid_codes[fid] % 10) == 5) {
 			if (have_a0 == 1)
-				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+				invalidate_entry(i);
 		}
 
-		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed_mhz, vid,
 			 mobile_vid_table[vid]/1000,
@@ -422,7 +435,8 @@ err1:
 err05:
 	kfree(acpi_processor_perf);
 err0:
-	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
+	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
+			"this platform\n");
 	acpi_processor_perf = NULL;
 	return retval;
 }
@@ -435,7 +449,14 @@ static int powernow_acpi_init(void)
 }
 #endif
 
-static int powernow_decode_bios (int maxfid, int startvid)
+static void print_pst_entry(struct pst_s *pst, unsigned int j)
+{
+	dprintk("PST:%d (@%p)\n", j, pst);
+	dprintk(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
+		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+}
+
+static int powernow_decode_bios(int maxfid, int startvid)
 {
 	struct psb_s *psb;
 	struct pst_s *pst;
@@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid)
 
 	etuple = cpuid_eax(0x80000001);
 
-	for (i=0xC0000; i < 0xffff0 ; i+=16) {
+	for (i = 0xC0000; i < 0xffff0 ; i += 16) {
 
 		p = phys_to_virt(i);
 
-		if (memcmp(p, "AMDK7PNOW!",  10) == 0){
-			dprintk ("Found PSB header at %p\n", p);
+		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
+			dprintk("Found PSB header at %p\n", p);
 			psb = (struct psb_s *) p;
-			dprintk ("Table version: 0x%x\n", psb->tableversion);
+			dprintk("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
-				printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
+				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
+						" supported right now\n");
 				return -ENODEV;
 			}
 
-			dprintk ("Flags: 0x%x\n", psb->flags);
-			if ((psb->flags & 1)==0) {
-				dprintk ("Mobile voltage regulator\n");
-			} else {
-				dprintk ("Desktop voltage regulator\n");
-			}
+			dprintk("Flags: 0x%x\n", psb->flags);
+			if ((psb->flags & 1) == 0)
+				dprintk("Mobile voltage regulator\n");
+			else
+				dprintk("Desktop voltage regulator\n");
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
-				printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. "
-						"Should be at least 100. Correcting.\n", latency);
+				printk(KERN_INFO PFX "BIOS set settling time "
+						"to %d microseconds. "
+						"Should be at least 100. "
+						"Correcting.\n", latency);
 				latency = 100;
 			}
-			dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
-			dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
+			dprintk("Settling Time: %d microseconds.\n",
+					psb->settlingtime);
+			dprintk("Has %d PST tables. (Only dumping ones "
+					"relevant to this CPU).\n",
+					psb->numpst);
 
-			p += sizeof (struct psb_s);
+			p += sizeof(struct psb_s);
 
 			pst = (struct pst_s *) p;
 
-			for (j=0; j<psb->numpst; j++) {
+			for (j = 0; j < psb->numpst; j++) {
 				pst = (struct pst_s *) p;
 				number_scales = pst->numpstates;
 
-				if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
-				    (maxfid==pst->maxfid) && (startvid==pst->startvid))
-				{
-					dprintk ("PST:%d (@%p)\n", j, pst);
-					dprintk (" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
-						 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
-
-					ret = get_ranges ((char *) pst + sizeof (struct pst_s));
+				if ((etuple == pst->cpuid) &&
+				    check_fsb(pst->fsbspeed) &&
+				    (maxfid == pst->maxfid) &&
+				    (startvid == pst->startvid)) {
+					print_pst_entry(pst, j);
+					p = (char *)pst + sizeof(struct pst_s);
+					ret = get_ranges(p);
 					return ret;
 				} else {
 					unsigned int k;
-					p = (char *) pst + sizeof (struct pst_s);
-					for (k=0; k<number_scales; k++)
-						p+=2;
+					p = (char *)pst + sizeof(struct pst_s);
+					for (k = 0; k < number_scales; k++)
+						p += 2;
 				}
 			}
-			printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
-			printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
+			printk(KERN_INFO PFX "No PST tables match this cpuid "
+					"(0x%x)\n", etuple);
+			printk(KERN_INFO PFX "This is indicative of a broken "
+					"BIOS.\n");
 
 			return -EINVAL;
 		}
@@ -511,13 +538,14 @@ static int powernow_decode_bios (int maxfid, int startvid)
 }
 
 
-static int powernow_target (struct cpufreq_policy *policy,
+static int powernow_target(struct cpufreq_policy *policy,
 			    unsigned int target_freq,
 			    unsigned int relation)
 {
 	unsigned int newstate;
 
-	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
+				relation, &newstate))
 		return -EINVAL;
 
 	change_speed(newstate);
@@ -526,7 +554,7 @@ static int powernow_target (struct cpufreq_policy *policy,
 }
 
 
-static int powernow_verify (struct cpufreq_policy *policy)
+static int powernow_verify(struct cpufreq_policy *policy)
 {
 	return cpufreq_frequency_table_verify(policy, powernow_table);
 }
@@ -566,18 +594,23 @@ static unsigned int powernow_get(unsigned int cpu)
 
 	if (cpu)
 		return 0;
-	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
 	cfid = fidvidstatus.bits.CFID;
 
-	return (fsb * fid_codes[cfid] / 10);
+	return fsb * fid_codes[cfid] / 10;
 }
 
 
 static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
 {
-	printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
-	printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
-	printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
+	printk(KERN_WARNING PFX
+		"%s laptop with broken PST tables in BIOS detected.\n",
+		d->ident);
+	printk(KERN_WARNING PFX
+		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
+		"BIOS than 3A71 (01/20/2003)\n");
+	printk(KERN_WARNING PFX
+		"cpufreq scaling has been disabled as a result of this.\n");
 	return 0;
 }
 
@@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {
 	{ }
 };
 
-static int __init powernow_cpu_init (struct cpufreq_policy *policy)
+static int __init powernow_cpu_init(struct cpufreq_policy *policy)
 {
 	union msr_fidvidstatus fidvidstatus;
 	int result;
@@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 	if (policy->cpu != 0)
 		return -ENODEV;
 
-	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
 
 	recalibrate_cpu_khz();
 
@@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 	dprintk("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		printk (KERN_INFO PFX "PSB/PST known to be broken.  Trying ACPI instead\n");
+		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
+				"Trying ACPI instead\n");
 		result = powernow_acpi_init();
 	} else {
-		result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
+		result = powernow_decode_bios(fidvidstatus.bits.MFID,
+				fidvidstatus.bits.SVID);
 		if (result) {
-			printk (KERN_INFO PFX "Trying ACPI perflib\n");
+			printk(KERN_INFO PFX "Trying ACPI perflib\n");
 			maximum_speed = 0;
 			minimum_speed = -1;
 			latency = 0;
 			result = powernow_acpi_init();
 			if (result) {
-				printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
-				printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n");
+				printk(KERN_INFO PFX
+					"ACPI and legacy methods failed\n");
 			}
 		} else {
 			/* SGTC use the bus clock as timer */
@@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 	if (result)
 		return result;
 
-	printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
 				minimum_speed/1000, maximum_speed/1000);
 
-	policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
+	policy->cpuinfo.transition_latency =
+		cpufreq_scale(2000000UL, fsb, latency);
 
 	policy->cur = powernow_get(0);
 
@@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
 }
 
-static int powernow_cpu_exit (struct cpufreq_policy *policy) {
+static int powernow_cpu_exit(struct cpufreq_policy *policy)
+{
 	cpufreq_frequency_table_put_attr(policy->cpu);
 
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
 	return 0;
 }
 
-static struct freq_attr* powernow_table_attr[] = {
+static struct freq_attr *powernow_table_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = {
 	.attr	= powernow_table_attr,
 };
 
-static int __init powernow_init (void)
+static int __init powernow_init(void)
 {
-	if (check_powernow()==0)
+	if (check_powernow() == 0)
 		return -ENODEV;
 	return cpufreq_register_driver(&powernow_driver);
 }
 
 
-static void __exit powernow_exit (void)
+static void __exit powernow_exit(void)
 {
 	cpufreq_unregister_driver(&powernow_driver);
 }
@@ -701,9 +738,9 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE("GPL");
 
 late_initcall(powernow_init);
 module_exit(powernow_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6428aa17b40..a15ac94e0b9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -33,16 +33,14 @@
 #include <linux/string.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>	/* for current / set_cpus_allowed() */
+#include <linux/io.h>
+#include <linux/delay.h>
 
 #include <asm/msr.h>
-#include <asm/io.h>
-#include <asm/delay.h>
 
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
 #include <linux/acpi.h>
 #include <linux/mutex.h>
 #include <acpi/processor.h>
-#endif
 
 #define PFX "powernow-k8: "
 #define VERSION "version 2.20.00"
@@ -71,7 +69,8 @@ static u32 find_khz_freq_from_fid(u32 fid)
 	return 1000 * find_freq_from_fid(fid);
 }
 
-static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
+static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+		u32 pstate)
 {
 	return data[pstate].frequency;
 }
@@ -186,7 +185,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 		return 1;
 	}
 
-	lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+	lo = fid;
+	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+	lo |= MSR_C_LO_INIT_FID_VID;
 
 	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
@@ -194,7 +195,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	do {
 		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
 		if (i++ > 100) {
-			printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
+			printk(KERN_ERR PFX
+				"Hardware error - pending bit very stuck - "
+				"no further pstate changes possible\n");
 			return 1;
 		}
 	} while (query_current_values_with_pending_wait(data));
@@ -202,14 +205,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	count_off_irt(data);
 
 	if (savevid != data->currvid) {
-		printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
-		       savevid, data->currvid);
+		printk(KERN_ERR PFX
+			"vid change on fid trans, old 0x%x, new 0x%x\n",
+			savevid, data->currvid);
 		return 1;
 	}
 
 	if (fid != data->currfid) {
-		printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
-		        data->currfid);
+		printk(KERN_ERR PFX
+			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+			data->currfid);
 		return 1;
 	}
 
@@ -228,7 +233,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 		return 1;
 	}
 
-	lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+	lo = data->currfid;
+	lo |= (vid << MSR_C_LO_VID_SHIFT);
+	lo |= MSR_C_LO_INIT_FID_VID;
 
 	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
@@ -236,20 +243,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 	do {
 		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
 		if (i++ > 100) {
-			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			printk(KERN_ERR PFX "internal error - pending bit "
+					"very stuck - no further pstate "
+					"changes possible\n");
 			return 1;
 		}
 	} while (query_current_values_with_pending_wait(data));
 
 	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+		printk(KERN_ERR PFX "fid changed on vid trans, old "
+			"0x%x new 0x%x\n",
 		       savefid, data->currfid);
 		return 1;
 	}
 
 	if (vid != data->currvid) {
-		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
-				data->currvid);
+		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+				"curr 0x%x\n",
+				vid, data->currvid);
 		return 1;
 	}
 
@@ -261,7 +272,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
  * Decreasing vid codes represent increasing voltages:
  * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
  */
-static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+		u32 reqvid, u32 step)
 {
 	if ((data->currvid - reqvid) > step)
 		reqvid = data->currvid - step;
@@ -283,7 +295,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
 }
 
 /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
-static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+static int transition_fid_vid(struct powernow_k8_data *data,
+		u32 reqfid, u32 reqvid)
 {
 	if (core_voltage_pre_transition(data, reqvid))
 		return 1;
@@ -298,7 +311,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
 		return 1;
 
 	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
-		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+				"curr 0x%x 0x%x\n",
 				smp_processor_id(),
 				reqfid, reqvid, data->currfid, data->currvid);
 		return 1;
@@ -311,13 +325,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
 }
 
 /* Phase 1 - core voltage transition ... setup voltage */
-static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+		u32 reqvid)
 {
 	u32 rvosteps = data->rvo;
 	u32 savefid = data->currfid;
 	u32 maxvid, lo;
 
-	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+		"reqvid 0x%x, rvo 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqvid, data->rvo);
 
@@ -340,7 +356,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
 		} else {
 			dprintk("ph1: changing vid for rvo, req 0x%x\n",
 				data->currvid - 1);
-			if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
 				return 1;
 			rvosteps--;
 		}
@@ -350,7 +366,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
 		return 1;
 
 	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+				data->currfid);
 		return 1;
 	}
 
@@ -363,20 +380,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
 /* Phase 2 - core frequency transition */
 static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 {
-	u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
+	u32 vcoreqfid, vcocurrfid, vcofiddiff;
+	u32 fid_interval, savevid = data->currvid;
 
-	if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
-		printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
-			reqfid, data->currfid);
+	if ((reqfid < HI_FID_TABLE_BOTTOM) &&
+	    (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
+				"0x%x 0x%x\n", reqfid, data->currfid);
 		return 1;
 	}
 
 	if (data->currfid == reqfid) {
-		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+				data->currfid);
 		return 0;
 	}
 
-	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+		"reqfid 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqfid);
 
@@ -390,14 +411,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 
 		if (reqfid > data->currfid) {
 			if (data->currfid > LO_FID_TABLE_TOP) {
-				if (write_new_fid(data, data->currfid + fid_interval)) {
+				if (write_new_fid(data,
+						data->currfid + fid_interval))
 					return 1;
-				}
 			} else {
 				if (write_new_fid
-				    (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+				    (data,
+				     2 + convert_fid_to_vco_fid(data->currfid)))
 					return 1;
-				}
 			}
 		} else {
 			if (write_new_fid(data, data->currfid - fid_interval))
@@ -417,7 +438,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 
 	if (data->currfid != reqfid) {
 		printk(KERN_ERR PFX
-			"ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+			"ph2: mismatch, failed fid transition, "
+			"curr 0x%x, req 0x%x\n",
 			data->currfid, reqfid);
 		return 1;
 	}
@@ -435,7 +457,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 }
 
 /* Phase 3 - core voltage transition flow ... jump to the final vid. */
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+static int core_voltage_post_transition(struct powernow_k8_data *data,
+		u32 reqvid)
 {
 	u32 savefid = data->currfid;
 	u32 savereqvid = reqvid;
@@ -457,7 +480,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 
 		if (data->currvid != reqvid) {
 			printk(KERN_ERR PFX
-			       "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+			       "ph3: failed vid transition\n, "
+			       "req 0x%x, curr 0x%x",
 			       reqvid, data->currvid);
 			return 1;
 		}
@@ -508,7 +532,8 @@ static int check_supported_cpu(unsigned int cpu)
 	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
 		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
 		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
-			printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+			printk(KERN_INFO PFX
+				"Processor cpuid %x not supported\n", eax);
 			goto out;
 		}
 
@@ -520,8 +545,10 @@ static int check_supported_cpu(unsigned int cpu)
 		}
 
 		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
-			printk(KERN_INFO PFX "Power state transitions not supported\n");
+		if ((edx & P_STATE_TRANSITION_CAPABLE)
+			!= P_STATE_TRANSITION_CAPABLE) {
+			printk(KERN_INFO PFX
+				"Power state transitions not supported\n");
 			goto out;
 		}
 	} else { /* must be a HW Pstate capable processor */
@@ -539,7 +566,8 @@ out:
 	return rc;
 }
 
-static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+		u8 maxvid)
 {
 	unsigned int j;
 	u8 lastfid = 0xff;
@@ -550,12 +578,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
 			       j, pst[j].vid);
 			return -EINVAL;
 		}
-		if (pst[j].vid < data->rvo) {	/* vid + rvo >= 0 */
+		if (pst[j].vid < data->rvo) {
+			/* vid + rvo >= 0 */
 			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
 			       " %d\n", j);
 			return -ENODEV;
 		}
-		if (pst[j].vid < maxvid + data->rvo) {	/* vid + rvo >= maxvid */
+		if (pst[j].vid < maxvid + data->rvo) {
+			/* vid + rvo >= maxvid */
 			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
 			       " %d\n", j);
 			return -ENODEV;
@@ -579,23 +609,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
 		return -EINVAL;
 	}
 	if (lastfid > LO_FID_TABLE_TOP)
-		printk(KERN_INFO FW_BUG PFX  "first fid not from lo freq table\n");
+		printk(KERN_INFO FW_BUG PFX
+			"first fid not from lo freq table\n");
 
 	return 0;
 }
 
+static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
+{
+	data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+
 static void print_basics(struct powernow_k8_data *data)
 {
 	int j;
 	for (j = 0; j < data->numps; j++) {
-		if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+		if (data->powernow_table[j].frequency !=
+				CPUFREQ_ENTRY_INVALID) {
 			if (cpu_family == CPU_HW_PSTATE) {
-				printk(KERN_INFO PFX "   %d : pstate %d (%d MHz)\n",
-					j,
+				printk(KERN_INFO PFX
+					"   %d : pstate %d (%d MHz)\n", j,
 					data->powernow_table[j].index,
 					data->powernow_table[j].frequency/1000);
 			} else {
-				printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
+				printk(KERN_INFO PFX
+					"   %d : fid 0x%x (%d MHz), vid 0x%x\n",
 					j,
 					data->powernow_table[j].index & 0xff,
 					data->powernow_table[j].frequency/1000,
@@ -604,20 +642,25 @@ static void print_basics(struct powernow_k8_data *data)
 		}
 	}
 	if (data->batps)
-		printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+		printk(KERN_INFO PFX "Only %d pstates on battery\n",
+				data->batps);
 }
 
-static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+static int fill_powernow_table(struct powernow_k8_data *data,
+		struct pst_s *pst, u8 maxvid)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	unsigned int j;
 
-	if (data->batps) {    /* use ACPI support to get full speed on mains power */
-		printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+	if (data->batps) {
+		/* use ACPI support to get full speed on mains power */
+		printk(KERN_WARNING PFX
+			"Only %d pstates usable (use ACPI driver for full "
+			"range\n", data->batps);
 		data->numps = data->batps;
 	}
 
-	for ( j=1; j<data->numps; j++ ) {
+	for (j = 1; j < data->numps; j++) {
 		if (pst[j-1].fid >= pst[j].fid) {
 			printk(KERN_ERR PFX "PST out of sequence\n");
 			return -EINVAL;
@@ -640,9 +683,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
 	}
 
 	for (j = 0; j < data->numps; j++) {
+		int freq;
 		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
 		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
-		powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+		freq = find_khz_freq_from_fid(pst[j].fid);
+		powernow_table[j].frequency = freq;
 	}
 	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
 	powernow_table[data->numps].index = 0;
@@ -658,7 +703,8 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
 		print_basics(data);
 
 	for (j = 0; j < data->numps; j++)
-		if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+		if ((pst[j].fid == data->currfid) &&
+		    (pst[j].vid == data->currvid))
 			return 0;
 
 	dprintk("currfid/vid do not match PST, ignoring\n");
@@ -698,7 +744,8 @@ static int find_psb_table(struct powernow_k8_data *data)
 		}
 
 		data->vstable = psb->vstable;
-		dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+		dprintk("voltage stabilization time: %d(*20us)\n",
+				data->vstable);
 
 		dprintk("flags2: 0x%x\n", psb->flags2);
 		data->rvo = psb->flags2 & 3;
@@ -713,11 +760,12 @@ static int find_psb_table(struct powernow_k8_data *data)
 
 		dprintk("numpst: 0x%x\n", psb->num_tables);
 		cpst = psb->num_tables;
-		if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+		if ((psb->cpuid == 0x00000fc0) ||
+		    (psb->cpuid == 0x00000fe0)) {
 			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-			if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+			if ((thiscpuid == 0x00000fc0) ||
+			    (thiscpuid == 0x00000fe0))
 				cpst = 1;
-			}
 		}
 		if (cpst != 1) {
 			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
@@ -732,7 +780,8 @@ static int find_psb_table(struct powernow_k8_data *data)
 
 		data->numps = psb->numps;
 		dprintk("numpstates: 0x%x\n", data->numps);
-		return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+		return fill_powernow_table(data,
+				(struct pst_s *)(psb+1), maxvid);
 	}
 	/*
 	 * If you see this message, complain to BIOS manufacturer. If
@@ -745,28 +794,31 @@ static int find_psb_table(struct powernow_k8_data *data)
 	 * BIOS and Kernel Developer's Guide, which is available on
 	 * www.amd.com
 	 */
-	printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
 	return -ENODEV;
 }
 
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
+		unsigned int index)
 {
+	acpi_integer control;
+
 	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
-}
+	control = data->acpi_data.states[index].control; data->irt = (control
+			>> IRT_SHIFT) & IRT_MASK; data->rvo = (control >>
+				RVO_SHIFT) & RVO_MASK; data->exttype = (control
+					>> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1
+		<< ((control >> MVS_SHIFT) & MVS_MASK); data->vstable =
+		(control >> VST_SHIFT) & VST_MASK; }
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val = -ENODEV;
+	acpi_integer space_id;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -779,11 +831,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 		goto err_out;
 	}
 
-	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-		(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	space_id = data->acpi_data.control_register.space_id;
+	if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+		(space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
 			data->acpi_data.control_register.space_id,
-			data->acpi_data.status_register.space_id);
+			space_id);
 		goto err_out;
 	}
 
@@ -802,7 +855,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	if (ret_val)
 		goto err_out_mem;
 
-	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data.state_count].frequency =
+		CPUFREQ_TABLE_END;
 	powernow_table[data->acpi_data.state_count].index = 0;
 	data->powernow_table = powernow_table;
 
@@ -830,13 +884,15 @@ err_out_mem:
 err_out:
 	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
 
-	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+	/* data->acpi_data.state_count informs us at ->exit()
+	 * whether ACPI was used */
 	data->acpi_data.state_count = 0;
 
 	return ret_val;
 }
 
-static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+		struct cpufreq_frequency_table *powernow_table)
 {
 	int i;
 	u32 hi = 0, lo = 0;
@@ -848,84 +904,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 
 		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
 		if (index > data->max_hw_pstate) {
-			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
-			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
-			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			printk(KERN_ERR PFX "invalid pstate %d - "
+					"bad value %d.\n", i, index);
+			printk(KERN_ERR PFX "Please report to BIOS "
+					"manufacturer\n");
+			invalidate_entry(data, i);
 			continue;
 		}
 		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
 		if (!(hi & HW_PSTATE_VALID_MASK)) {
 			dprintk("invalid pstate %d, ignoring\n", index);
-			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			invalidate_entry(data, i);
 			continue;
 		}
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+		powernow_table[i].frequency =
+			data->acpi_data.states[i].core_frequency * 1000;
 	}
 	return 0;
 }
 
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+		struct cpufreq_frequency_table *powernow_table)
 {
 	int i;
 	int cntlofreq = 0;
+
 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		u32 fid;
 		u32 vid;
+		u32 freq, index;
+		acpi_integer status, control;
 
 		if (data->exttype) {
-			fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+			status =  data->acpi_data.states[i].status;
+			fid = status & EXT_FID_MASK;
+			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
 		} else {
-			fid = data->acpi_data.states[i].control & FID_MASK;
-			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+			control =  data->acpi_data.states[i].control;
+			fid = control & FID_MASK;
+			vid = (control >> VID_SHIFT) & VID_MASK;
 		}
 
 		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
 
-		powernow_table[i].index = fid; /* lower 8 bits */
-		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-		powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+		index = fid | (vid<<8);
+		powernow_table[i].index = index;
+
+		freq = find_khz_freq_from_fid(fid);
+		powernow_table[i].frequency = freq;
 
 		/* verify frequency is OK */
-		if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
-			(powernow_table[i].frequency < (MIN_FREQ * 1000))) {
-			dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
-			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+			dprintk("invalid freq %u kHz, ignoring\n", freq);
+			invalidate_entry(data, i);
 			continue;
 		}
 
-		/* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+		/* verify voltage is OK -
+		 * BIOSs are using "off" to indicate invalid */
 		if (vid == VID_OFF) {
 			dprintk("invalid vid %u, ignoring\n", vid);
-			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			invalidate_entry(data, i);
 			continue;
 		}
 
 		/* verify only 1 entry from the lo frequency table */
 		if (fid < HI_FID_TABLE_BOTTOM) {
 			if (cntlofreq) {
-				/* if both entries are the same, ignore this one ... */
-				if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
-				    (powernow_table[i].index != powernow_table[cntlofreq].index)) {
-					printk(KERN_ERR PFX "Too many lo freq table entries\n");
+				/* if both entries are the same,
+				 * ignore this one ... */
+				if ((freq != powernow_table[cntlofreq].frequency) ||
+				    (index != powernow_table[cntlofreq].index)) {
+					printk(KERN_ERR PFX
+						"Too many lo freq table "
+						"entries\n");
 					return 1;
 				}
 
-				dprintk("double low frequency table entry, ignoring it.\n");
-				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+				dprintk("double low frequency table entry, "
+						"ignoring it.\n");
+				invalidate_entry(data, i);
 				continue;
 			} else
 				cntlofreq = i;
 		}
 
-		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
-			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
-				powernow_table[i].frequency,
-				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
-			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+			printk(KERN_INFO PFX "invalid freq entries "
+				"%u kHz vs. %u kHz\n", freq,
+				(unsigned int)
+				(data->acpi_data.states[i].core_frequency
+				 * 1000));
+			invalidate_entry(data, i);
 			continue;
 		}
 	}
@@ -935,7 +1008,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
 	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+		acpi_processor_unregister_performance(&data->acpi_data,
+				data->cpu);
 	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }
 
@@ -953,15 +1027,9 @@ static int get_transition_latency(struct powernow_k8_data *data)
 	return 1000 * max_latency;
 }
 
-#else
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
-static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
-#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
-
 /* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_fidvid(struct powernow_k8_data *data,
+		unsigned int index)
 {
 	u32 fid = 0;
 	u32 vid = 0;
@@ -989,7 +1057,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 		return 0;
 	}
 
-	if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+	if ((fid < HI_FID_TABLE_BOTTOM) &&
+	    (data->currfid < HI_FID_TABLE_BOTTOM)) {
 		printk(KERN_ERR PFX
 		       "ignoring illegal change in lo freq table-%x to 0x%x\n",
 		       data->currfid, fid);
@@ -1017,7 +1086,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 }
 
 /* Take a frequency, and issue the hardware pstate transition command */
-static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_pstate(struct powernow_k8_data *data,
+		unsigned int index)
 {
 	u32 pstate = 0;
 	int res, i;
@@ -1029,7 +1099,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 	pstate = index & HW_PSTATE_MASK;
 	if (pstate > data->max_hw_pstate)
 		return 0;
-	freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+			data->currpstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
 	for_each_cpu_mask_nr(i, *(data->available_cores)) {
@@ -1048,7 +1119,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 }
 
 /* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+static int powernowk8_target(struct cpufreq_policy *pol,
+		unsigned targfreq, unsigned relation)
 {
 	cpumask_t oldmask;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@ -1087,14 +1159,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
 		data->currfid, data->currvid);
 
-		if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+		if ((checkvid != data->currvid) ||
+		    (checkfid != data->currfid)) {
 			printk(KERN_INFO PFX
-				"error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
-				checkfid, data->currfid, checkvid, data->currvid);
+				"error - out of sync, fix 0x%x 0x%x, "
+				"vid 0x%x 0x%x\n",
+				checkfid, data->currfid,
+				checkvid, data->currvid);
 		}
 	}
 
-	if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+	if (cpufreq_frequency_table_target(pol, data->powernow_table,
+				targfreq, relation, &newstate))
 		goto err_out;
 
 	mutex_lock(&fidvid_mutex);
@@ -1114,7 +1190,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 	mutex_unlock(&fidvid_mutex);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
+		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+				newstate);
 	else
 		pol->cur = find_khz_freq_from_fid(data->currfid);
 	ret = 0;
@@ -1141,6 +1218,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
 	int rc;
+	static int print_once;
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
@@ -1163,33 +1241,31 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		 * an UP version, and is deprecated by AMD.
 		 */
 		if (num_online_cpus() != 1) {
-#ifndef CONFIG_ACPI_PROCESSOR
-			printk(KERN_ERR PFX "ACPI Processor support is required "
-			       "for SMP systems but is absent. Please load the "
-			       "ACPI Processor module before starting this "
-			       "driver.\n");
-#else
-			printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide"
-			       " ACPI _PSS objects in a way that Linux "
-			       "understands. Please report this to the Linux "
-			       "ACPI maintainers and complain to your BIOS "
-			       "vendor.\n");
-#endif
-			kfree(data);
-			return -ENODEV;
+			/*
+			 * Replace this one with print_once as soon as such a
+			 * thing gets introduced
+			 */
+			if (!print_once) {
+				WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
+					"does not provide ACPI _PSS objects "
+					"in a way that Linux understands. "
+					"Please report this to the Linux ACPI"
+					" maintainers and complain to your "
+					"BIOS vendor.\n");
+				print_once++;
+			}
+			goto err_out;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		rc = find_psb_table(data);
-		if (rc) {
-			kfree(data);
-			return -ENODEV;
-		}
+		if (rc)
+			goto err_out;
+
 		/* Take a crude guess here.
 		 * That guess was in microseconds, so multiply with 1000 */
 		pol->cpuinfo.transition_latency = (
@@ -1204,16 +1280,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
+		goto err_out_unmask;
 	}
 
 	if (pending_bit_stuck()) {
 		printk(KERN_ERR PFX "failing init, change pending bit set\n");
-		goto err_out;
+		goto err_out_unmask;
 	}
 
 	if (query_current_values_with_pending_wait(data))
-		goto err_out;
+		goto err_out_unmask;
 
 	if (cpu_family == CPU_OPTERON)
 		fidvid_msr_init();
@@ -1228,7 +1304,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->available_cores = pol->cpus;
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+				data->currpstate);
 	else
 		pol->cur = find_khz_freq_from_fid(data->currfid);
 	dprintk("policy current frequency %d kHz\n", pol->cur);
@@ -1245,7 +1322,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
+		dprintk("cpu_init done, current pstate 0x%x\n",
+				data->currpstate);
 	else
 		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
 			data->currfid, data->currvid);
@@ -1254,15 +1332,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	return 0;
 
-err_out:
+err_out_unmask:
 	set_cpus_allowed_ptr(current, &oldmask);
 	powernow_k8_cpu_exit_acpi(data);
 
+err_out:
 	kfree(data);
 	return -ENODEV;
 }
 
-static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 
@@ -1279,7 +1358,7 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
 	return 0;
 }
 
-static unsigned int powernowk8_get (unsigned int cpu)
+static unsigned int powernowk8_get(unsigned int cpu)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask = current->cpus_allowed;
@@ -1315,7 +1394,7 @@ out:
 	return khz;
 }
 
-static struct freq_attr* powernow_k8_attr[] = {
+static struct freq_attr *powernow_k8_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -1360,7 +1439,8 @@ static void __exit powernowk8_exit(void)
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
 }
 
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+		"Mark Langsdorf <mark.langsdorf@amd.com>");
 MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 8ecc75b6c7c..6c6698feade 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -45,11 +45,10 @@ struct powernow_k8_data {
 	 * frequency is in kHz */
 	struct cpufreq_frequency_table  *powernow_table;
 
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
 	/* the acpi table needs to be kept. it's only available if ACPI was
 	 * used to determine valid frequency/vid/fid states */
 	struct acpi_processor_performance acpi_data;
-#endif
+
 	/* we need to keep track of associated cores, but let cpufreq
 	 * handle hotplug events - so just point at cpufreq pol->cpus
 	 * structure */
@@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
 
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
 
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
 static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
 static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-#endif
 
 #ifdef CONFIG_SMP
 static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index 42da9bd677d..435a996a613 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -19,17 +19,19 @@
 
 #include <linux/delay.h>
 #include <linux/cpufreq.h>
+#include <linux/timex.h>
+#include <linux/io.h>
 
 #include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
 
 #define MMCR_BASE	0xfffef000	/* The default base address */
 #define OFFS_CPUCTL	0x2   /* CPU Control Register */
 
 static __u8 __iomem *cpuctl;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"sc520_freq", msg)
+#define PFX "sc520_freq: "
 
 static struct cpufreq_frequency_table sc520_freq_table[] = {
 	{0x01,	100000},
@@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 
 	switch (clockspeed_reg & 0x03) {
 	default:
-		printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg);
+		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
+				"value %02x\n", clockspeed_reg);
 	case 0x01:
 		return 100000;
 	case 0x02:
@@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 	}
 }
 
-static void sc520_freq_set_cpu_state (unsigned int state)
+static void sc520_freq_set_cpu_state(unsigned int state)
 {
 
 	struct cpufreq_freqs	freqs;
@@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state)
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 };
 
-static int sc520_freq_verify (struct cpufreq_policy *policy)
+static int sc520_freq_verify(struct cpufreq_policy *policy)
 {
 	return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
 }
 
-static int sc520_freq_target (struct cpufreq_policy *policy,
+static int sc520_freq_target(struct cpufreq_policy *policy,
 			    unsigned int target_freq,
 			    unsigned int relation)
 {
 	unsigned int newstate = 0;
 
-	if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, sc520_freq_table,
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	sc520_freq_set_cpu_state(newstate);
@@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
 
 	result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
 	if (result)
-		return (result);
+		return result;
 
 	cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
 
@@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
 }
 
 
-static struct freq_attr* sc520_freq_attr[] = {
+static struct freq_attr *sc520_freq_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -155,13 +159,13 @@ static int __init sc520_freq_init(void)
 	int err;
 
 	/* Test if we have the right hardware */
-	if(c->x86_vendor != X86_VENDOR_AMD ||
-				c->x86 != 4 || c->x86_model != 9) {
+	if (c->x86_vendor != X86_VENDOR_AMD ||
+	    c->x86 != 4 || c->x86_model != 9) {
 		dprintk("no Elan SC520 processor found!\n");
 		return -ENODEV;
 	}
 	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
-	if(!cpuctl) {
+	if (!cpuctl) {
 		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
 		return -ENOMEM;
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3..c9f1fdc0283 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 	   enable it if not. */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
-	if (!(l & (1<<16))) {
-		l |= (1<<16);
+	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
 		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
 		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-		if (!(l & (1<<16))) {
+		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 			printk(KERN_INFO PFX
 				"couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index dedc1e98f16..8bbb11adb31 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
 
 /* speedstep_processor
  */
-static unsigned int speedstep_processor = 0;
+static unsigned int speedstep_processor;
 
 static u32 pmbase;
 
@@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
 };
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"speedstep-ich", msg)
 
 
 /**
@@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  *
  * Returns: -ENODEV if no register could be found
  */
-static int speedstep_find_register (void)
+static int speedstep_find_register(void)
 {
 	if (!speedstep_chipset_dev)
 		return -ENODEV;
@@ -90,7 +91,7 @@ static int speedstep_find_register (void)
  *
  *   Tries to change the SpeedStep state.
  */
-static void speedstep_set_state (unsigned int state)
+static void speedstep_set_state(unsigned int state)
 {
 	u8 pm2_blk;
 	u8 value;
@@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state)
 
 	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
 
-	if (state == (value & 0x1)) {
-		dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
-	} else {
-		printk (KERN_ERR "cpufreq: change failed - I/O error\n");
-	}
+	if (state == (value & 0x1))
+		dprintk("change to %u MHz succeeded\n",
+			speedstep_get_frequency(speedstep_processor) / 1000);
+	else
+		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
 
 	return;
 }
@@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state)
  *   Tries to activate the SpeedStep status and control registers.
  * Returns -EINVAL on an unsupported chipset, and zero on success.
  */
-static int speedstep_activate (void)
+static int speedstep_activate(void)
 {
 	u16 value = 0;
 
@@ -175,20 +176,18 @@ static int speedstep_activate (void)
  * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
  * chipset, or zero on failure.
  */
-static unsigned int speedstep_detect_chipset (void)
+static unsigned int speedstep_detect_chipset(void)
 {
 	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
 			      PCI_DEVICE_ID_INTEL_82801DB_12,
-			      PCI_ANY_ID,
-			      PCI_ANY_ID,
+			      PCI_ANY_ID, PCI_ANY_ID,
 			      NULL);
 	if (speedstep_chipset_dev)
 		return 4; /* 4-M */
 
 	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
 			      PCI_DEVICE_ID_INTEL_82801CA_12,
-			      PCI_ANY_ID,
-			      PCI_ANY_ID,
+			      PCI_ANY_ID, PCI_ANY_ID,
 			      NULL);
 	if (speedstep_chipset_dev)
 		return 3; /* 3-M */
@@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void)
 
 	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
 			      PCI_DEVICE_ID_INTEL_82801BA_10,
-			      PCI_ANY_ID,
-			      PCI_ANY_ID,
+			      PCI_ANY_ID, PCI_ANY_ID,
 			      NULL);
 	if (speedstep_chipset_dev) {
 		/* speedstep.c causes lockups on Dell Inspirons 8000 and
@@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void)
 
 		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
 			      PCI_DEVICE_ID_INTEL_82815_MC,
-			      PCI_ANY_ID,
-			      PCI_ANY_ID,
+			      PCI_ANY_ID, PCI_ANY_ID,
 			      NULL);
 
 		if (!hostbridge)
@@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus)
 
 	cpus_allowed = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpus);
-	speed = speedstep_get_processor_frequency(speedstep_processor);
+	speed = speedstep_get_frequency(speedstep_processor);
 	set_cpus_allowed_ptr(current, &cpus_allowed);
 	dprintk("detected %u kHz as current frequency\n", speed);
 	return speed;
@@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu)
  * speedstep_target - set a new CPUFreq policy
  * @policy: new policy
  * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
  *
  * Sets a new CPUFreq policy.
  */
-static int speedstep_target (struct cpufreq_policy *policy,
+static int speedstep_target(struct cpufreq_policy *policy,
 			     unsigned int target_freq,
 			     unsigned int relation)
 {
@@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy,
 	cpumask_t cpus_allowed;
 	int i;
 
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	freqs.old = _speedstep_get(policy->cpus);
@@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
  * Limit must be within speedstep_low_freq and speedstep_high_freq, with
  * at least one border included.
  */
-static int speedstep_verify (struct cpufreq_policy *policy)
+static int speedstep_verify(struct cpufreq_policy *policy)
 {
 	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
 }
@@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 		return -EIO;
 
 	dprintk("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+		? "low" : "high",
 		(speed / 1000));
 
 	/* cpuinfo and default policy values */
@@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
 	if (result)
-		return (result);
+		return result;
 
-        cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
 
 	return 0;
 }
@@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static struct freq_attr* speedstep_attr[] = {
+static struct freq_attr *speedstep_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -396,13 +396,15 @@ static int __init speedstep_init(void)
 	/* detect processor */
 	speedstep_processor = speedstep_detect_processor();
 	if (!speedstep_processor) {
-		dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+		dprintk("Intel(R) SpeedStep(TM) capable processor "
+				"not found\n");
 		return -ENODEV;
 	}
 
 	/* detect chipset */
 	if (!speedstep_detect_chipset()) {
-		dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+		dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+				"(yet) available.\n");
 		return -ENODEV;
 	}
 
@@ -431,9 +433,11 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+		"Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+		"with ICH-M southbridges.");
+MODULE_LICENSE("GPL");
 
 module_init(speedstep_init);
 module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index cdac7d62369..2e3c6862657 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -16,12 +16,16 @@
 #include <linux/slab.h>
 
 #include <asm/msr.h>
+#include <asm/tsc.h>
 #include "speedstep-lib.h"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"speedstep-lib", msg)
+
+#define PFX "speedstep-lib: "
 
 #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-static int relaxed_check = 0;
+static int relaxed_check;
 #else
 #define relaxed_check 0
 #endif
@@ -30,14 +34,14 @@ static int relaxed_check = 0;
  *                   GET PROCESSOR CORE SPEED IN KHZ                 *
  *********************************************************************/
 
-static unsigned int pentium3_get_frequency (unsigned int processor)
+static unsigned int pentium3_get_frequency(unsigned int processor)
 {
-        /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+	/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
 	struct {
 		unsigned int ratio;	/* Frequency Multiplier (x10) */
 		u8 bitmap;		/* power on configuration bits
 					[27, 25:22] (in MSR 0x2a) */
-	} msr_decode_mult [] = {
+	} msr_decode_mult[] = {
 		{ 30, 0x01 },
 		{ 35, 0x05 },
 		{ 40, 0x02 },
@@ -52,7 +56,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
 		{ 85, 0x26 },
 		{ 90, 0x20 },
 		{ 100, 0x2b },
-		{ 0, 0xff }     /* error or unknown value */
+		{ 0, 0xff }	/* error or unknown value */
 	};
 
 	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
@@ -60,7 +64,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
 		unsigned int value;	/* Front Side Bus speed in MHz */
 		u8 bitmap;		/* power on configuration bits [18: 19]
 					(in MSR 0x2a) */
-	} msr_decode_fsb [] = {
+	} msr_decode_fsb[] = {
 		{  66, 0x0 },
 		{ 100, 0x2 },
 		{ 133, 0x1 },
@@ -85,7 +89,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
 	}
 
 	/* decode the multiplier */
-	if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
+	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
 		dprintk("workaround for early PIIIs\n");
 		msr_lo &= 0x03c00000;
 	} else
@@ -97,9 +101,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
 		j++;
 	}
 
-	dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+	dprintk("speed is %u\n",
+		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
 
-	return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
+	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
 }
 
 
@@ -112,20 +117,23 @@ static unsigned int pentiumM_get_frequency(void)
 
 	/* see table B-2 of 24547212.pdf */
 	if (msr_lo & 0x00040000) {
-		printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
+		printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
+				msr_lo, msr_tmp);
 		return 0;
 	}
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
+	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+			msr_tmp, (msr_tmp * 100 * 1000));
 
-	return (msr_tmp * 100 * 1000);
+	return msr_tmp * 100 * 1000;
 }
 
 static unsigned int pentium_core_get_frequency(void)
 {
 	u32 fsb = 0;
 	u32 msr_lo, msr_tmp;
+	int ret;
 
 	rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
 	/* see table B-2 of 25366920.pdf */
@@ -153,12 +161,15 @@ static unsigned int pentium_core_get_frequency(void)
 	}
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+			msr_lo, msr_tmp);
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb));
+	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+			msr_tmp, (msr_tmp * fsb));
 
-	return (msr_tmp * fsb);
+	ret = (msr_tmp * fsb);
+	return ret;
 }
 
 
@@ -167,6 +178,16 @@ static unsigned int pentium4_get_frequency(void)
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	u32 msr_lo, msr_hi, mult;
 	unsigned int fsb = 0;
+	unsigned int ret;
+	u8 fsb_code;
+
+	/* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
+	 * to System Bus Frequency Ratio Field in the Processor Frequency
+	 * Configuration Register of the MSR. Therefore the current
+	 * frequency cannot be calculated and has to be measured.
+	 */
+	if (c->x86_model < 2)
+		return cpu_khz;
 
 	rdmsr(0x2c, msr_lo, msr_hi);
 
@@ -177,62 +198,61 @@ static unsigned int pentium4_get_frequency(void)
 	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
 	 * Intel Xeon Processors, on page B-4 and B-5.
 	 */
-	if (c->x86_model < 2)
+	fsb_code = (msr_lo >> 16) & 0x7;
+	switch (fsb_code) {
+	case 0:
 		fsb = 100 * 1000;
-	else {
-		u8 fsb_code = (msr_lo >> 16) & 0x7;
-		switch (fsb_code) {
-		case 0:
-			fsb = 100 * 1000;
-			break;
-		case 1:
-			fsb = 13333 * 10;
-			break;
-		case 2:
-			fsb = 200 * 1000;
-			break;
-		}
+		break;
+	case 1:
+		fsb = 13333 * 10;
+		break;
+	case 2:
+		fsb = 200 * 1000;
+		break;
 	}
 
 	if (!fsb)
-		printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
+		printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
+				"Please send an e-mail to <linux@brodo.de>\n");
 
 	/* Multiplier. */
 	mult = msr_lo >> 24;
 
-	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
+	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+			fsb, mult, (fsb * mult));
 
-	return (fsb * mult);
+	ret = (fsb * mult);
+	return ret;
 }
 
 
-unsigned int speedstep_get_processor_frequency(unsigned int processor)
+unsigned int speedstep_get_frequency(unsigned int processor)
 {
 	switch (processor) {
-	case SPEEDSTEP_PROCESSOR_PCORE:
+	case SPEEDSTEP_CPU_PCORE:
 		return pentium_core_get_frequency();
-	case SPEEDSTEP_PROCESSOR_PM:
+	case SPEEDSTEP_CPU_PM:
 		return pentiumM_get_frequency();
-	case SPEEDSTEP_PROCESSOR_P4D:
-	case SPEEDSTEP_PROCESSOR_P4M:
+	case SPEEDSTEP_CPU_P4D:
+	case SPEEDSTEP_CPU_P4M:
 		return pentium4_get_frequency();
-	case SPEEDSTEP_PROCESSOR_PIII_T:
-	case SPEEDSTEP_PROCESSOR_PIII_C:
-	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+	case SPEEDSTEP_CPU_PIII_T:
+	case SPEEDSTEP_CPU_PIII_C:
+	case SPEEDSTEP_CPU_PIII_C_EARLY:
 		return pentium3_get_frequency(processor);
 	default:
 		return 0;
 	};
 	return 0;
 }
-EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
+EXPORT_SYMBOL_GPL(speedstep_get_frequency);
 
 
 /*********************************************************************
  *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
  *********************************************************************/
 
-unsigned int speedstep_detect_processor (void)
+unsigned int speedstep_detect_processor(void)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
@@ -261,7 +281,7 @@ unsigned int speedstep_detect_processor (void)
 			 * sample has ebx = 0x0f, production has 0x0e.
 			 */
 			if ((ebx == 0x0e) || (ebx == 0x0f))
-				return SPEEDSTEP_PROCESSOR_P4M;
+				return SPEEDSTEP_CPU_P4M;
 			break;
 		case 7:
 			/*
@@ -272,7 +292,7 @@ unsigned int speedstep_detect_processor (void)
 			 * samples are only of B-stepping...
 			 */
 			if (ebx == 0x0e)
-				return SPEEDSTEP_PROCESSOR_P4M;
+				return SPEEDSTEP_CPU_P4M;
 			break;
 		case 9:
 			/*
@@ -288,10 +308,13 @@ unsigned int speedstep_detect_processor (void)
 			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
 			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
 			 * also, M-P4M HTs have ebx=0x8, too
-			 * For now, they are distinguished by the model_id string
+			 * For now, they are distinguished by the model_id
+			 * string
 			 */
-			if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL))
-				return SPEEDSTEP_PROCESSOR_P4M;
+			if ((ebx == 0x0e) ||
+				(strstr(c->x86_model_id,
+				    "Mobile Intel(R) Pentium(R) 4") != NULL))
+				return SPEEDSTEP_CPU_P4M;
 			break;
 		default:
 			break;
@@ -301,7 +324,8 @@ unsigned int speedstep_detect_processor (void)
 
 	switch (c->x86_model) {
 	case 0x0B: /* Intel PIII [Tualatin] */
-		/* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */
+		/* cpuid_ebx(1) is 0x04 for desktop PIII,
+		 * 0x06 for mobile PIII-M */
 		ebx = cpuid_ebx(0x00000001);
 		dprintk("ebx is %x\n", ebx);
 
@@ -313,14 +337,15 @@ unsigned int speedstep_detect_processor (void)
 		/* So far all PIII-M processors support SpeedStep. See
 		 * Intel's 24540640.pdf of June 2003
 		 */
-		return SPEEDSTEP_PROCESSOR_PIII_T;
+		return SPEEDSTEP_CPU_PIII_T;
 
 	case 0x08: /* Intel PIII [Coppermine] */
 
 		/* all mobile PIII Coppermines have FSB 100 MHz
 		 * ==> sort out a few desktop PIIIs. */
 		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
+		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+				msr_lo, msr_hi);
 		msr_lo &= 0x00c0000;
 		if (msr_lo != 0x0080000)
 			return 0;
@@ -332,13 +357,15 @@ unsigned int speedstep_detect_processor (void)
 		 * bit 56 or 57 is set
 		 */
 		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
-		if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+				msr_lo, msr_hi);
+		if ((msr_hi & (1<<18)) &&
+		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
 			if (c->x86_mask == 0x01) {
 				dprintk("early PIII version\n");
-				return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
+				return SPEEDSTEP_CPU_PIII_C_EARLY;
 			} else
-				return SPEEDSTEP_PROCESSOR_PIII_C;
+				return SPEEDSTEP_CPU_PIII_C;
 		}
 
 	default:
@@ -369,7 +396,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
 	dprintk("trying to determine both speeds\n");
 
 	/* get current speed */
-	prev_speed = speedstep_get_processor_frequency(processor);
+	prev_speed = speedstep_get_frequency(processor);
 	if (!prev_speed)
 		return -EIO;
 
@@ -379,7 +406,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
 
 	/* switch to low state */
 	set_state(SPEEDSTEP_LOW);
-	*low_speed = speedstep_get_processor_frequency(processor);
+	*low_speed = speedstep_get_frequency(processor);
 	if (!*low_speed) {
 		ret = -EIO;
 		goto out;
@@ -398,7 +425,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
 	if (transition_latency)
 		do_gettimeofday(&tv2);
 
-	*high_speed = speedstep_get_processor_frequency(processor);
+	*high_speed = speedstep_get_frequency(processor);
 	if (!*high_speed) {
 		ret = -EIO;
 		goto out;
@@ -426,9 +453,12 @@ unsigned int speedstep_get_freqs(unsigned int processor,
 		/* check if the latency measurement is too high or too low
 		 * and set it to a safe value (500uSec) in that case
 		 */
-		if (*transition_latency > 10000000 || *transition_latency < 50000) {
-			printk (KERN_WARNING "speedstep: frequency transition measured seems out of "
-					"range (%u nSec), falling back to a safe one of %u nSec.\n",
+		if (*transition_latency > 10000000 ||
+		    *transition_latency < 50000) {
+			printk(KERN_WARNING PFX "frequency transition "
+					"measured seems out of range (%u "
+					"nSec), falling back to a safe one of"
+					"%u nSec.\n",
 					*transition_latency, 500000);
 			*transition_latency = 500000;
 		}
@@ -436,15 +466,16 @@ unsigned int speedstep_get_freqs(unsigned int processor,
 
 out:
 	local_irq_restore(flags);
-	return (ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(speedstep_get_freqs);
 
 #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
 module_param(relaxed_check, int, 0444);
-MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
+MODULE_PARM_DESC(relaxed_check,
+		"Don't do all checks for speedstep capability.");
 #endif
 
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index b11bcc608ca..2b6c04e5a30 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -12,17 +12,17 @@
 
 /* processors */
 
-#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY	0x00000001  /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_C		0x00000002  /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_T		0x00000003  /* Tualatin core */
-#define SPEEDSTEP_PROCESSOR_P4M			0x00000004  /* P4-M  */
+#define SPEEDSTEP_CPU_PIII_C_EARLY	0x00000001  /* Coppermine core */
+#define SPEEDSTEP_CPU_PIII_C		0x00000002  /* Coppermine core */
+#define SPEEDSTEP_CPU_PIII_T		0x00000003  /* Tualatin core */
+#define SPEEDSTEP_CPU_P4M		0x00000004  /* P4-M  */
 
 /* the following processors are not speedstep-capable and are not auto-detected
  * in speedstep_detect_processor(). However, their speed can be detected using
- * the speedstep_get_processor_frequency() call. */
-#define SPEEDSTEP_PROCESSOR_PM			0xFFFFFF03  /* Pentium M  */
-#define SPEEDSTEP_PROCESSOR_P4D			0xFFFFFF04  /* desktop P4  */
-#define SPEEDSTEP_PROCESSOR_PCORE		0xFFFFFF05  /* Core */
+ * the speedstep_get_frequency() call. */
+#define SPEEDSTEP_CPU_PM		0xFFFFFF03  /* Pentium M  */
+#define SPEEDSTEP_CPU_P4D		0xFFFFFF04  /* desktop P4  */
+#define SPEEDSTEP_CPU_PCORE		0xFFFFFF05  /* Core */
 
 /* speedstep states -- only two of them */
 
@@ -34,7 +34,7 @@
 extern unsigned int speedstep_detect_processor (void);
 
 /* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
+extern unsigned int speedstep_get_frequency(unsigned int processor);
 
 
 /* detect the low and high speeds of the processor. The callback
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 8a85c93bd62..befea088e4f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -19,8 +19,8 @@
 #include <linux/cpufreq.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <asm/ist.h>
-#include <asm/io.h>
 
 #include "speedstep-lib.h"
 
@@ -30,12 +30,12 @@
  * If user gives it, these are used.
  *
  */
-static int smi_port = 0;
-static int smi_cmd = 0;
-static unsigned int smi_sig = 0;
+static int smi_port;
+static int smi_cmd;
+static unsigned int smi_sig;
 
 /* info about the processor */
-static unsigned int speedstep_processor = 0;
+static unsigned int speedstep_processor;
 
 /*
  * There are only two frequency states for each processor. Values
@@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  * of DMA activity going on? */
 #define SMI_TRIES 5
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+		"speedstep-smi", msg)
 
 /**
  * speedstep_smi_ownership
  */
-static int speedstep_smi_ownership (void)
+static int speedstep_smi_ownership(void)
 {
 	u32 command, result, magic, dummy;
 	u32 function = GET_SPEEDSTEP_OWNER;
@@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void)
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 	magic = virt_to_phys(magic_data);
 
-	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
+	dprintk("trying to obtain ownership with command %x at port %x\n",
+			command, smi_port);
 
 	__asm__ __volatile__(
 		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
 		"pop %%ebp\n"
-		: "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
-			"=S" (dummy)
+		: "=D" (result),
+		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+		  "=S" (dummy)
 		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
-			"D" (0), "S" (magic)
+		  "D" (0), "S" (magic)
 		: "memory"
 	);
 
@@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void)
  * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
  * shows that the latter occurs if !(ist_info.event & 0xFFFF).
  */
-static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
 {
 	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
-	u32 state=0;
+	u32 state = 0;
 	u32 function = GET_SPEEDSTEP_FREQS;
 
 	if (!(ist_info.event & 0xFFFF)) {
@@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
+	dprintk("trying to determine frequencies with command %x at port %x\n",
+			command, smi_port);
 
 	__asm__ __volatile__(
 		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
 		"pop %%ebp"
-		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
-		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+		: "=a" (result),
+		  "=b" (high_mhz),
+		  "=c" (low_mhz),
+		  "=d" (state), "=D" (edi), "=S" (dummy)
+		: "a" (command),
+		  "b" (function),
+		  "c" (state),
+		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
-	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
+	dprintk("result %x, low_freq %u, high_freq %u\n",
+			result, low_mhz, high_mhz);
 
 	/* abort if results are obviously incorrect... */
 	if ((high_mhz + low_mhz) < 600)
@@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
  * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
  *
  */
-static int speedstep_get_state (void)
+static int speedstep_get_state(void)
 {
-	u32 function=GET_SPEEDSTEP_STATE;
+	u32 function = GET_SPEEDSTEP_STATE;
 	u32 result, state, edi, command, dummy;
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
+	dprintk("trying to determine current setting with command %x "
+		"at port %x\n", command, smi_port);
 
 	__asm__ __volatile__(
 		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
 		"pop %%ebp\n"
-		: "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
+		: "=a" (result),
+		  "=b" (state), "=D" (edi),
+		  "=c" (dummy), "=d" (dummy), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (0),
+		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
 	dprintk("state is %x, result is %x\n", state, result);
 
-	return (state & 1);
+	return state & 1;
 }
 
 
@@ -165,11 +180,11 @@ static int speedstep_get_state (void)
  * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
  *
  */
-static void speedstep_set_state (unsigned int state)
+static void speedstep_set_state(unsigned int state)
 {
 	unsigned int result = 0, command, new_state, dummy;
 	unsigned long flags;
-	unsigned int function=SET_SPEEDSTEP_STATE;
+	unsigned int function = SET_SPEEDSTEP_STATE;
 	unsigned int retry = 0;
 
 	if (state > 0x1)
@@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
+	dprintk("trying to set frequency to state %u "
+		"with command %x at port %x\n",
+		state, command, smi_port);
 
 	do {
 		if (retry) {
-			dprintk("retry %u, previous result %u, waiting...\n", retry, result);
+			dprintk("retry %u, previous result %u, waiting...\n",
+					retry, result);
 			mdelay(retry * 50);
 		}
 		retry++;
@@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state)
 			"push %%ebp\n"
 			"out %%al, (%%dx)\n"
 			"pop %%ebp"
-			: "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
-				"=d" (dummy), "=S" (dummy)
-			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+			: "=b" (new_state), "=D" (result),
+			  "=c" (dummy), "=a" (dummy),
+			  "=d" (dummy), "=S" (dummy)
+			: "a" (command), "b" (function), "c" (state),
+			  "d" (smi_port), "S" (0), "D" (0)
 			);
 	} while ((new_state != state) && (retry <= SMI_TRIES));
 
 	/* enable IRQs */
 	local_irq_restore(flags);
 
-	if (new_state == state) {
-		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
-	} else {
-		printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
-	}
+	if (new_state == state)
+		dprintk("change to %u MHz succeeded after %u tries "
+			"with result %u\n",
+			(speedstep_freqs[new_state].frequency / 1000),
+			retry, result);
+	else
+		printk(KERN_ERR "cpufreq: change to state %u "
+			"failed with new_state %u and result %u\n",
+			state, new_state, result);
 
 	return;
 }
@@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state)
  *
  * Sets a new CPUFreq policy/freq.
  */
-static int speedstep_target (struct cpufreq_policy *policy,
+static int speedstep_target(struct cpufreq_policy *policy,
 			unsigned int target_freq, unsigned int relation)
 {
 	unsigned int newstate = 0;
 	struct cpufreq_freqs freqs;
 
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+				target_freq, relation, &newstate))
 		return -EINVAL;
 
 	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
@@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
  * Limit must be within speedstep_low_freq and speedstep_high_freq, with
  * at least one border included.
  */
-static int speedstep_verify (struct cpufreq_policy *policy)
+static int speedstep_verify(struct cpufreq_policy *policy)
 {
 	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
 }
@@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy)
 static int speedstep_cpu_init(struct cpufreq_policy *policy)
 {
 	int result;
-	unsigned int speed,state;
+	unsigned int speed, state;
+	unsigned int *low, *high;
 
 	/* capability check */
 	if (policy->cpu != 0)
@@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	/* detect low and high frequency */
-	result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
-				&speedstep_freqs[SPEEDSTEP_HIGH].frequency);
+	low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
+	high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
+
+	result = speedstep_smi_get_freqs(low, high);
 	if (result) {
-		/* fall back to speedstep_lib.c dection mechanism: try both states out */
-		dprintk("could not detect low and high frequencies by SMI call.\n");
+		/* fall back to speedstep_lib.c dection mechanism:
+		 * try both states out */
+		dprintk("could not detect low and high frequencies "
+				"by SMI call.\n");
 		result = speedstep_get_freqs(speedstep_processor,
-				&speedstep_freqs[SPEEDSTEP_LOW].frequency,
-				&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+				low, high,
 				NULL,
 				&speedstep_set_state);
 
 		if (result) {
-			dprintk("could not detect two different speeds -- aborting.\n");
+			dprintk("could not detect two different speeds"
+					" -- aborting.\n");
 			return result;
 		} else
 			dprintk("workaround worked.\n");
@@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	speed = speedstep_freqs[state].frequency;
 
 	dprintk("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+		? "low" : "high",
 		(speed / 1000));
 
 	/* cpuinfo and default policy values */
@@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
 	if (result)
-		return (result);
+		return result;
 
 	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
 
@@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu)
 {
 	if (cpu)
 		return -ENODEV;
-	return speedstep_get_processor_frequency(speedstep_processor);
+	return speedstep_get_frequency(speedstep_processor);
 }
 
 
@@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
 	return result;
 }
 
-static struct freq_attr* speedstep_attr[] = {
+static struct freq_attr *speedstep_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,
 };
@@ -364,21 +395,23 @@ static int __init speedstep_init(void)
 	speedstep_processor = speedstep_detect_processor();
 
 	switch (speedstep_processor) {
-	case SPEEDSTEP_PROCESSOR_PIII_T:
-	case SPEEDSTEP_PROCESSOR_PIII_C:
-	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+	case SPEEDSTEP_CPU_PIII_T:
+	case SPEEDSTEP_CPU_PIII_C:
+	case SPEEDSTEP_CPU_PIII_C_EARLY:
 		break;
 	default:
 		speedstep_processor = 0;
 	}
 
 	if (!speedstep_processor) {
-		dprintk ("No supported Intel CPU detected.\n");
+		dprintk("No supported Intel CPU detected.\n");
 		return -ENODEV;
 	}
 
-	dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n",
-		ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
+	dprintk("signature:0x%.8lx, command:0x%.8lx, "
+		"event:0x%.8lx, perf_level:0x%.8lx.\n",
+		ist_info.signature, ist_info.command,
+		ist_info.event, ist_info.perf_level);
 
 	/* Error if no IST-SMI BIOS or no PARM
 		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
@@ -416,17 +449,20 @@ static void __exit speedstep_exit(void)
 	cpufreq_unregister_driver(&speedstep_driver);
 }
 
-module_param(smi_port,  int, 0444);
-module_param(smi_cmd,   int, 0444);
-module_param(smi_sig,  uint, 0444);
+module_param(smi_port, int, 0444);
+module_param(smi_cmd,  int, 0444);
+module_param(smi_sig, uint, 0444);
 
-MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
-MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
-MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
+		"-- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
+		"-- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
+		"SMI interface.");
 
-MODULE_AUTHOR ("Hiroshi Miura");
-MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Hiroshi Miura");
+MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE("GPL");
 
 module_init(speedstep_init);
 module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index ffd0f5ed071..593171e967e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
  */
 static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
 
-static char Cx86_model[][9] __cpuinitdata = {
+static const char __cpuinitconst Cx86_model[][9] = {
 	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
 	"M II ", "Unknown"
 };
-static char Cx486_name[][5] __cpuinitdata = {
+static const char __cpuinitconst Cx486_name[][5] = {
 	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
 	"SRx2", "DRx2"
 };
-static char Cx486S_name[][4] __cpuinitdata = {
+static const char __cpuinitconst Cx486S_name[][4] = {
 	"S", "S2", "Se", "S2e"
 };
-static char Cx486D_name[][4] __cpuinitdata = {
+static const char __cpuinitconst Cx486D_name[][4] = {
 	"DX", "DX2", "?", "?", "?", "DX4"
 };
 static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
-static char cyrix_model_mult1[] __cpuinitdata = "12??43";
-static char cyrix_model_mult2[] __cpuinitdata = "12233445";
+static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
+static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
 
 /*
  * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
 	}
 }
 
-static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
 	.c_vendor	= "Cyrix",
 	.c_ident	= { "CyrixInstead" },
 	.c_early_init	= early_init_cyrix,
@@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
 
 cpu_dev_register(cyrix_cpu_dev);
 
-static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
 	.c_vendor	= "NSC",
 	.c_ident	= { "Geode by NSC" },
 	.c_init		= init_nsc,
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ad..7437fa133c0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
+#include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
 
@@ -13,6 +14,7 @@
 #include <asm/uaccess.h>
 #include <asm/ds.h>
 #include <asm/bugs.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/topology.h>
@@ -24,7 +26,6 @@
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
-#include <mach_apic.h>
 #endif
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -54,15 +55,37 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = 128;
 #endif
 
+	/* CPUID workaround for 0F33/0F34 CPU */
+	if (c->x86 == 0xF && c->x86_model == 0x3
+	    && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+		c->x86_phys_bits = 36;
+
 	/*
 	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
-	 * with P/T states and does not stop in deep C-states
+	 * with P/T states and does not stop in deep C-states.
+	 *
+	 * It is also reliable across cores and sockets. (but not across
+	 * cabinets - we turn it off in that case explicitly.)
 	 */
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+		set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+		sched_clock_stable = 1;
 	}
 
+	/*
+	 * There is a known erratum on Pentium III and Core Solo
+	 * and Core Duo CPUs.
+	 * " Page with PAT set to WC while associated MTRR is UC
+	 *   may consolidate to UC "
+	 * Because of this erratum, it is better to stick with
+	 * setting WC in MTRR rather than using PAT on these CPUs.
+	 *
+	 * Enable PAT WC only on P4, Core 2 or later CPUs.
+	 */
+	if (c->x86 == 6 && c->x86_model < 15)
+		clear_cpu_cap(c, X86_FEATURE_PAT);
 }
 
 #ifdef CONFIG_X86_32
@@ -99,6 +122,28 @@ static void __cpuinit trap_init_f00f_bug(void)
 }
 #endif
 
+static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	/* calling is from identify_secondary_cpu() ? */
+	if (c->cpu_index == boot_cpu_id)
+		return;
+
+	/*
+	 * Mask B, Pentium, but not Pentium MMX
+	 */
+	if (c->x86 == 5 &&
+	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
+	    c->x86_model <= 3) {
+		/*
+		 * Remember we have B step Pentia with bugs
+		 */
+		WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
+				    "with B stepping processors.\n");
+	}
+#endif
+}
+
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 {
 	unsigned long lo, hi;
@@ -135,10 +180,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 	 */
 	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
 		rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
-		if ((lo & (1<<9)) == 0) {
+		if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
 			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
 			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
-			lo |= (1<<9);	/* Disable hw prefetching */
+			lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
 			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
 		}
 	}
@@ -175,6 +220,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_NUMAQ
 	numaq_tsc_disable();
 #endif
+
+	intel_smp_check(c);
 }
 #else
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
@@ -374,7 +421,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
 }
 #endif
 
-static struct cpu_dev intel_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
 	.c_vendor	= "Intel",
 	.c_ident	= { "GenuineIntel" },
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb85fc..c471eb1a389 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -32,7 +32,7 @@ struct _cache_table
 };
 
 /* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __cpuinitdata =
+static const struct _cache_table __cpuinitconst cache_table[] =
 {
 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
@@ -147,7 +147,16 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
 	unsigned long can_disable;
-	cpumask_t shared_cpu_map;	/* future?: only cpus/node is needed */
+	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
+};
+
+/* subset of above _cpuid4_info w/o shared_cpu_map */
+struct _cpuid4_info_regs {
+	union _cpuid4_leaf_eax eax;
+	union _cpuid4_leaf_ebx ebx;
+	union _cpuid4_leaf_ecx ecx;
+	unsigned long size;
+	unsigned long can_disable;
 };
 
 #ifdef CONFIG_PCI
@@ -197,15 +206,15 @@ union l3_cache {
 	unsigned val;
 };
 
-static unsigned short assocs[] __cpuinitdata = {
+static const unsigned short __cpuinitconst assocs[] = {
 	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
 	[8] = 16, [0xa] = 32, [0xb] = 48,
 	[0xc] = 64,
 	[0xf] = 0xffff // ??
 };
 
-static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
-static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
+static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
+static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
 
 static void __cpuinit
 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 }
 
 static void __cpuinit
-amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
 	if (index < 3)
 		return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
 }
 
 static int
-__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+__cpuinit cpuid4_cache_lookup_regs(int index,
+				   struct _cpuid4_info_regs *this_leaf)
 {
 	union _cpuid4_leaf_eax 	eax;
 	union _cpuid4_leaf_ebx 	ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 	return 0;
 }
 
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+	struct _cpuid4_info_regs *leaf_regs =
+		(struct _cpuid4_info_regs *)this_leaf;
+
+	return cpuid4_cache_lookup_regs(index, leaf_regs);
+}
+
 static int __cpuinit find_num_cache_leaves(void)
 {
 	unsigned int		eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 		 * parameters cpuid leaf to find the cache details
 		 */
 		for (i = 0; i < num_cache_leaves; i++) {
-			struct _cpuid4_info this_leaf;
-
+			struct _cpuid4_info_regs this_leaf;
 			int retval;
 
-			retval = cpuid4_cache_lookup(i, &this_leaf);
+			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 			if (retval >= 0) {
 				switch(this_leaf.eax.split.level) {
 				    case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 
 	if (num_threads_sharing == 1)
-		cpu_set(cpu, this_leaf->shared_cpu_map);
+		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 	else {
 		index_msb = get_count_order(num_threads_sharing);
 
 		for_each_online_cpu(i) {
 			if (cpu_data(i).apicid >> index_msb ==
 			    c->apicid >> index_msb) {
-				cpu_set(i, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(i,
+					to_cpumask(this_leaf->shared_cpu_map));
 				if (i != cpu && per_cpu(cpuid4_info, i))  {
-					sibling_leaf = CPUID4_INFO_IDX(i, index);
-					cpu_set(cpu, sibling_leaf->shared_cpu_map);
+					sibling_leaf =
+						CPUID4_INFO_IDX(i, index);
+					cpumask_set_cpu(cpu, to_cpumask(
+						sibling_leaf->shared_cpu_map));
 				}
 			}
 		}
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 	int sibling;
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
+	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
-		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+		cpumask_clear_cpu(cpu,
+				  to_cpumask(sibling_leaf->shared_cpu_map));
 	}
 }
 #else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 	int n = 0;
 
 	if (len > 1) {
-		cpumask_t *mask = &this_leaf->shared_cpu_map;
+		const struct cpumask *mask;
 
+		mask = to_cpumask(this_leaf->shared_cpu_map);
 		n = type?
 			cpulist_scnprintf(buf, len-2, mask) :
 			cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
 
 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
 {
-	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+	int node = cpu_to_node(cpumask_first(mask));
 	struct pci_dev *dev = NULL;
 	ssize_t ret = 0;
 	int i;
@@ -733,7 +757,8 @@ static ssize_t
 store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
 		    size_t count)
 {
-	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+	int node = cpu_to_node(cpumask_first(mask));
 	struct pci_dev *dev = NULL;
 	unsigned int ret, index, val;
 
@@ -878,7 +903,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static cpumask_t cache_dev_map = CPU_MASK_NONE;
+static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
 
 /* Add/Remove cache interface for CPU device */
 static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		}
 		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
 	}
-	cpu_set(cpu, cache_dev_map);
+	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
 
 	kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
 	return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
 
 	if (per_cpu(cpuid4_info, cpu) == NULL)
 		return;
-	if (!cpu_isset(cpu, cache_dev_map))
+	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
 		return;
-	cpu_clear(cpu, cache_dev_map);
+	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
 
 	for (i = 0; i < num_cache_leaves; i++)
 		kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index d7d2323bbb6..b2f89829bbe 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
+obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index dfaebce3633..3552119b091 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
 	}
 }
 
-static unsigned long old_cr4 __initdata;
-
-void __init stop_mce(void)
-{
-	old_cr4 = read_cr4();
-	clear_in_cr4(X86_CR4_MCE);
-}
-
-void __init restart_mce(void)
-{
-	if (old_cr4 & X86_CR4_MCE)
-		set_in_cr4(X86_CR4_MCE);
-}
-
 static int __init mcheck_disable(char *str)
 {
 	mce_disabled = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index fe79985ce0f..ca14604611e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -3,6 +3,8 @@
  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
  * Rest from unknown author(s).
  * 2004 Andi Kleen. Rewrote most of it.
+ * Copyright 2008 Intel Corporation
+ * Author: Andi Kleen
  */
 
 #include <linux/init.h>
@@ -24,6 +26,9 @@
 #include <linux/ctype.h>
 #include <linux/kmod.h>
 #include <linux/kdebug.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/ratelimit.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
@@ -32,7 +37,6 @@
 #include <asm/idle.h>
 
 #define MISC_MCELOG_MINOR 227
-#define NR_SYSFS_BANKS 6
 
 atomic_t mce_entry;
 
@@ -47,7 +51,7 @@ static int mce_dont_init;
  */
 static int tolerant = 1;
 static int banks;
-static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
+static u64 *bank;
 static unsigned long notify_user;
 static int rip_msr;
 static int mce_bootlog = -1;
@@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger, NULL };
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 
+/* MCA banks polled by the period polling timer for corrected events */
+DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
+	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
+};
+
+/* Do initial initialization of a struct mce */
+void mce_setup(struct mce *m)
+{
+	memset(m, 0, sizeof(struct mce));
+	m->cpu = smp_processor_id();
+	rdtscll(m->tsc);
+}
+
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -119,11 +136,11 @@ static void print_mce(struct mce *m)
 			print_symbol("{%s}", m->ip);
 		printk("\n");
 	}
-	printk(KERN_EMERG "TSC %Lx ", m->tsc);
+	printk(KERN_EMERG "TSC %llx ", m->tsc);
 	if (m->addr)
-		printk("ADDR %Lx ", m->addr);
+		printk("ADDR %llx ", m->addr);
 	if (m->misc)
-		printk("MISC %Lx ", m->misc);
+		printk("MISC %llx ", m->misc);
 	printk("\n");
 	printk(KERN_EMERG "This is not a software problem!\n");
 	printk(KERN_EMERG "Run through mcelog --ascii to decode "
@@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
 	panic(msg);
 }
 
-static int mce_available(struct cpuinfo_x86 *c)
+int mce_available(struct cpuinfo_x86 *c)
 {
+	if (mce_dont_init)
+		return 0;
 	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
 
@@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 }
 
 /*
- * The actual machine check handler
+ * Poll for corrected events or events that happened before reset.
+ * Those are just logged through /dev/mcelog.
+ *
+ * This is executed in standard interrupt context.
+ */
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+{
+	struct mce m;
+	int i;
+
+	mce_setup(&m);
+
+	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+	for (i = 0; i < banks; i++) {
+		if (!bank[i] || !test_bit(i, *b))
+			continue;
+
+		m.misc = 0;
+		m.addr = 0;
+		m.bank = i;
+		m.tsc = 0;
+
+		barrier();
+		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+		if (!(m.status & MCI_STATUS_VAL))
+			continue;
+
+		/*
+		 * Uncorrected events are handled by the exception handler
+		 * when it is enabled. But when the exception is disabled log
+		 * everything.
+		 *
+		 * TBD do the same check for MCI_STATUS_EN here?
+		 */
+		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+			continue;
+
+		if (m.status & MCI_STATUS_MISCV)
+			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+		if (m.status & MCI_STATUS_ADDRV)
+			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+
+		if (!(flags & MCP_TIMESTAMP))
+			m.tsc = 0;
+		/*
+		 * Don't get the IP here because it's unlikely to
+		 * have anything to do with the actual error location.
+		 */
+
+		mce_log(&m);
+		add_taint(TAINT_MACHINE_CHECK);
+
+		/*
+		 * Clear state for this bank.
+		 */
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+
+	/*
+	 * Don't clear MCG_STATUS here because it's only defined for
+	 * exceptions.
+	 */
+}
+
+/*
+ * The actual machine check handler. This only handles real
+ * exceptions when something got corrupted coming in through int 18.
+ *
+ * This is executed in NMI context not subject to normal locking rules. This
+ * implies that most kernel services cannot be safely used. Don't even
+ * think about putting a printk in there!
  */
 void do_machine_check(struct pt_regs * regs, long error_code)
 {
@@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	 * error.
 	 */
 	int kill_it = 0;
+	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 
 	atomic_inc(&mce_entry);
 
-	if ((regs
-	     && notify_die(DIE_NMI, "machine check", regs, error_code,
+	if (notify_die(DIE_NMI, "machine check", regs, error_code,
 			   18, SIGKILL) == NOTIFY_STOP)
-	    || !banks)
+		goto out2;
+	if (!banks)
 		goto out2;
 
-	memset(&m, 0, sizeof(struct mce));
-	m.cpu = smp_processor_id();
+	mce_setup(&m);
+
 	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
 	/* if the restart IP is not valid, we're done for */
 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
@@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	barrier();
 
 	for (i = 0; i < banks; i++) {
-		if (i < NR_SYSFS_BANKS && !bank[i])
+		__clear_bit(i, toclear);
+		if (!bank[i])
 			continue;
 
 		m.misc = 0;
 		m.addr = 0;
 		m.bank = i;
-		m.tsc = 0;
 
 		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
 		if ((m.status & MCI_STATUS_VAL) == 0)
 			continue;
 
+		/*
+		 * Non uncorrected errors are handled by machine_check_poll
+		 * Leave them alone.
+		 */
+		if ((m.status & MCI_STATUS_UC) == 0)
+			continue;
+
+		/*
+		 * Set taint even when machine check was not enabled.
+		 */
+		add_taint(TAINT_MACHINE_CHECK);
+
+		__set_bit(i, toclear);
+
 		if (m.status & MCI_STATUS_EN) {
 			/* if PCC was set, there's no way out */
 			no_way_out |= !!(m.status & MCI_STATUS_PCC);
@@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 					no_way_out = 1;
 				kill_it = 1;
 			}
+		} else {
+			/*
+			 * Machine check event was not enabled. Clear, but
+			 * ignore.
+			 */
+			continue;
 		}
 
 		if (m.status & MCI_STATUS_MISCV)
@@ -243,10 +353,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
 
 		mce_get_rip(&m, regs);
-		if (error_code >= 0)
-			rdtscll(m.tsc);
-		if (error_code != -2)
-			mce_log(&m);
+		mce_log(&m);
 
 		/* Did this bank cause the exception? */
 		/* Assume that the bank with uncorrectable errors did it,
@@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 			panicm = m;
 			panicm_found = 1;
 		}
-
-		add_taint(TAINT_MACHINE_CHECK);
 	}
 
-	/* Never do anything final in the polling timer */
-	if (!regs)
-		goto out;
-
 	/* If we didn't find an uncorrectable error, pick
 	   the last one (shouldn't happen, just being safe). */
 	if (!panicm_found)
@@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	/* notify userspace ASAP */
 	set_thread_flag(TIF_MCE_NOTIFY);
 
- out:
 	/* the last thing we do is clear state */
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	for (i = 0; i < banks; i++) {
+		if (test_bit(i, toclear))
+			wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
 	wrmsrl(MSR_IA32_MCG_STATUS, 0);
  out2:
 	atomic_dec(&mce_entry);
@@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * regs, long error_code)
  * and historically has been the register value of the
  * MSR_IA32_THERMAL_STATUS (Intel) msr.
  */
-void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
+void mce_log_therm_throt_event(__u64 status)
 {
 	struct mce m;
 
-	memset(&m, 0, sizeof(m));
-	m.cpu = cpu;
+	mce_setup(&m);
 	m.bank = MCE_THERMAL_BANK;
 	m.status = status;
-	rdtscll(m.tsc);
 	mce_log(&m);
 }
 #endif /* CONFIG_X86_MCE_INTEL */
@@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
 
 static int check_interval = 5 * 60; /* 5 minutes */
 static int next_interval; /* in jiffies */
-static void mcheck_timer(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
+static void mcheck_timer(unsigned long);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_check_cpu(void *info)
+static void mcheck_timer(unsigned long data)
 {
-	if (mce_available(&current_cpu_data))
-		do_machine_check(NULL, 0);
-}
+	struct timer_list *t = &per_cpu(mce_timer, data);
 
-static void mcheck_timer(struct work_struct *work)
-{
-	on_each_cpu(mcheck_check_cpu, NULL, 1);
+	WARN_ON(smp_processor_id() != data);
+
+	if (mce_available(&current_cpu_data))
+		machine_check_poll(MCP_TIMESTAMP,
+				&__get_cpu_var(mce_poll_banks));
 
 	/*
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
@@ -377,31 +477,41 @@ static void mcheck_timer(struct work_struct *work)
 				(int)round_jiffies_relative(check_interval*HZ));
 	}
 
-	schedule_delayed_work(&mcheck_work, next_interval);
+	t->expires = jiffies + next_interval;
+	add_timer(t);
+}
+
+static void mce_do_trigger(struct work_struct *work)
+{
+	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
 }
 
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
 /*
- * This is only called from process context.  This is where we do
- * anything we need to alert userspace about new MCEs.  This is called
- * directly from the poller and also from entry.S and idle, thanks to
- * TIF_MCE_NOTIFY.
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
  */
 int mce_notify_user(void)
 {
+	/* Not more than two messages every minute */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
 	clear_thread_flag(TIF_MCE_NOTIFY);
 	if (test_and_clear_bit(0, &notify_user)) {
-		static unsigned long last_print;
-		unsigned long now = jiffies;
-
 		wake_up_interruptible(&mce_wait);
-		if (trigger[0])
-			call_usermodehelper(trigger, trigger_argv, NULL,
-						UMH_NO_WAIT);
 
-		if (time_after_eq(now, last_print + (check_interval*HZ))) {
-			last_print = now;
+		/*
+		 * There is no risk of missing notifications because
+		 * work_pending is always cleared before the function is
+		 * executed.
+		 */
+		if (trigger[0] && !work_pending(&mce_trigger_work))
+			schedule_work(&mce_trigger_work);
+
+		if (__ratelimit(&ratelimit))
 			printk(KERN_INFO "Machine check events logged\n");
-		}
 
 		return 1;
 	}
@@ -425,63 +535,78 @@ static struct notifier_block mce_idle_notifier = {
 
 static __init int periodic_mcheck_init(void)
 {
-	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
-	idle_notifier_register(&mce_idle_notifier);
-	return 0;
+       idle_notifier_register(&mce_idle_notifier);
+       return 0;
 }
 __initcall(periodic_mcheck_init);
 
-
 /*
  * Initialize Machine Checks for a CPU.
  */
-static void mce_init(void *dummy)
+static int mce_cap_init(void)
 {
 	u64 cap;
-	int i;
+	unsigned b;
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	banks = cap & 0xff;
-	if (banks > MCE_EXTENDED_BANK) {
-		banks = MCE_EXTENDED_BANK;
-		printk(KERN_INFO "MCE: warning: using only %d banks\n",
-		       MCE_EXTENDED_BANK);
+	b = cap & 0xff;
+	if (b > MAX_NR_BANKS) {
+		printk(KERN_WARNING
+		       "MCE: Using only %u machine check banks out of %u\n",
+			MAX_NR_BANKS, b);
+		b = MAX_NR_BANKS;
 	}
+
+	/* Don't support asymmetric configurations today */
+	WARN_ON(banks != 0 && b != banks);
+	banks = b;
+	if (!bank) {
+		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+		if (!bank)
+			return -ENOMEM;
+		memset(bank, 0xff, banks * sizeof(u64));
+	}
+
 	/* Use accurate RIP reporting if available. */
 	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
 		rip_msr = MSR_IA32_MCG_EIP;
 
-	/* Log the machine checks left over from the previous reset.
-	   This also clears all registers */
-	do_machine_check(NULL, mce_bootlog ? -1 : -2);
+	return 0;
+}
+
+static void mce_init(void *dummy)
+{
+	u64 cap;
+	int i;
+	mce_banks_t all_banks;
+
+	/*
+	 * Log the machine checks left over from the previous reset.
+	 */
+	bitmap_fill(all_banks, MAX_NR_BANKS);
+	machine_check_poll(MCP_UC, &all_banks);
 
 	set_in_cr4(X86_CR4_MCE);
 
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
 	for (i = 0; i < banks; i++) {
-		if (i < NR_SYSFS_BANKS)
-			wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
-		else
-			wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
-
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
 		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
 }
 
 /* Add per CPU specific workarounds here */
-static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	/* This should be disabled by the BIOS, but isn't always */
 	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if(c->x86 == 15)
+		if (c->x86 == 15 && banks > 4)
 			/* disable GART TBL walk error reporting, which trips off
 			   incorrectly with the IOMMU & 3ware & Cerberus. */
-			clear_bit(10, &bank[4]);
+			clear_bit(10, (unsigned long *)&bank[4]);
 		if(c->x86 <= 17 && mce_bootlog < 0)
 			/* Lots of broken BIOS around that don't clear them
 			   by default and leave crap in there. Don't log. */
@@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
 	}
 }
 
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+
+	/* data race harmless because everyone sets to the same value */
+	if (!next_interval)
+		next_interval = check_interval * HZ;
+	if (!next_interval)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies(jiffies + next_interval);
+	add_timer(t);
+}
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off.
  */
 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 {
-	mce_cpu_quirks(c);
+	if (!mce_available(c))
+		return;
 
-	if (mce_dont_init ||
-	    !mce_available(c))
+	if (mce_cap_init() < 0) {
+		mce_dont_init = 1;
 		return;
+	}
+	mce_cpu_quirks(c);
 
 	mce_init(NULL);
 	mce_cpu_features(c);
+	mce_init_timer();
 }
 
 /*
@@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 {
 	unsigned long *cpu_tsc;
 	static DEFINE_MUTEX(mce_read_mutex);
-	unsigned next;
+	unsigned prev, next;
 	char __user *buf = ubuf;
 	int i, err;
 
@@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	}
 
 	err = 0;
-	for (i = 0; i < next; i++) {
-		unsigned long start = jiffies;
-
-		while (!mcelog.entry[i].finished) {
-			if (time_after_eq(jiffies, start + 2)) {
-				memset(mcelog.entry + i,0, sizeof(struct mce));
-				goto timeout;
+	prev = 0;
+	do {
+		for (i = prev; i < next; i++) {
+			unsigned long start = jiffies;
+
+			while (!mcelog.entry[i].finished) {
+				if (time_after_eq(jiffies, start + 2)) {
+					memset(mcelog.entry + i, 0,
+					       sizeof(struct mce));
+					goto timeout;
+				}
+				cpu_relax();
 			}
-			cpu_relax();
+			smp_rmb();
+			err |= copy_to_user(buf, mcelog.entry + i,
+					    sizeof(struct mce));
+			buf += sizeof(struct mce);
+timeout:
+			;
 		}
-		smp_rmb();
-		err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
-		buf += sizeof(struct mce);
- timeout:
-		;
-	}
 
-	memset(mcelog.entry, 0, next * sizeof(struct mce));
-	mcelog.next = 0;
+		memset(mcelog.entry + prev, 0,
+		       (next - prev) * sizeof(struct mce));
+		prev = next;
+		next = cmpxchg(&mcelog.next, prev, 0);
+	} while (next != prev);
 
 	synchronize_sched();
 
@@ -680,20 +830,6 @@ static struct miscdevice mce_log_device = {
 	&mce_chrdev_ops,
 };
 
-static unsigned long old_cr4 __initdata;
-
-void __init stop_mce(void)
-{
-	old_cr4 = read_cr4();
-	clear_in_cr4(X86_CR4_MCE);
-}
-
-void __init restart_mce(void)
-{
-	if (old_cr4 & X86_CR4_MCE)
-		set_in_cr4(X86_CR4_MCE);
-}
-
 /*
  * Old style boot options parsing. Only for compatibility.
  */
@@ -703,8 +839,7 @@ static int __init mcheck_disable(char *str)
 	return 1;
 }
 
-/* mce=off disables machine check. Note you can re-enable it later
-   using sysfs.
+/* mce=off disables machine check.
    mce=TOLERANCELEVEL (number, see above)
    mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
    mce=nobootlog Don't log MCEs from before booting. */
@@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable);
  * Sysfs support
  */
 
+/*
+ * Disable machine checks on suspend and shutdown. We can't really handle
+ * them later.
+ */
+static int mce_disable(void)
+{
+	int i;
+
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	return 0;
+}
+
+static int mce_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return mce_disable();
+}
+
+static int mce_shutdown(struct sys_device *dev)
+{
+	return mce_disable();
+}
+
 /* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
    Only one CPU is active at this time, the others get readded later using
    CPU hotplug. */
@@ -738,20 +896,24 @@ static int mce_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
 /* Reinit MCEs after user configuration changes */
 static void mce_restart(void)
 {
-	if (next_interval)
-		cancel_delayed_work(&mcheck_work);
-	/* Timer race is harmless here */
-	on_each_cpu(mce_init, NULL, 1);
 	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
+	on_each_cpu(mce_cpu_restart, NULL, 1);
 }
 
 static struct sysdev_class mce_sysclass = {
+	.suspend = mce_suspend,
+	.shutdown = mce_shutdown,
 	.resume = mce_resume,
 	.name = "machinecheck",
 };
@@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
 	}								\
 	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
 
-/*
- * TBD should generate these dynamically based on number of available banks.
- * Have only 6 contol banks in /sysfs until then.
- */
-ACCESSOR(bank0ctl,bank[0],mce_restart())
-ACCESSOR(bank1ctl,bank[1],mce_restart())
-ACCESSOR(bank2ctl,bank[2],mce_restart())
-ACCESSOR(bank3ctl,bank[3],mce_restart())
-ACCESSOR(bank4ctl,bank[4],mce_restart())
-ACCESSOR(bank5ctl,bank[5],mce_restart())
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			 char *buf)
+{
+	u64 b = bank[attr - bank_attrs];
+	return sprintf(buf, "%llx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			const char *buf, size_t siz)
+{
+	char *end;
+	u64 new = simple_strtoull(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	bank[attr - bank_attrs] = new;
+	mce_restart();
+	return end-buf;
+}
 
 static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 				char *buf)
@@ -814,8 +986,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 ACCESSOR(check_interval,check_interval,mce_restart())
 static struct sysdev_attribute *mce_attributes[] = {
-	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
-	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
 	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
 	NULL
 };
@@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 		if (err)
 			goto error;
 	}
+	for (i = 0; i < banks; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+		if (err)
+			goto error2;
+	}
 	cpu_set(cpu, mce_device_initialized);
 
 	return 0;
+error2:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+	}
 error:
-	while (i--) {
+	while (--i >= 0) {
 		sysdev_remove_file(&per_cpu(device_mce,cpu),
 				   mce_attributes[i]);
 	}
@@ -868,15 +1049,46 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 	for (i = 0; mce_attributes[i]; i++)
 		sysdev_remove_file(&per_cpu(device_mce,cpu),
 			mce_attributes[i]);
+	for (i = 0; i < banks; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+			&bank_attrs[i]);
 	sysdev_unregister(&per_cpu(device_mce,cpu));
 	cpu_clear(cpu, mce_device_initialized);
 }
 
+/* Make sure there are no machine checks on offlined CPUs. */
+static void mce_disable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_clear();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+}
+
+static void mce_reenable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_reenable();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+}
+
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
 static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
 	switch (action) {
 	case CPU_ONLINE:
@@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 			threshold_cpu_callback(action, cpu);
 		mce_remove_device(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies(jiffies + next_interval);
+		add_timer_on(t, cpu);
+		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		break;
+	case CPU_POST_DEAD:
+		/* intentionally ignoring frozen here */
+		cmci_rediscover(cpu);
+		break;
 	}
 	return NOTIFY_OK;
 }
@@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
 	.notifier_call = mce_cpu_callback,
 };
 
+static __init int mce_init_banks(void)
+{
+	int i;
+
+	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+				GFP_KERNEL);
+	if (!bank_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < banks; i++) {
+		struct sysdev_attribute *a = &bank_attrs[i];
+		a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+		if (!a->attr.name)
+			goto nomem;
+		a->attr.mode = 0644;
+		a->show = show_bank;
+		a->store = set_bank;
+	}
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(bank_attrs[i].attr.name);
+	kfree(bank_attrs);
+	bank_attrs = NULL;
+	return -ENOMEM;
+}
+
 static __init int mce_init_device(void)
 {
 	int err;
@@ -906,6 +1161,11 @@ static __init int mce_init_device(void)
 
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
+
+	err = mce_init_banks();
+	if (err)
+		return err;
+
 	err = sysdev_class_register(&mce_sysclass);
 	if (err)
 		return err;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index f2ee0ae29bd..7d01be86887 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
 struct threshold_bank {
 	struct kobject *kobj;
 	struct threshold_block *blocks;
-	cpumask_t cpus;
+	cpumask_var_t cpus;
 };
 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
 
@@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = {
 
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
+static void amd_threshold_interrupt(void);
+
 /*
  * CPU Initialization
  */
@@ -90,7 +92,8 @@ struct thresh_restart {
 };
 
 /* must be called with correct cpu affinity */
-static long threshold_restart_bank(void *_tr)
+/* Called via smp_call_function_single() */
+static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
@@ -117,7 +120,6 @@ static long threshold_restart_bank(void *_tr)
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
 	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
-	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			tr.reset = 0;
 			tr.old_limit = 0;
 			threshold_restart_bank(&tr);
+
+			mce_threshold_vector = amd_threshold_interrupt;
 		}
 	}
 }
@@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
  * the interrupt goes off when error_count reaches threshold_limit.
  * the handler will simply log mcelog w/ software defined bank number.
  */
-asmlinkage void mce_threshold_interrupt(void)
+static void amd_threshold_interrupt(void)
 {
 	unsigned int bank, block;
 	struct mce m;
 	u32 low = 0, high = 0, address = 0;
 
-	ack_APIC_irq();
-	exit_idle();
-	irq_enter();
-
-	memset(&m, 0, sizeof(m));
-	rdtscll(m.tsc);
-	m.cpu = smp_processor_id();
+	mce_setup(&m);
 
 	/* assume first bank caused it */
 	for (bank = 0; bank < NR_BANKS; ++bank) {
@@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void)
 
 			/* Log the machine check that caused the threshold
 			   event. */
-			do_machine_check(NULL, 0);
+			machine_check_poll(MCP_TIMESTAMP,
+					&__get_cpu_var(mce_poll_banks));
 
 			if (high & MASK_OVERFLOW_HI) {
 				rdmsrl(address, m.misc);
@@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void)
 				       + bank * NR_BLOCKS
 				       + block;
 				mce_log(&m);
-				goto out;
+				return;
 			}
 		}
 	}
-out:
-	inc_irq_stat(irq_threshold_count);
-	irq_exit();
 }
 
 /*
@@ -283,7 +279,7 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
 	tr.b = b;
 	tr.reset = 0;
 	tr.old_limit = 0;
-	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
 	return end - buf;
 }
@@ -305,23 +301,32 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 	tr.b = b;
 	tr.reset = 0;
 
-	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
 	return end - buf;
 }
 
-static long local_error_count(void *_b)
+struct threshold_block_cross_cpu {
+	struct threshold_block *tb;
+	long retval;
+};
+
+static void local_error_count_handler(void *_tbcc)
 {
-	struct threshold_block *b = _b;
+	struct threshold_block_cross_cpu *tbcc = _tbcc;
+	struct threshold_block *b = tbcc->tb;
 	u32 low, high;
 
 	rdmsr(b->address, low, high);
-	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+	tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
 }
 
 static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
-	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
+	struct threshold_block_cross_cpu tbcc = { .tb = b, };
+
+	smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
+	return sprintf(buf, "%lx\n", tbcc.retval);
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
@@ -329,7 +334,7 @@ static ssize_t store_error_count(struct threshold_block *b,
 {
 	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
 
-	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 	return 1;
 }
 
@@ -398,7 +403,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
 		return 0;
 
-	if (rdmsr_safe(address, &low, &high))
+	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
 		return 0;
 
 	if (!(high & MASK_VALID_HI)) {
@@ -462,12 +467,11 @@ out_free:
 	return err;
 }
 
-static __cpuinit long local_allocate_threshold_blocks(void *_bank)
+static __cpuinit long
+local_allocate_threshold_blocks(int cpu, unsigned int bank)
 {
-	unsigned int *bank = _bank;
-
-	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
-					 MSR_IA32_MC0_MISC + *bank * 4);
+	return allocate_threshold_blocks(cpu, bank, 0,
+					 MSR_IA32_MC0_MISC + bank * 4);
 }
 
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
@@ -481,7 +485,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 #ifdef CONFIG_SMP
 	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
-		i = first_cpu(per_cpu(cpu_core_map, cpu));
+		i = cpumask_first(&per_cpu(cpu_core_map, cpu));
 
 		/* first core not up yet */
 		if (cpu_data(i).cpu_core_id)
@@ -501,7 +505,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (err)
 			goto out;
 
-		b->cpus = per_cpu(cpu_core_map, cpu);
+		cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
 		goto out;
 	}
@@ -512,24 +516,29 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		err = -ENOMEM;
 		goto out;
 	}
+	if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
+		kfree(b);
+		err = -ENOMEM;
+		goto out;
+	}
 
 	b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
 #ifndef CONFIG_SMP
-	b->cpus = CPU_MASK_ALL;
+	cpumask_setall(b->cpus);
 #else
-	b->cpus = per_cpu(cpu_core_map, cpu);
+	cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
 #endif
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
+	err = local_allocate_threshold_blocks(cpu, bank);
 	if (err)
 		goto out_free;
 
-	for_each_cpu_mask_nr(i, b->cpus) {
+	for_each_cpu(i, b->cpus) {
 		if (i == cpu)
 			continue;
 
@@ -545,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 out_free:
 	per_cpu(threshold_banks, cpu)[bank] = NULL;
+	free_cpumask_var(b->cpus);
 	kfree(b);
 out:
 	return err;
@@ -619,7 +629,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #endif
 
 	/* remove all sibling symlinks before unregistering */
-	for_each_cpu_mask_nr(i, b->cpus) {
+	for_each_cpu(i, b->cpus) {
 		if (i == cpu)
 			continue;
 
@@ -632,6 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 free_out:
 	kobject_del(b->kobj);
 	kobject_put(b->kobj);
+	free_cpumask_var(b->cpus);
 	kfree(b);
 	per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f44c3662436..57df3d38347 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -1,17 +1,21 @@
 /*
  * Intel specific MCE features.
  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Author: Andi Kleen
  */
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <asm/processor.h>
+#include <asm/apic.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
+#include <asm/apic.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
@@ -24,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void)
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 	if (therm_throt_process(msr_val & 1))
-		mce_log_therm_throt_event(smp_processor_id(), msr_val);
+		mce_log_therm_throt_event(msr_val);
 
 	inc_irq_stat(irq_thermal_count);
 	irq_exit();
@@ -48,13 +52,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	h = apic_read(APIC_LVTTHMR);
-	if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
 		return;
 	}
 
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
 		tm2 = 1;
 
 	if (h & APIC_VECTOR_MASK) {
@@ -72,7 +76,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -84,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	return;
 }
 
+/*
+ * Support for Intel Correct Machine Check Interrupts. This allows
+ * the CPU to raise an interrupt when a corrected machine check happened.
+ * Normally we pick those up using a regular polling timer.
+ * Also supports reliable discovery of shared banks.
+ */
+
+static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
+
+/*
+ * cmci_discover_lock protects against parallel discovery attempts
+ * which could race against each other.
+ */
+static DEFINE_SPINLOCK(cmci_discover_lock);
+
+#define CMCI_THRESHOLD 1
+
+static int cmci_supported(int *banks)
+{
+	u64 cap;
+
+	/*
+	 * Vendor check is not strictly needed, but the initial
+	 * initialization is vendor keyed and this
+	 * makes sure none of the backdoors are entered otherwise.
+	 */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+		return 0;
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+	return !!(cap & MCG_CMCI_P);
+}
+
+/*
+ * The interrupt handler. This is called on every event.
+ * Just call the poller directly to log any events.
+ * This could in theory increase the threshold under high load,
+ * but doesn't for now.
+ */
+static void intel_threshold_interrupt(void)
+{
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	mce_notify_user();
+}
+
+static void print_update(char *type, int *hdr, int num)
+{
+	if (*hdr == 0)
+		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
+	*hdr = 1;
+	printk(KERN_CONT " %s:%d", type, num);
+}
+
+/*
+ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
+ * on this CPU. Use the algorithm recommended in the SDM to discover shared
+ * banks.
+ */
+static void cmci_discover(int banks, int boot)
+{
+	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+	int hdr = 0;
+	int i;
+
+	spin_lock(&cmci_discover_lock);
+	for (i = 0; i < banks; i++) {
+		u64 val;
+
+		if (test_bit(i, owned))
+			continue;
+
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Already owned by someone else? */
+		if (val & CMCI_EN) {
+			if (test_and_clear_bit(i, owned) || boot)
+				print_update("SHD", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			continue;
+		}
+
+		val |= CMCI_EN | CMCI_THRESHOLD;
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Did the enable bit stick? -- the bank supports CMCI */
+		if (val & CMCI_EN) {
+			if (!test_and_set_bit(i, owned) || boot)
+				print_update("CMCI", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+		} else {
+			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
+		}
+	}
+	spin_unlock(&cmci_discover_lock);
+	if (hdr)
+		printk(KERN_CONT "\n");
+}
+
+/*
+ * Just in case we missed an event during initialization check
+ * all the CMCI owned banks.
+ */
+void cmci_recheck(void)
+{
+	unsigned long flags;
+	int banks;
+
+	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+		return;
+	local_irq_save(flags);
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	local_irq_restore(flags);
+}
+
+/*
+ * Disable CMCI on this CPU for all banks it owns when it goes down.
+ * This allows other CPUs to claim the banks on rediscovery.
+ */
+void cmci_clear(void)
+{
+	int i;
+	int banks;
+	u64 val;
+
+	if (!cmci_supported(&banks))
+		return;
+	spin_lock(&cmci_discover_lock);
+	for (i = 0; i < banks; i++) {
+		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+			continue;
+		/* Disable CMCI */
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		__clear_bit(i, __get_cpu_var(mce_banks_owned));
+	}
+	spin_unlock(&cmci_discover_lock);
+}
+
+/*
+ * After a CPU went down cycle through all the others and rediscover
+ * Must run in process context.
+ */
+void cmci_rediscover(int dying)
+{
+	int banks;
+	int cpu;
+	cpumask_var_t old;
+
+	if (!cmci_supported(&banks))
+		return;
+	if (!alloc_cpumask_var(&old, GFP_KERNEL))
+		return;
+	cpumask_copy(old, &current->cpus_allowed);
+
+	for_each_online_cpu (cpu) {
+		if (cpu == dying)
+			continue;
+		if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
+			continue;
+		/* Recheck banks in case CPUs don't all have the same */
+		if (cmci_supported(&banks))
+			cmci_discover(banks, 0);
+	}
+
+	set_cpus_allowed_ptr(current, old);
+	free_cpumask_var(old);
+}
+
+/*
+ * Reenable CMCI on this CPU in case a CPU down failed.
+ */
+void cmci_reenable(void)
+{
+	int banks;
+	if (cmci_supported(&banks))
+		cmci_discover(banks, 0);
+}
+
+static void intel_init_cmci(void)
+{
+	int banks;
+
+	if (!cmci_supported(&banks))
+		return;
+
+	mce_threshold_vector = intel_threshold_interrupt;
+	cmci_discover(banks, 1);
+	/*
+	 * For CPU #0 this runs with still disabled APIC, but that's
+	 * ok because only the vector is set up. We still do another
+	 * check for the banks later for CPU #0 just to make sure
+	 * to not miss any events.
+	 */
+	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
+	cmci_recheck();
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
+	intel_init_cmci();
 }
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f7..f53bdcbaf38 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	h = apic_read(APIC_LVTTHMR);
-	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
 				cpu);
 		return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	vendor_thermal_interrupt = intel_thermal_interrupt;
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
new file mode 100644
index 00000000000..23ee9e730f7
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -0,0 +1,29 @@
+/*
+ * Common corrected MCE threshold handler code:
+ */
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+
+#include <asm/irq_vectors.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+
+static void default_threshold_interrupt(void)
+{
+	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
+			 THRESHOLD_APIC_VECTOR);
+}
+
+void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+
+asmlinkage void mce_threshold_interrupt(void)
+{
+	exit_idle();
+	irq_enter();
+	inc_irq_stat(irq_threshold_count);
+	mce_threshold_vector();
+	irq_exit();
+	/* Ack only at the end to avoid potential reentry */
+	ack_APIC_irq();
+}
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index 191fc053364..f4361b56f8e 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y		:= main.o if.o generic.o state.o
+obj-y		:= main.o if.o generic.o state.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
new file mode 100644
index 00000000000..ce0fe4b5c04
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -0,0 +1,1101 @@
+/*  MTRR (Memory Type Range Register) cleanup
+
+    Copyright (C) 2009 Yinghai Lu
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/sort.h>
+
+#include <asm/e820.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/kvm_para.h>
+#include "mtrr.h"
+
+/* should be related to MTRR_VAR_RANGES nums */
+#define RANGE_NUM 256
+
+struct res_range {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
+{
+	/* out of slots */
+	if (nr_range >= RANGE_NUM)
+		return nr_range;
+
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+}
+
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
+{
+	int i;
+
+	/* try to merge it with old one */
+	for (i = 0; i < nr_range; i++) {
+		unsigned long final_start, final_end;
+		unsigned long common_start, common_end;
+
+		if (!range[i].end)
+			continue;
+
+		common_start = max(range[i].start, start);
+		common_end = min(range[i].end, end);
+		if (common_start > common_end + 1)
+			continue;
+
+		final_start = min(range[i].start, start);
+		final_end = max(range[i].end, end);
+
+		range[i].start = final_start;
+		range[i].end =  final_end;
+		return nr_range;
+	}
+
+	/* need to add that */
+	return add_range(range, nr_range, start, end);
+}
+
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
+{
+	int i, j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __init cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	long start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_range_state {
+	unsigned long base_pfn;
+	unsigned long size_pfn;
+	mtrr_type type;
+};
+
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static int __initdata debug_print;
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
+{
+	unsigned long base, size;
+	mtrr_type type;
+	int i;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		nr_range = add_range_with_merge(range, nr_range, base,
+						base + size - 1);
+	}
+	if (debug_print) {
+		printk(KERN_DEBUG "After WB checking\n");
+		for (i = 0; i < nr_range; i++)
+			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+	}
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_UNCACHABLE &&
+		    type != MTRR_TYPE_WRPROT)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			continue;
+		base = range_state[i].base_pfn;
+		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
+		    (mtrr_state.enabled & 1)) {
+			/* Var MTRR contains UC entry below 1M? Skip it: */
+			printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
+				"contains strange UC entry under 1M, check "
+				"with your system vendor!\n", i);
+			if (base + size <= (1<<(20-PAGE_SHIFT)))
+				continue;
+			size -= (1<<(20-PAGE_SHIFT)) - base;
+			base = 1<<(20-PAGE_SHIFT);
+		}
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,
+				 extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	if  (debug_print) {
+		printk(KERN_DEBUG "After UC checking\n");
+		for (i = 0; i < nr_range; i++)
+			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+	}
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	if  (debug_print) {
+		printk(KERN_DEBUG "After sorting\n");
+		for (i = 0; i < nr_range; i++)
+			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+	}
+
+	/* clear those is not used */
+	for (i = nr_range; i < RANGE_NUM; i++)
+		memset(&range[i], 0, sizeof(range[i]));
+
+	return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+	unsigned long sum;
+	int i;
+
+	sum = 0;
+	for (i = 0; i < nr_range; i++)
+		sum += range[i].end + 1 - range[i].start;
+
+	return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+static int __init mtrr_cleanup_debug_setup(char *str)
+{
+	debug_print = 1;
+	return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
+
+struct var_mtrr_state {
+	unsigned long	range_startk;
+	unsigned long	range_sizek;
+	unsigned long	chunk_sizek;
+	unsigned long	gran_sizek;
+	unsigned int	reg;
+};
+
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type, unsigned int address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	u64 base, mask;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	mask = (1ULL << address_bits) - 1;
+	mask &= ~((((u64)sizek) << 10) - 1);
+
+	base  = ((u64)basek) << 10;
+
+	base |= type;
+	mask |= 0x800;
+
+	base_lo = base & ((1ULL<<32) - 1);
+	base_hi = base >> 32;
+
+	mask_lo = mask & ((1ULL<<32) - 1);
+	mask_hi = mask >> 32;
+
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type)
+{
+	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+	range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+	unsigned long basek, sizek;
+	unsigned char type;
+	unsigned int reg;
+
+	for (reg = 0; reg < num_var_ranges; reg++) {
+		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+		type = range_state[reg].type;
+
+		set_var_mtrr(reg, basek, sizek, type, address_bits);
+	}
+}
+
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+	char factor;
+	unsigned long base = sizek;
+
+	if (base & ((1<<10) - 1)) {
+		/* not MB alignment */
+		factor = 'K';
+	} else if (base & ((1<<20) - 1)) {
+		factor = 'M';
+		base >>= 10;
+	} else {
+		factor = 'G';
+		base >>= 20;
+	}
+
+	*factorp = factor;
+
+	return base;
+}
+
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+	      unsigned long range_sizek, unsigned char type)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		if (debug_print) {
+			char start_factor = 'K', size_factor = 'K';
+			unsigned long start_base, size_base;
+
+			start_base = to_size_factor(range_startk,
+							 &start_factor),
+			size_base = to_size_factor(sizek, &size_factor),
+
+			printk(KERN_DEBUG "Setting variable MTRR %d, "
+				"base: %ld%cB, range: %ld%cB, type %s\n",
+				reg, start_base, start_factor,
+				size_base, size_factor,
+				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+				   ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
+				);
+		}
+		save_var_mtrr(reg++, range_startk, sizek, type);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+			unsigned long sizek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long second_basek, second_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned long gran_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	second_basek = 0;
+	second_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	gran_sizek = state->gran_sizek;
+
+	/* align with gran size, prevent small block used up MTRRs */
+	range_basek = ALIGN(state->range_startk, gran_sizek);
+	if ((range_basek > basek) && basek)
+		return second_sizek;
+	state->range_sizek -= (range_basek - state->range_startk);
+	range_sizek = ALIGN(state->range_sizek, gran_sizek);
+
+	while (range_sizek > state->range_sizek) {
+		range_sizek -= gran_sizek;
+		if (!range_sizek)
+			return 0;
+	}
+	state->range_sizek = range_sizek;
+
+	/* try to append some small hole */
+	range0_basek = state->range_startk;
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+	/* no increase */
+	if (range0_sizek == state->range_sizek) {
+		if (debug_print)
+			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
+				range0_basek<<10,
+				(range0_basek + state->range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK);
+		return 0;
+	}
+
+	/* only cut back, when it is not the last */
+	if (sizek) {
+		while (range0_basek + range0_sizek > (basek + sizek)) {
+			if (range0_sizek >= chunk_sizek)
+				range0_sizek -= chunk_sizek;
+			else
+				range0_sizek = 0;
+
+			if (!range0_sizek)
+				break;
+		}
+	}
+
+second_try:
+	range_basek = range0_basek + range0_sizek;
+
+	/* one hole in the middle */
+	if (range_basek > basek && range_basek <= (basek + sizek))
+		second_sizek = range_basek - basek;
+
+	if (range0_sizek > state->range_sizek) {
+
+		/* one hole in middle or at end */
+		hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+		/* hole size should be less than half of range0 size */
+		if (hole_sizek >= (range0_sizek >> 1) &&
+		    range0_sizek >= chunk_sizek) {
+			range0_sizek -= chunk_sizek;
+			second_sizek = 0;
+			hole_sizek = 0;
+
+			goto second_try;
+		}
+	}
+
+	if (range0_sizek) {
+		if (debug_print)
+			printk(KERN_DEBUG "range0: %016lx - %016lx\n",
+				range0_basek<<10,
+				(range0_basek + range0_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK);
+	}
+
+	if (range0_sizek < state->range_sizek) {
+		/* need to handle left over */
+		range_sizek = state->range_sizek - range0_sizek;
+
+		if (debug_print)
+			printk(KERN_DEBUG "range: %016lx - %016lx\n",
+				 range_basek<<10,
+				 (range_basek + range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range_basek,
+				 range_sizek, MTRR_TYPE_WRBACK);
+	}
+
+	if (hole_sizek) {
+		hole_basek = range_basek - hole_sizek - second_sizek;
+		if (debug_print)
+			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
+				 hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				 hole_sizek, MTRR_TYPE_UNCACHABLE);
+	}
+
+	return second_sizek;
+}
+
+static void __init
+set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+		   unsigned long size_pfn)
+{
+	unsigned long basek, sizek;
+	unsigned long second_sizek = 0;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) ||
+	    (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0)
+		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
+
+	/* Allocate an msr */
+	state->range_startk = basek + second_sizek;
+	state->range_sizek  = sizek - second_sizek;
+}
+
+/* mininum size of mtrr block that can take hole */
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+/* granity of mtrr of block */
+static u64 mtrr_gran_size __initdata;
+
+static int __init parse_mtrr_gran_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_gran_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+
+static int nr_mtrr_spare_reg __initdata =
+				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+	if (arg)
+		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
+x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+		    u64 chunk_size, u64 gran_size)
+{
+	struct var_mtrr_state var_state;
+	int i;
+	int num_reg;
+
+	var_state.range_startk	= 0;
+	var_state.range_sizek	= 0;
+	var_state.reg		= 0;
+	var_state.chunk_sizek	= chunk_size >> 10;
+	var_state.gran_sizek	= gran_size >> 10;
+
+	memset(range_state, 0, sizeof(range_state));
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start,
+				   range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	if (var_state.range_sizek != 0)
+		range_to_mtrr_with_hole(&var_state, 0, 0);
+
+	num_reg = var_state.reg;
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges) {
+		save_var_mtrr(var_state.reg, 0, 0, 0);
+		var_state.reg++;
+	}
+
+	return num_reg;
+}
+
+struct mtrr_cleanup_result {
+	unsigned long gran_sizek;
+	unsigned long chunk_sizek;
+	unsigned long lose_cover_sizek;
+	unsigned int num_reg;
+	int bad;
+};
+
+/*
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
+ */
+#define NUM_RESULT	136
+#define PSHIFT		(PAGE_SHIFT - 10)
+
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
+
+static void __init print_out_mtrr_range_state(void)
+{
+	int i;
+	char start_factor = 'K', size_factor = 'K';
+	unsigned long start_base, size_base;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+
+		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+		if (!size_base)
+			continue;
+
+		size_base = to_size_factor(size_base, &size_factor),
+		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+		start_base = to_size_factor(start_base, &start_factor),
+		type = range_state[i].type;
+
+		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+			i, start_base, start_factor,
+			size_base, size_factor,
+			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
+			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+			);
+	}
+}
+
+static int __init mtrr_need_cleanup(void)
+{
+	int i;
+	mtrr_type type;
+	unsigned long size;
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		size = range_state[i].size_pfn;
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		if (type == MTRR_TYPE_WRPROT)
+			type = MTRR_TYPE_UNCACHABLE;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	return 1;
+}
+
+static unsigned long __initdata range_sums;
+static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+					 unsigned long extra_remove_base,
+					 unsigned long extra_remove_size,
+					 int i)
+{
+	int num_reg;
+	static struct res_range range_new[RANGE_NUM];
+	static int nr_range_new;
+	unsigned long range_sums_new;
+
+	/* convert ranges to var ranges state */
+	num_reg = x86_setup_var_mtrrs(range, nr_range,
+						chunk_size, gran_size);
+
+	/* we got new setting in range_state, check it */
+	memset(range_new, 0, sizeof(range_new));
+	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+				extra_remove_base, extra_remove_size);
+	range_sums_new = sum_ranges(range_new, nr_range_new);
+
+	result[i].chunk_sizek = chunk_size >> 10;
+	result[i].gran_sizek = gran_size >> 10;
+	result[i].num_reg = num_reg;
+	if (range_sums < range_sums_new) {
+		result[i].lose_cover_sizek =
+			(range_sums_new - range_sums) << PSHIFT;
+		result[i].bad = 1;
+	} else
+		result[i].lose_cover_sizek =
+			(range_sums - range_sums_new) << PSHIFT;
+
+	/* double check it */
+	if (!result[i].bad && !result[i].lose_cover_sizek) {
+		if (nr_range_new != nr_range ||
+			memcmp(range, range_new, sizeof(range)))
+				result[i].bad = 1;
+	}
+
+	if (!result[i].bad && (range_sums - range_sums_new <
+				min_loss_pfn[num_reg])) {
+		min_loss_pfn[num_reg] =
+			range_sums - range_sums_new;
+	}
+}
+
+static void __init mtrr_print_out_one_result(int i)
+{
+	char gran_factor, chunk_factor, lose_factor;
+	unsigned long gran_base, chunk_base, lose_base;
+
+	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			result[i].bad ? "*BAD*" : " ",
+			gran_base, gran_factor, chunk_base, chunk_factor);
+	printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+			result[i].num_reg, result[i].bad ? "-" : "",
+			lose_base, lose_factor);
+}
+
+static int __init mtrr_search_optimal_index(void)
+{
+	int i;
+	int num_reg_good;
+	int index_good;
+
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+		if (!min_loss_pfn[i])
+			num_reg_good = i;
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad &&
+			    result[i].num_reg == num_reg_good &&
+			    !result[i].lose_cover_sizek) {
+				index_good = i;
+				break;
+			}
+		}
+	}
+
+	return index_good;
+}
+
+
+int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long extra_remove_base, extra_remove_size;
+	unsigned long base, size, def, dummy;
+	mtrr_type type;
+	u64 chunk_size, gran_size;
+	int index_good;
+	int i;
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* check if we need handle it and can handle it */
+	if (!mtrr_need_cleanup())
+		return 0;
+
+	/* print original var MTRRs at first, for debugging: */
+	printk(KERN_DEBUG "original variable MTRRs\n");
+	print_out_mtrr_range_state();
+
+	memset(range, 0, sizeof(range));
+	extra_remove_size = 0;
+	extra_remove_base = 1 << (32 - PAGE_SHIFT);
+	if (mtrr_tom2)
+		extra_remove_size =
+			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+					  extra_remove_size);
+	/*
+	 * [0, 1M) should always be coverred by var mtrr with WB
+	 * and fixed mtrrs should take effective before var mtrr for it
+	 */
+	nr_range = add_range_with_merge(range, nr_range, 0,
+					(1ULL<<(20 - PAGE_SHIFT)) - 1);
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
+	range_sums = sum_ranges(range, nr_range);
+	printk(KERN_INFO "total RAM coverred: %ldM\n",
+	       range_sums >> (20 - PAGE_SHIFT));
+
+	if (mtrr_chunk_size && mtrr_gran_size) {
+		i = 0;
+		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
+				      extra_remove_base, extra_remove_size, i);
+
+		mtrr_print_out_one_result(i);
+
+		if (!result[i].bad) {
+			set_var_mtrr_all(address_bits);
+			printk(KERN_DEBUG "New variable MTRRs\n");
+			print_out_mtrr_range_state();
+			return 1;
+		}
+		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
+		       "will find optimal one\n");
+	}
+
+	i = 0;
+	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+	memset(result, 0, sizeof(result));
+	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+		     chunk_size <<= 1) {
+
+			if (i >= NUM_RESULT)
+				continue;
+
+			mtrr_calc_range_state(chunk_size, gran_size,
+				      extra_remove_base, extra_remove_size, i);
+			if (debug_print) {
+				mtrr_print_out_one_result(i);
+				printk(KERN_INFO "\n");
+			}
+
+			i++;
+		}
+	}
+
+	/* try to find the optimal index */
+	index_good = mtrr_search_optimal_index();
+
+	if (index_good != -1) {
+		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		i = index_good;
+		mtrr_print_out_one_result(i);
+
+		/* convert ranges to var ranges state */
+		chunk_size = result[i].chunk_sizek;
+		chunk_size <<= 10;
+		gran_size = result[i].gran_sizek;
+		gran_size <<= 10;
+		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		set_var_mtrr_all(address_bits);
+		printk(KERN_DEBUG "New variable MTRRs\n");
+		print_out_mtrr_range_state();
+		return 1;
+	} else {
+		/* print out all */
+		for (i = 0; i < NUM_RESULT; i++)
+			mtrr_print_out_one_result(i);
+	}
+
+	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+	return 0;
+}
+#else
+int __init mtrr_cleanup(unsigned address_bits)
+{
+	return 0;
+}
+#endif
+
+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+	disable_mtrr_trim = 1;
+	return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+/*
+ * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
+ * for memory >4GB. Check for that here.
+ * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
+ * apply to are wrong, but so far we don't know of any such case in the wild.
+ */
+#define Tom2Enabled (1U << 21)
+#define Tom2ForceMemTypeWB (1U << 22)
+
+int __init amd_special_default_mtrr(void)
+{
+	u32 l, h;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return 0;
+	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
+		return 0;
+	/* In case some hypervisor doesn't pass SYSCFG through */
+	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+		return 0;
+	/*
+	 * Memory between 4GB and top of mem is forced WB by this magic bit.
+	 * Reserved before K8RevF, but should be zero there.
+	 */
+	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
+		 (Tom2Enabled | Tom2ForceMemTypeWB))
+		return 1;
+	return 0;
+}
+
+static u64 __init real_trim_memory(unsigned long start_pfn,
+				   unsigned long limit_pfn)
+{
+	u64 trim_start, trim_size;
+	trim_start = start_pfn;
+	trim_start <<= PAGE_SHIFT;
+	trim_size = limit_pfn;
+	trim_size <<= PAGE_SHIFT;
+	trim_size -= trim_start;
+
+	return e820_update_range(trim_start, trim_size, E820_RAM,
+				E820_RESERVED);
+}
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ * @end_pfn: ending page frame number
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations.  This routine checks that the highest MTRR matches
+ * the end of memory, to make sure the MTRRs having a write back type cover
+ * all of the memory the kernel is intending to use. If not, it'll trim any
+ * memory off the end by adjusting end_pfn, removing it from the kernel's
+ * allocation pools, warning the user with an obnoxious message.
+ */
+int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
+{
+	unsigned long i, base, size, highest_pfn = 0, def, dummy;
+	mtrr_type type;
+	u64 total_trim_size;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+	/*
+	 * Make sure we only trim uncachable memory on machines that
+	 * support the Intel MTRR architecture:
+	 */
+	if (!is_cpu(INTEL) || disable_mtrr_trim)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		if (highest_pfn < base + size)
+			highest_pfn = base + size;
+	}
+
+	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
+	if (!highest_pfn) {
+		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+		return 0;
+	}
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* no entry for WB? */
+	if (!num[MTRR_TYPE_WRBACK])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	if (mtrr_tom2) {
+		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+		if (highest_pfn < range[nr_range].end + 1)
+			highest_pfn = range[nr_range].end + 1;
+		nr_range++;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+
+	total_trim_size = 0;
+	/* check the head */
+	if (range[0].start)
+		total_trim_size += real_trim_memory(0, range[0].start);
+	/* check the holes */
+	for (i = 0; i < nr_range - 1; i++) {
+		if (range[i].end + 1 < range[i+1].start)
+			total_trim_size += real_trim_memory(range[i].end + 1,
+							    range[i+1].start);
+	}
+	/* check the top */
+	i = nr_range - 1;
+	if (range[i].end + 1 < end_pfn)
+		total_trim_size += real_trim_memory(range[i].end + 1,
+							 end_pfn);
+
+	if (total_trim_size) {
+		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+			" all of memory, losing %lluMB of RAM.\n",
+			total_trim_size >> 20);
+
+		if (!changed_by_mtrr_cleanup)
+			WARN_ON(1);
+
+		printk(KERN_INFO "update e820 for mtrr\n");
+		update_e820();
+
+		return 1;
+	}
+
+	return 0;
+}
+
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0c0a455fe95..37f28fc7cf9 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,13 +33,31 @@ u64 mtrr_tom2;
 struct mtrr_state_type mtrr_state = {};
 EXPORT_SYMBOL_GPL(mtrr_state);
 
-static int __initdata mtrr_show;
-static int __init mtrr_debug(char *opt)
+/**
+ * BIOS is expected to clear MtrrFixDramModEn bit, see for example
+ * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
+ * Opteron Processors" (26094 Rev. 3.30 February 2006), section
+ * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
+ * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
+ * 0 for operation."
+ */
+static inline void k8_check_syscfg_dram_mod_en(void)
 {
-	mtrr_show = 1;
-	return 0;
+	u32 lo, hi;
+
+	if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+	      (boot_cpu_data.x86 >= 0x0f)))
+		return;
+
+	rdmsr(MSR_K8_SYSCFG, lo, hi);
+	if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
+		printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+		       " not cleared by BIOS, clearing this bit\n",
+		       smp_processor_id());
+		lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
+		mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
+	}
 }
-early_param("mtrr.show", mtrr_debug);
 
 /*
  * Returns the effective MTRR type for the region
@@ -174,6 +192,8 @@ get_fixed_ranges(mtrr_type * frs)
 	unsigned int *p = (unsigned int *) frs;
 	int i;
 
+	k8_check_syscfg_dram_mod_en();
+
 	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
 
 	for (i = 0; i < 2; i++)
@@ -188,18 +208,94 @@ void mtrr_save_fixed_ranges(void *info)
 		get_fixed_ranges(mtrr_state.fixed_ranges);
 }
 
-static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
+static unsigned __initdata last_fixed_start;
+static unsigned __initdata last_fixed_end;
+static mtrr_type __initdata last_fixed_type;
+
+static void __init print_fixed_last(void)
+{
+	if (!last_fixed_end)
+		return;
+
+	printk(KERN_DEBUG "  %05X-%05X %s\n", last_fixed_start,
+		last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
+
+	last_fixed_end = 0;
+}
+
+static void __init update_fixed_last(unsigned base, unsigned end,
+				       mtrr_type type)
+{
+	last_fixed_start = base;
+	last_fixed_end = end;
+	last_fixed_type = type;
+}
+
+static void __init print_fixed(unsigned base, unsigned step,
+			       const mtrr_type *types)
 {
 	unsigned i;
 
-	for (i = 0; i < 8; ++i, ++types, base += step)
-		printk(KERN_INFO "MTRR %05X-%05X %s\n",
-			base, base + step - 1, mtrr_attrib_to_str(*types));
+	for (i = 0; i < 8; ++i, ++types, base += step) {
+		if (last_fixed_end == 0) {
+			update_fixed_last(base, base + step, *types);
+			continue;
+		}
+		if (last_fixed_end == base && last_fixed_type == *types) {
+			last_fixed_end = base + step;
+			continue;
+		}
+		/* new segments: gap or different type */
+		print_fixed_last();
+		update_fixed_last(base, base + step, *types);
+	}
 }
 
 static void prepare_set(void);
 static void post_set(void);
 
+static void __init print_mtrr_state(void)
+{
+	unsigned int i;
+	int high_width;
+
+	printk(KERN_DEBUG "MTRR default type: %s\n",
+			 mtrr_attrib_to_str(mtrr_state.def_type));
+	if (mtrr_state.have_fixed) {
+		printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
+		       mtrr_state.enabled & 1 ? "en" : "dis");
+		print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
+		for (i = 0; i < 2; ++i)
+			print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
+		for (i = 0; i < 8; ++i)
+			print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
+
+		/* tail */
+		print_fixed_last();
+	}
+	printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
+	       mtrr_state.enabled & 2 ? "en" : "dis");
+	high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+	for (i = 0; i < num_var_ranges; ++i) {
+		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
+			printk(KERN_DEBUG "  %u base %0*X%05X000 mask %0*X%05X000 %s\n",
+			       i,
+			       high_width,
+			       mtrr_state.var_ranges[i].base_hi,
+			       mtrr_state.var_ranges[i].base_lo >> 12,
+			       high_width,
+			       mtrr_state.var_ranges[i].mask_hi,
+			       mtrr_state.var_ranges[i].mask_lo >> 12,
+			       mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
+		else
+			printk(KERN_DEBUG "  %u disabled\n", i);
+	}
+	if (mtrr_tom2) {
+		printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
+				  mtrr_tom2, mtrr_tom2>>20);
+	}
+}
+
 /*  Grab all of the MTRR state for this CPU into *state  */
 void __init get_mtrr_state(void)
 {
@@ -231,41 +327,9 @@ void __init get_mtrr_state(void)
 		mtrr_tom2 |= low;
 		mtrr_tom2 &= 0xffffff800000ULL;
 	}
-	if (mtrr_show) {
-		int high_width;
-
-		printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
-		if (mtrr_state.have_fixed) {
-			printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
-			       mtrr_state.enabled & 1 ? "en" : "dis");
-			print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
-			for (i = 0; i < 2; ++i)
-				print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
-			for (i = 0; i < 8; ++i)
-				print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
-		}
-		printk(KERN_INFO "MTRR variable ranges %sabled:\n",
-		       mtrr_state.enabled & 2 ? "en" : "dis");
-		high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
-		for (i = 0; i < num_var_ranges; ++i) {
-			if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
-				printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
-				       i,
-				       high_width,
-				       mtrr_state.var_ranges[i].base_hi,
-				       mtrr_state.var_ranges[i].base_lo >> 12,
-				       high_width,
-				       mtrr_state.var_ranges[i].mask_hi,
-				       mtrr_state.var_ranges[i].mask_lo >> 12,
-				       mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
-			else
-				printk(KERN_INFO "MTRR %u disabled\n", i);
-		}
-		if (mtrr_tom2) {
-			printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
-					  mtrr_tom2, mtrr_tom2>>20);
-		}
-	}
+
+	print_mtrr_state();
+
 	mtrr_state_set = 1;
 
 	/* PAT setup for BP. We need to go through sync steps here */
@@ -308,27 +372,10 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 }
 
 /**
- * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
- * see AMD publication no. 24593, chapter 3.2.1 for more information
- */
-static inline void k8_enable_fixed_iorrs(void)
-{
-	unsigned lo, hi;
-
-	rdmsr(MSR_K8_SYSCFG, lo, hi);
-	mtrr_wrmsr(MSR_K8_SYSCFG, lo
-				| K8_MTRRFIXRANGE_DRAM_ENABLE
-				| K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
-}
-
-/**
  * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
  * @msr: MSR address of the MTTR which should be checked and updated
  * @changed: pointer which indicates whether the MTRR needed to be changed
  * @msrwords: pointer to the MSR values which the MSR should have
- *
- * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
- * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
  */
 static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 {
@@ -337,10 +384,6 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 	rdmsr(msr, lo, hi);
 
 	if (lo != msrwords[0] || hi != msrwords[1]) {
-		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		    (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
-		    ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
-			k8_enable_fixed_iorrs();
 		mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
 		*changed = true;
 	}
@@ -376,22 +419,31 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 {
 	unsigned int mask_lo, mask_hi, base_lo, base_hi;
 	unsigned int tmp, hi;
+	int cpu;
+
+	/*
+	 * get_mtrr doesn't need to update mtrr_state, also it could be called
+	 * from any cpu, so try to print it out directly.
+	 */
+	cpu = get_cpu();
 
 	rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
+
 	if ((mask_lo & 0x800) == 0) {
 		/*  Invalid (i.e. free) range  */
 		*base = 0;
 		*size = 0;
 		*type = 0;
-		return;
+		goto out_put_cpu;
 	}
 
 	rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
 
-	/* Work out the shifted address mask. */
+	/* Work out the shifted address mask: */
 	tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
 	mask_lo = size_or_mask | tmp;
-	/* Expand tmp with high bits to all 1s*/
+
+	/* Expand tmp with high bits to all 1s: */
 	hi = fls(tmp);
 	if (hi > 0) {
 		tmp |= ~((1<<(hi - 1)) - 1);
@@ -402,11 +454,19 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 		}
 	}
 
-	/* This works correctly if size is a power of two, i.e. a
-	   contiguous range. */
+	/*
+	 * This works correctly if size is a power of two, i.e. a
+	 * contiguous range:
+	 */
 	*size = -mask_lo;
 	*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
 	*type = base_lo & 0xff;
+
+	printk(KERN_DEBUG "  get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n",
+			cpu, reg, *base, *size,
+			mtrr_attrib_to_str(*type & 0xff));
+out_put_cpu:
+	put_cpu();
 }
 
 /**
@@ -419,6 +479,8 @@ static int set_fixed_ranges(mtrr_type * frs)
 	bool changed = false;
 	int block=-1, range;
 
+	k8_check_syscfg_dram_mod_en();
+
 	while (fixed_range_blocks[++block].ranges)
 	    for (range=0; range < fixed_range_blocks[block].ranges; range++)
 		set_fixed_range(fixed_range_blocks[block].base_msr + range,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 236a401b825..03cda01f57c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -574,7 +574,7 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
+static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
@@ -582,9 +582,9 @@ static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i,
-			     &mtrr_state[i].lbase,
-			     &mtrr_state[i].lsize,
-			     &mtrr_state[i].ltype);
+			     &mtrr_value[i].lbase,
+			     &mtrr_value[i].lsize,
+			     &mtrr_value[i].ltype);
 	}
 	return 0;
 }
@@ -594,11 +594,11 @@ static int mtrr_restore(struct sys_device * sysdev)
 	int i;
 
 	for (i = 0; i < num_var_ranges; i++) {
-		if (mtrr_state[i].lsize) 
+		if (mtrr_value[i].lsize)
 			set_mtrr(i,
-				 mtrr_state[i].lbase,
-				 mtrr_state[i].lsize,
-				 mtrr_state[i].ltype);
+				 mtrr_value[i].lbase,
+				 mtrr_value[i].lsize,
+				 mtrr_value[i].ltype);
 	}
 	return 0;
 }
@@ -610,1058 +610,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
-/* should be related to MTRR_VAR_RANGES nums */
-#define RANGE_NUM 256
-
-struct res_range {
-	unsigned long start;
-	unsigned long end;
-};
-
-static int __init
-add_range(struct res_range *range, int nr_range, unsigned long start,
-			      unsigned long end)
-{
-	/* out of slots */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-}
-
-static int __init
-add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
-			      unsigned long end)
-{
-	int i;
-
-	/* try to merge it with old one */
-	for (i = 0; i < nr_range; i++) {
-		unsigned long final_start, final_end;
-		unsigned long common_start, common_end;
-
-		if (!range[i].end)
-			continue;
-
-		common_start = max(range[i].start, start);
-		common_end = min(range[i].end, end);
-		if (common_start > common_end + 1)
-			continue;
-
-		final_start = min(range[i].start, start);
-		final_end = max(range[i].end, end);
-
-		range[i].start = final_start;
-		range[i].end =  final_end;
-		return nr_range;
-	}
-
-	/* need to add that */
-	return add_range(range, nr_range, start, end);
-}
-
-static void __init
-subtract_range(struct res_range *range, unsigned long start, unsigned long end)
-{
-	int i, j;
-
-	for (j = 0; j < RANGE_NUM; j++) {
-		if (!range[j].end)
-			continue;
-
-		if (start <= range[j].start && end >= range[j].end) {
-			range[j].start = 0;
-			range[j].end = 0;
-			continue;
-		}
-
-		if (start <= range[j].start && end < range[j].end &&
-		    range[j].start < end + 1) {
-			range[j].start = end + 1;
-			continue;
-		}
-
-
-		if (start > range[j].start && end >= range[j].end &&
-		    range[j].end > start - 1) {
-			range[j].end = start - 1;
-			continue;
-		}
-
-		if (start > range[j].start && end < range[j].end) {
-			/* find the new spare */
-			for (i = 0; i < RANGE_NUM; i++) {
-				if (range[i].end == 0)
-					break;
-			}
-			if (i < RANGE_NUM) {
-				range[i].end = range[j].end;
-				range[i].start = end + 1;
-			} else {
-				printk(KERN_ERR "run of slot in ranges\n");
-			}
-			range[j].end = start - 1;
-			continue;
-		}
-	}
-}
-
-static int __init cmp_range(const void *x1, const void *x2)
-{
-	const struct res_range *r1 = x1;
-	const struct res_range *r2 = x2;
-	long start1, start2;
-
-	start1 = r1->start;
-	start2 = r2->start;
-
-	return start1 - start2;
-}
-
-struct var_mtrr_range_state {
-	unsigned long base_pfn;
-	unsigned long size_pfn;
-	mtrr_type type;
-};
-
-static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
-static int __initdata debug_print;
-
-static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
-		       unsigned long extra_remove_base,
-		       unsigned long extra_remove_size)
-{
-	unsigned long i, base, size;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		type = range_state[i].type;
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		base = range_state[i].base_pfn;
-		size = range_state[i].size_pfn;
-		nr_range = add_range_with_merge(range, nr_range, base,
-						base + size - 1);
-	}
-	if (debug_print) {
-		printk(KERN_DEBUG "After WB checking\n");
-		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
-	}
-
-	/* take out UC ranges */
-	for (i = 0; i < num_var_ranges; i++) {
-		type = range_state[i].type;
-		if (type != MTRR_TYPE_UNCACHABLE &&
-		    type != MTRR_TYPE_WRPROT)
-			continue;
-		size = range_state[i].size_pfn;
-		if (!size)
-			continue;
-		base = range_state[i].base_pfn;
-		subtract_range(range, base, base + size - 1);
-	}
-	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,
-				 extra_remove_base + extra_remove_size  - 1);
-
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
-	if  (debug_print) {
-		printk(KERN_DEBUG "After UC checking\n");
-		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
-	}
-
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-	if  (debug_print) {
-		printk(KERN_DEBUG "After sorting\n");
-		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
-	}
-
-	/* clear those is not used */
-	for (i = nr_range; i < RANGE_NUM; i++)
-		memset(&range[i], 0, sizeof(range[i]));
-
-	return nr_range;
-}
-
-static struct res_range __initdata range[RANGE_NUM];
-static int __initdata nr_range;
-
-#ifdef CONFIG_MTRR_SANITIZER
-
-static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
-{
-	unsigned long sum;
-	int i;
-
-	sum = 0;
-	for (i = 0; i < nr_range; i++)
-		sum += range[i].end + 1 - range[i].start;
-
-	return sum;
-}
-
-static int enable_mtrr_cleanup __initdata =
-	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
-static int __init mtrr_cleanup_debug_setup(char *str)
-{
-	debug_print = 1;
-	return 0;
-}
-early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
-
-struct var_mtrr_state {
-	unsigned long	range_startk;
-	unsigned long	range_sizek;
-	unsigned long	chunk_sizek;
-	unsigned long	gran_sizek;
-	unsigned int	reg;
-};
-
-static void __init
-set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type, unsigned int address_bits)
-{
-	u32 base_lo, base_hi, mask_lo, mask_hi;
-	u64 base, mask;
-
-	if (!sizek) {
-		fill_mtrr_var_range(reg, 0, 0, 0, 0);
-		return;
-	}
-
-	mask = (1ULL << address_bits) - 1;
-	mask &= ~((((u64)sizek) << 10) - 1);
-
-	base  = ((u64)basek) << 10;
-
-	base |= type;
-	mask |= 0x800;
-
-	base_lo = base & ((1ULL<<32) - 1);
-	base_hi = base >> 32;
-
-	mask_lo = mask & ((1ULL<<32) - 1);
-	mask_hi = mask >> 32;
-
-	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
-}
-
-static void __init
-save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type)
-{
-	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
-	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
-	range_state[reg].type = type;
-}
-
-static void __init
-set_var_mtrr_all(unsigned int address_bits)
-{
-	unsigned long basek, sizek;
-	unsigned char type;
-	unsigned int reg;
-
-	for (reg = 0; reg < num_var_ranges; reg++) {
-		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
-		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
-		type = range_state[reg].type;
-
-		set_var_mtrr(reg, basek, sizek, type, address_bits);
-	}
-}
-
-static unsigned long to_size_factor(unsigned long sizek, char *factorp)
-{
-	char factor;
-	unsigned long base = sizek;
-
-	if (base & ((1<<10) - 1)) {
-		/* not MB alignment */
-		factor = 'K';
-	} else if (base & ((1<<20) - 1)){
-		factor = 'M';
-		base >>= 10;
-	} else {
-		factor = 'G';
-		base >>= 20;
-	}
-
-	*factorp = factor;
-
-	return base;
-}
-
-static unsigned int __init
-range_to_mtrr(unsigned int reg, unsigned long range_startk,
-	      unsigned long range_sizek, unsigned char type)
-{
-	if (!range_sizek || (reg >= num_var_ranges))
-		return reg;
-
-	while (range_sizek) {
-		unsigned long max_align, align;
-		unsigned long sizek;
-
-		/* Compute the maximum size I can make a range */
-		if (range_startk)
-			max_align = ffs(range_startk) - 1;
-		else
-			max_align = 32;
-		align = fls(range_sizek) - 1;
-		if (align > max_align)
-			align = max_align;
-
-		sizek = 1 << align;
-		if (debug_print) {
-			char start_factor = 'K', size_factor = 'K';
-			unsigned long start_base, size_base;
-
-			start_base = to_size_factor(range_startk, &start_factor),
-			size_base = to_size_factor(sizek, &size_factor),
-
-			printk(KERN_DEBUG "Setting variable MTRR %d, "
-				"base: %ld%cB, range: %ld%cB, type %s\n",
-				reg, start_base, start_factor,
-				size_base, size_factor,
-				(type == MTRR_TYPE_UNCACHABLE)?"UC":
-				    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
-				);
-		}
-		save_var_mtrr(reg++, range_startk, sizek, type);
-		range_startk += sizek;
-		range_sizek -= sizek;
-		if (reg >= num_var_ranges)
-			break;
-	}
-	return reg;
-}
-
-static unsigned __init
-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
-			unsigned long sizek)
-{
-	unsigned long hole_basek, hole_sizek;
-	unsigned long second_basek, second_sizek;
-	unsigned long range0_basek, range0_sizek;
-	unsigned long range_basek, range_sizek;
-	unsigned long chunk_sizek;
-	unsigned long gran_sizek;
-
-	hole_basek = 0;
-	hole_sizek = 0;
-	second_basek = 0;
-	second_sizek = 0;
-	chunk_sizek = state->chunk_sizek;
-	gran_sizek = state->gran_sizek;
-
-	/* align with gran size, prevent small block used up MTRRs */
-	range_basek = ALIGN(state->range_startk, gran_sizek);
-	if ((range_basek > basek) && basek)
-		return second_sizek;
-	state->range_sizek -= (range_basek - state->range_startk);
-	range_sizek = ALIGN(state->range_sizek, gran_sizek);
-
-	while (range_sizek > state->range_sizek) {
-		range_sizek -= gran_sizek;
-		if (!range_sizek)
-			return 0;
-	}
-	state->range_sizek = range_sizek;
-
-	/* try to append some small hole */
-	range0_basek = state->range_startk;
-	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
-
-	/* no increase */
-	if (range0_sizek == state->range_sizek) {
-		if (debug_print)
-			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
-				range0_basek<<10,
-				(range0_basek + state->range_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, range0_basek,
-				state->range_sizek, MTRR_TYPE_WRBACK);
-		return 0;
-	}
-
-	/* only cut back, when it is not the last */
-	if (sizek) {
-		while (range0_basek + range0_sizek > (basek + sizek)) {
-			if (range0_sizek >= chunk_sizek)
-				range0_sizek -= chunk_sizek;
-			else
-				range0_sizek = 0;
-
-			if (!range0_sizek)
-				break;
-		}
-	}
-
-second_try:
-	range_basek = range0_basek + range0_sizek;
-
-	/* one hole in the middle */
-	if (range_basek > basek && range_basek <= (basek + sizek))
-		second_sizek = range_basek - basek;
-
-	if (range0_sizek > state->range_sizek) {
-
-		/* one hole in middle or at end */
-		hole_sizek = range0_sizek - state->range_sizek - second_sizek;
-
-		/* hole size should be less than half of range0 size */
-		if (hole_sizek >= (range0_sizek >> 1) &&
-		    range0_sizek >= chunk_sizek) {
-			range0_sizek -= chunk_sizek;
-			second_sizek = 0;
-			hole_sizek = 0;
-
-			goto second_try;
-		}
-	}
-
-	if (range0_sizek) {
-		if (debug_print)
-			printk(KERN_DEBUG "range0: %016lx - %016lx\n",
-				range0_basek<<10,
-				(range0_basek + range0_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, range0_basek,
-				range0_sizek, MTRR_TYPE_WRBACK);
-	}
-
-	if (range0_sizek < state->range_sizek) {
-		/* need to handle left over */
-		range_sizek = state->range_sizek - range0_sizek;
-
-		if (debug_print)
-			printk(KERN_DEBUG "range: %016lx - %016lx\n",
-				 range_basek<<10,
-				 (range_basek + range_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, range_basek,
-				 range_sizek, MTRR_TYPE_WRBACK);
-	}
-
-	if (hole_sizek) {
-		hole_basek = range_basek - hole_sizek - second_sizek;
-		if (debug_print)
-			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
-				 hole_basek<<10,
-				 (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek,
-				 hole_sizek, MTRR_TYPE_UNCACHABLE);
-	}
-
-	return second_sizek;
-}
-
-static void __init
-set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
-		   unsigned long size_pfn)
-{
-	unsigned long basek, sizek;
-	unsigned long second_sizek = 0;
-
-	if (state->reg >= num_var_ranges)
-		return;
-
-	basek = base_pfn << (PAGE_SHIFT - 10);
-	sizek = size_pfn << (PAGE_SHIFT - 10);
-
-	/* See if I can merge with the last range */
-	if ((basek <= 1024) ||
-	    (state->range_startk + state->range_sizek == basek)) {
-		unsigned long endk = basek + sizek;
-		state->range_sizek = endk - state->range_startk;
-		return;
-	}
-	/* Write the range mtrrs */
-	if (state->range_sizek != 0)
-		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
-
-	/* Allocate an msr */
-	state->range_startk = basek + second_sizek;
-	state->range_sizek  = sizek - second_sizek;
-}
-
-/* mininum size of mtrr block that can take hole */
-static u64 mtrr_chunk_size __initdata = (256ULL<<20);
-
-static int __init parse_mtrr_chunk_size_opt(char *p)
-{
-	if (!p)
-		return -EINVAL;
-	mtrr_chunk_size = memparse(p, &p);
-	return 0;
-}
-early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
-
-/* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata;
-
-static int __init parse_mtrr_gran_size_opt(char *p)
-{
-	if (!p)
-		return -EINVAL;
-	mtrr_gran_size = memparse(p, &p);
-	return 0;
-}
-early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
-
-static int nr_mtrr_spare_reg __initdata =
-				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
-
-static int __init parse_mtrr_spare_reg(char *arg)
-{
-	if (arg)
-		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
-	return 0;
-}
-
-early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
-
-static int __init
-x86_setup_var_mtrrs(struct res_range *range, int nr_range,
-		    u64 chunk_size, u64 gran_size)
-{
-	struct var_mtrr_state var_state;
-	int i;
-	int num_reg;
-
-	var_state.range_startk	= 0;
-	var_state.range_sizek	= 0;
-	var_state.reg		= 0;
-	var_state.chunk_sizek	= chunk_size >> 10;
-	var_state.gran_sizek	= gran_size >> 10;
-
-	memset(range_state, 0, sizeof(range_state));
-
-	/* Write the range etc */
-	for (i = 0; i < nr_range; i++)
-		set_var_mtrr_range(&var_state, range[i].start,
-				   range[i].end - range[i].start + 1);
-
-	/* Write the last range */
-	if (var_state.range_sizek != 0)
-		range_to_mtrr_with_hole(&var_state, 0, 0);
-
-	num_reg = var_state.reg;
-	/* Clear out the extra MTRR's */
-	while (var_state.reg < num_var_ranges) {
-		save_var_mtrr(var_state.reg, 0, 0, 0);
-		var_state.reg++;
-	}
-
-	return num_reg;
-}
-
-struct mtrr_cleanup_result {
-	unsigned long gran_sizek;
-	unsigned long chunk_sizek;
-	unsigned long lose_cover_sizek;
-	unsigned int num_reg;
-	int bad;
-};
-
-/*
- * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 2G
- * so we need (1+16)*8
- */
-#define NUM_RESULT	136
-#define PSHIFT		(PAGE_SHIFT - 10)
-
-static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
-static unsigned long __initdata min_loss_pfn[RANGE_NUM];
-
-static void __init print_out_mtrr_range_state(void)
-{
-	int i;
-	char start_factor = 'K', size_factor = 'K';
-	unsigned long start_base, size_base;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-
-		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
-		if (!size_base)
-			continue;
-
-		size_base = to_size_factor(size_base, &size_factor),
-		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
-		start_base = to_size_factor(start_base, &start_factor),
-		type = range_state[i].type;
-
-		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
-			i, start_base, start_factor,
-			size_base, size_factor,
-			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
-			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
-			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
-			);
-	}
-}
-
-static int __init mtrr_need_cleanup(void)
-{
-	int i;
-	mtrr_type type;
-	unsigned long size;
-	/* extra one for all 0 */
-	int num[MTRR_NUM_TYPES + 1];
-
-	/* check entries number */
-	memset(num, 0, sizeof(num));
-	for (i = 0; i < num_var_ranges; i++) {
-		type = range_state[i].type;
-		size = range_state[i].size_pfn;
-		if (type >= MTRR_NUM_TYPES)
-			continue;
-		if (!size)
-			type = MTRR_NUM_TYPES;
-		if (type == MTRR_TYPE_WRPROT)
-			type = MTRR_TYPE_UNCACHABLE;
-		num[type]++;
-	}
-
-	/* check if we got UC entries */
-	if (!num[MTRR_TYPE_UNCACHABLE])
-		return 0;
-
-	/* check if we only had WB and UC */
-	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
-		num_var_ranges - num[MTRR_NUM_TYPES])
-		return 0;
-
-	return 1;
-}
-
-static unsigned long __initdata range_sums;
-static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
-					 unsigned long extra_remove_base,
-					 unsigned long extra_remove_size,
-					 int i)
-{
-	int num_reg;
-	static struct res_range range_new[RANGE_NUM];
-	static int nr_range_new;
-	unsigned long range_sums_new;
-
-	/* convert ranges to var ranges state */
-	num_reg = x86_setup_var_mtrrs(range, nr_range,
-						chunk_size, gran_size);
-
-	/* we got new setting in range_state, check it */
-	memset(range_new, 0, sizeof(range_new));
-	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
-				extra_remove_base, extra_remove_size);
-	range_sums_new = sum_ranges(range_new, nr_range_new);
-
-	result[i].chunk_sizek = chunk_size >> 10;
-	result[i].gran_sizek = gran_size >> 10;
-	result[i].num_reg = num_reg;
-	if (range_sums < range_sums_new) {
-		result[i].lose_cover_sizek =
-			(range_sums_new - range_sums) << PSHIFT;
-		result[i].bad = 1;
-	} else
-		result[i].lose_cover_sizek =
-			(range_sums - range_sums_new) << PSHIFT;
-
-	/* double check it */
-	if (!result[i].bad && !result[i].lose_cover_sizek) {
-		if (nr_range_new != nr_range ||
-			memcmp(range, range_new, sizeof(range)))
-				result[i].bad = 1;
-	}
-
-	if (!result[i].bad && (range_sums - range_sums_new <
-				min_loss_pfn[num_reg])) {
-		min_loss_pfn[num_reg] =
-			range_sums - range_sums_new;
-	}
-}
-
-static void __init mtrr_print_out_one_result(int i)
-{
-	char gran_factor, chunk_factor, lose_factor;
-	unsigned long gran_base, chunk_base, lose_base;
-
-	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
-			result[i].bad ? "*BAD*" : " ",
-			gran_base, gran_factor, chunk_base, chunk_factor);
-	printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
-			result[i].num_reg, result[i].bad ? "-" : "",
-			lose_base, lose_factor);
-}
-
-static int __init mtrr_search_optimal_index(void)
-{
-	int i;
-	int num_reg_good;
-	int index_good;
-
-	if (nr_mtrr_spare_reg >= num_var_ranges)
-		nr_mtrr_spare_reg = num_var_ranges - 1;
-	num_reg_good = -1;
-	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-		if (!min_loss_pfn[i])
-			num_reg_good = i;
-	}
-
-	index_good = -1;
-	if (num_reg_good != -1) {
-		for (i = 0; i < NUM_RESULT; i++) {
-			if (!result[i].bad &&
-			    result[i].num_reg == num_reg_good &&
-			    !result[i].lose_cover_sizek) {
-				index_good = i;
-				break;
-			}
-		}
-	}
-
-	return index_good;
-}
-
-
-static int __init mtrr_cleanup(unsigned address_bits)
-{
-	unsigned long extra_remove_base, extra_remove_size;
-	unsigned long base, size, def, dummy;
-	mtrr_type type;
-	u64 chunk_size, gran_size;
-	int index_good;
-	int i;
-
-	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
-		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
-	def &= 0xff;
-	if (def != MTRR_TYPE_UNCACHABLE)
-		return 0;
-
-	/* get it and store it aside */
-	memset(range_state, 0, sizeof(range_state));
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		range_state[i].base_pfn = base;
-		range_state[i].size_pfn = size;
-		range_state[i].type = type;
-	}
-
-	/* check if we need handle it and can handle it */
-	if (!mtrr_need_cleanup())
-		return 0;
-
-	/* print original var MTRRs at first, for debugging: */
-	printk(KERN_DEBUG "original variable MTRRs\n");
-	print_out_mtrr_range_state();
-
-	memset(range, 0, sizeof(range));
-	extra_remove_size = 0;
-	extra_remove_base = 1 << (32 - PAGE_SHIFT);
-	if (mtrr_tom2)
-		extra_remove_size =
-			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
-	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
-					  extra_remove_size);
-	/*
-	 * [0, 1M) should always be coverred by var mtrr with WB
-	 * and fixed mtrrs should take effective before var mtrr for it
-	 */
-	nr_range = add_range_with_merge(range, nr_range, 0,
-					(1ULL<<(20 - PAGE_SHIFT)) - 1);
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
-	range_sums = sum_ranges(range, nr_range);
-	printk(KERN_INFO "total RAM coverred: %ldM\n",
-	       range_sums >> (20 - PAGE_SHIFT));
-
-	if (mtrr_chunk_size && mtrr_gran_size) {
-		i = 0;
-		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
-				      extra_remove_base, extra_remove_size, i);
-
-		mtrr_print_out_one_result(i);
-
-		if (!result[i].bad) {
-			set_var_mtrr_all(address_bits);
-			return 1;
-		}
-		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
-		       "will find optimal one\n");
-	}
-
-	i = 0;
-	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
-	memset(result, 0, sizeof(result));
-	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
-
-		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
-		     chunk_size <<= 1) {
-
-			if (i >= NUM_RESULT)
-				continue;
-
-			mtrr_calc_range_state(chunk_size, gran_size,
-				      extra_remove_base, extra_remove_size, i);
-			if (debug_print) {
-				mtrr_print_out_one_result(i);
-				printk(KERN_INFO "\n");
-			}
-
-			i++;
-		}
-	}
-
-	/* try to find the optimal index */
-	index_good = mtrr_search_optimal_index();
-
-	if (index_good != -1) {
-		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
-		i = index_good;
-		mtrr_print_out_one_result(i);
-
-		/* convert ranges to var ranges state */
-		chunk_size = result[i].chunk_sizek;
-		chunk_size <<= 10;
-		gran_size = result[i].gran_sizek;
-		gran_size <<= 10;
-		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
-		set_var_mtrr_all(address_bits);
-		printk(KERN_DEBUG "New variable MTRRs\n");
-		print_out_mtrr_range_state();
-		return 1;
-	} else {
-		/* print out all */
-		for (i = 0; i < NUM_RESULT; i++)
-			mtrr_print_out_one_result(i);
-	}
-
-	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
-	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
-
-	return 0;
-}
-#else
-static int __init mtrr_cleanup(unsigned address_bits)
-{
-	return 0;
-}
-#endif
-
-static int __initdata changed_by_mtrr_cleanup;
-
-static int disable_mtrr_trim;
-
-static int __init disable_mtrr_trim_setup(char *str)
-{
-	disable_mtrr_trim = 1;
-	return 0;
-}
-early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
-
-/*
- * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
- * for memory >4GB. Check for that here.
- * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
- * apply to are wrong, but so far we don't know of any such case in the wild.
- */
-#define Tom2Enabled (1U << 21)
-#define Tom2ForceMemTypeWB (1U << 22)
-
-int __init amd_special_default_mtrr(void)
-{
-	u32 l, h;
-
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		return 0;
-	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
-		return 0;
-	/* In case some hypervisor doesn't pass SYSCFG through */
-	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
-		return 0;
-	/*
-	 * Memory between 4GB and top of mem is forced WB by this magic bit.
-	 * Reserved before K8RevF, but should be zero there.
-	 */
-	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
-		 (Tom2Enabled | Tom2ForceMemTypeWB))
-		return 1;
-	return 0;
-}
-
-static u64 __init real_trim_memory(unsigned long start_pfn,
-				   unsigned long limit_pfn)
-{
-	u64 trim_start, trim_size;
-	trim_start = start_pfn;
-	trim_start <<= PAGE_SHIFT;
-	trim_size = limit_pfn;
-	trim_size <<= PAGE_SHIFT;
-	trim_size -= trim_start;
-
-	return e820_update_range(trim_start, trim_size, E820_RAM,
-				E820_RESERVED);
-}
-/**
- * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
- * @end_pfn: ending page frame number
- *
- * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
- * memory configurations.  This routine checks that the highest MTRR matches
- * the end of memory, to make sure the MTRRs having a write back type cover
- * all of the memory the kernel is intending to use. If not, it'll trim any
- * memory off the end by adjusting end_pfn, removing it from the kernel's
- * allocation pools, warning the user with an obnoxious message.
- */
-int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
-{
-	unsigned long i, base, size, highest_pfn = 0, def, dummy;
-	mtrr_type type;
-	u64 total_trim_size;
-
-	/* extra one for all 0 */
-	int num[MTRR_NUM_TYPES + 1];
-	/*
-	 * Make sure we only trim uncachable memory on machines that
-	 * support the Intel MTRR architecture:
-	 */
-	if (!is_cpu(INTEL) || disable_mtrr_trim)
-		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
-	def &= 0xff;
-	if (def != MTRR_TYPE_UNCACHABLE)
-		return 0;
-
-	/* get it and store it aside */
-	memset(range_state, 0, sizeof(range_state));
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		range_state[i].base_pfn = base;
-		range_state[i].size_pfn = size;
-		range_state[i].type = type;
-	}
-
-	/* Find highest cached pfn */
-	for (i = 0; i < num_var_ranges; i++) {
-		type = range_state[i].type;
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		base = range_state[i].base_pfn;
-		size = range_state[i].size_pfn;
-		if (highest_pfn < base + size)
-			highest_pfn = base + size;
-	}
-
-	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
-	if (!highest_pfn) {
-		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
-		return 0;
-	}
-
-	/* check entries number */
-	memset(num, 0, sizeof(num));
-	for (i = 0; i < num_var_ranges; i++) {
-		type = range_state[i].type;
-		if (type >= MTRR_NUM_TYPES)
-			continue;
-		size = range_state[i].size_pfn;
-		if (!size)
-			type = MTRR_NUM_TYPES;
-		num[type]++;
-	}
-
-	/* no entry for WB? */
-	if (!num[MTRR_TYPE_WRBACK])
-		return 0;
-
-	/* check if we only had WB and UC */
-	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
-		num_var_ranges - num[MTRR_NUM_TYPES])
-		return 0;
-
-	memset(range, 0, sizeof(range));
-	nr_range = 0;
-	if (mtrr_tom2) {
-		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
-		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
-		if (highest_pfn < range[nr_range].end + 1)
-			highest_pfn = range[nr_range].end + 1;
-		nr_range++;
-	}
-	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
-
-	total_trim_size = 0;
-	/* check the head */
-	if (range[0].start)
-		total_trim_size += real_trim_memory(0, range[0].start);
-	/* check the holes */
-	for (i = 0; i < nr_range - 1; i++) {
-		if (range[i].end + 1 < range[i+1].start)
-			total_trim_size += real_trim_memory(range[i].end + 1,
-							    range[i+1].start);
-	}
-	/* check the top */
-	i = nr_range - 1;
-	if (range[i].end + 1 < end_pfn)
-		total_trim_size += real_trim_memory(range[i].end + 1,
-							 end_pfn);
-
-	if (total_trim_size) {
-		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %lluMB of RAM.\n",
-			total_trim_size >> 20);
-
-		if (!changed_by_mtrr_cleanup)
-			WARN_ON(1);
-
-		printk(KERN_INFO "update e820 for mtrr\n");
-		update_e820();
-
-		return 1;
-	}
-
-	return 0;
-}
+int __initdata changed_by_mtrr_cleanup;
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index ffd60409cc6..77f67f7b347 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if;
 
 extern unsigned int num_var_ranges;
 extern u64 mtrr_tom2;
+extern struct mtrr_state_type mtrr_state;
 
 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
@@ -88,3 +89,6 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
 int amd_init_mtrr(void);
 int cyrix_init_mtrr(void);
 int centaur_init_mtrr(void);
+
+extern int changed_by_mtrr_cleanup;
+extern int mtrr_cleanup(unsigned address_bits);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b2267..f6c70a164e3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,7 +19,7 @@
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 
-#include <asm/apic.h>
+#include <asm/genapic.h>
 #include <asm/intel_arch_perfmon.h>
 
 struct nmi_watchdog_ctlblk {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 01b1244ef1c..d67e0e48bc2 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,11 +7,10 @@
 /*
  *	Get CPU information for use by the procfs.
  */
-#ifdef CONFIG_X86_32
 static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 			      unsigned int cpu)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
 		seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 #endif
 }
 
+#ifdef CONFIG_X86_32
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	/*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   c->wp_works_ok ? "yes" : "no");
 }
 #else
-static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
-			      unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	if (c->x86_max_cores * smp_num_siblings > 1) {
-		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-		seq_printf(m, "siblings\t: %d\n",
-			   cpus_weight(per_cpu(cpu_core_map, cpu)));
-		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-		seq_printf(m, "apicid\t\t: %d\n", c->apicid);
-		seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
-	}
-#endif
-}
-
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	seq_printf(m,
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 52b3fefbd5a..bb62b3e5caa 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 #endif
 }
 
-static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
 	.c_vendor	= "Transmeta",
 	.c_ident	= { "GenuineTMx86", "TransmetaCPU" },
 	.c_early_init	= early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index e777f79e096..fd2c37bf7ac 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
  * so no special init takes place.
  */
 
-static struct cpu_dev umc_cpu_dev __cpuinitdata = {
+static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
 	.c_vendor	= "UMC",
 	.c_ident	= { "UMC UMC UMC" },
 	.c_models = {