aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/include/asm/mce.h | 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 58
-rw-r--r-- drivers/edac/Makefile | 2
-rw-r--r-- drivers/edac/edac_mce_amd.c | 15
4 files changed, 53 insertions, 24 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c581..f1363b72364 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void enable_p5_mce(void) {}
#endif
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 183c3457d2f..b1598a9436d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+static void default_decode_mce(struct mce *m)
+{
+ pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+ pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,46 +177,46 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
-void __weak decode_mce(struct mce *m)
-{
- return;
-}
-
static void print_mce(struct mce *m)
{
- printk(KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+ pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status);
+
if (m->ip) {
- printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ pr_emerg("RIP%s %02x:<%016Lx> ",
+ !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+ m->cs, m->ip);
+
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
- printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+ pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
- printk(KERN_CONT "ADDR %llx ", m->addr);
+ pr_cont("ADDR %llx ", m->addr);
if (m->misc)
- printk(KERN_CONT "MISC %llx ", m->misc);
- printk(KERN_CONT "\n");
- printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
- m->cpuvendor, m->cpuid, m->time, m->socketid,
- m->apicid);
+ pr_cont("MISC %llx ", m->misc);
+
+ pr_cont("\n");
+ pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
- decode_mce(m);
+ /*
+ * Print out human-readable details about the MCE error
+ * (if the CPU has an implementation for that):
+ */
+ x86_mce_decode_callback(m);
}
static void print_mce_head(void)
{
- printk(KERN_EMERG "\nHARDWARE ERROR\n");
+ pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
- printk(KERN_EMERG "This is not a software problem!\n"
- "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+ pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced;
static void wait_for_panic(void)
{
long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
preempt_disable();
local_irq_enable();
while (timeout-- > 0)
@@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
unsigned bank = __get_cpu_var(injectm.bank);
+
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 7a473bbe8ab..8701cd7ce4e 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -18,7 +18,7 @@ edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif
ifdef CONFIG_CPU_SUP_AMD
-edac_core-objs += edac_mce_amd.o
+obj-$(CONFIG_X86_MCE) += edac_mce_amd.o
endif
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 0c21c370c9d..83a01a1187d 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -362,7 +362,7 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-void decode_mce(struct mce *m)
+static void amd_decode_mce(struct mce *m)
{
struct err_regs regs;
int node, ecc;
@@ -420,3 +420,16 @@ void decode_mce(struct mce *m)
amd_decode_err_code(m->status & 0xffff);
}
+
+static int __init mce_amd_init(void)
+{
+ /*
+ * We can decode MCEs for Opteron and later CPUs:
+ */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+ (boot_cpu_data.x86 >= 0xf))
+ x86_mce_decode_callback = amd_decode_mce;
+
+ return 0;
+}
+early_initcall(mce_amd_init);