aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig8
-rw-r--r--arch/x86_64/Kconfig.debug4
-rw-r--r--arch/x86_64/ia32/ia32entry.S19
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/entry.S188
-rw-r--r--arch/x86_64/kernel/head64.c5
-rw-r--r--arch/x86_64/kernel/irq.c4
-rw-r--r--arch/x86_64/kernel/nmi.c2
-rw-r--r--arch/x86_64/kernel/process.c2
-rw-r--r--arch/x86_64/kernel/smpboot.c2
-rw-r--r--arch/x86_64/kernel/stacktrace.c221
-rw-r--r--arch/x86_64/kernel/traps.c129
-rw-r--r--arch/x86_64/lib/thunk.S5
-rw-r--r--arch/x86_64/mm/fault.c1
14 files changed, 495 insertions, 96 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e856804c447..28df7d88ce2 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,14 @@ config X86
bool
default y
+config LOCKDEP_SUPPORT
+ bool
+ default y
+
+config STACKTRACE_SUPPORT
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f..775d211a5cf 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
menu "Kernel hacking"
+config TRACE_IRQFLAGS_SUPPORT
+ bool
+ default y
+
source "lib/Kconfig.debug"
config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea3..9b5bb413a6e 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/vsyscall32.h>
+#include <asm/irqflags.h>
#include <linux/linkage.h>
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
swapgs
movq %gs:pda_kernelstack, %rsp
addq $(PDA_STACKOFFSET),%rsp
+ /*
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs, here we enable it straight after entry:
+ */
sti
movl %ebp,%ebp /* zero extension */
pushq $__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
+ TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz int_ret_from_sys_call
andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
CFI_REGISTER rsp,rcx
movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
CFI_REGISTER rip,rdx
+ TRACE_IRQS_ON
swapgs
sti /* sti only takes effect after the next instruction */
/* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq %gs:pda_kernelstack,%rsp
+ /*
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs and here we enable it straight after entry:
+ */
sti
SAVE_ARGS 8,1,1
movl %eax,%eax /* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
+ TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz int_ret_from_sys_call
andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
+ TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp
CFI_RESTORE rsp
swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
/*CFI_REL_OFFSET cs,CS-RIP*/
CFI_REL_OFFSET rip,RIP-RIP
- swapgs
+ swapgs
+ /*
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs and here we enable it straight after entry:
+ */
sti
movl %eax,%eax
pushq %rax
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84ec5b6..b5aaeafc1cd 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c298360..d464dded68c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
+#include <asm/irqflags.h>
.code64
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
-
+
+
+.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ jnc 1f
+ TRACE_IRQS_ON
+1:
+#endif
+.endm
+
/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
@@ -194,6 +205,10 @@ ENTRY(system_call)
swapgs
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
+ /*
+ * No need to follow this irqs off/on section - it's straight
+ * and short:
+ */
sti
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -219,10 +234,15 @@ ret_from_sys_call:
sysret_check:
GET_THREAD_INFO(%rcx)
cli
+ TRACE_IRQS_OFF
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz sysret_careful
+ /*
+ * sysretq will re-enable interrupts:
+ */
+ TRACE_IRQS_ON
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
+ TRACE_IRQS_ON
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
/* Handle a signal */
sysret_signal:
+ TRACE_IRQS_ON
sti
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz 1f
@@ -261,6 +283,7 @@ sysret_signal:
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
cli
+ TRACE_IRQS_OFF
jmp int_with_check
badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
CFI_REL_OFFSET r10,R10-ARGOFFSET
CFI_REL_OFFSET r11,R11-ARGOFFSET
cli
+ TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
+ TRACE_IRQS_ON
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
cli
+ TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
+ TRACE_IRQS_ON
sti
SAVE_REST
/* Check for syscall exit trace */
@@ -351,6 +378,7 @@ int_very_careful:
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
cli
+ TRACE_IRQS_OFF
jmp int_restore_rest
int_signal:
@@ -363,6 +391,7 @@ int_signal:
int_restore_rest:
RESTORE_REST
cli
+ TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
swapgs
1: incl %gs:pda_irqcount # RED-PEN should check preempt count
cmoveq %gs:pda_irqstackptr,%rsp
+ /*
+ * We entered an interrupt context - irqs are off:
+ */
+ TRACE_IRQS_OFF
call \func
.endm
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
cli
+ TRACE_IRQS_OFF
decl %gs:pda_irqcount
leaveq
CFI_DEF_CFA_REGISTER rsp
@@ -515,9 +549,21 @@ retint_check:
CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs:
+ /*
+ * The iretq could re-enable interrupts:
+ */
+ cli
+ TRACE_IRQS_IRETQ
swapgs
+ jmp restore_args
+
retint_restore_args:
cli
+ /*
+ * The iretq could re-enable interrupts:
+ */
+ TRACE_IRQS_IRETQ
+restore_args:
RESTORE_ARGS 0,8,0
iret_label:
iretq
@@ -530,6 +576,7 @@ iret_label:
/* running with kernel gs */
bad_iret:
movq $11,%rdi /* SIGSEGV */
+ TRACE_IRQS_ON
sti
jmp do_exit
.previous
@@ -539,6 +586,7 @@ retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
+ TRACE_IRQS_ON
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
@@ -547,11 +595,13 @@ retint_careful:
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
cli
+ TRACE_IRQS_OFF
jmp retint_check
retint_signal:
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz retint_swapgs
+ TRACE_IRQS_ON
sti
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
@@ -560,6 +610,7 @@ retint_signal:
call do_notify_resume
RESTORE_REST
cli
+ TRACE_IRQS_OFF
movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0
+ .macro paranoidentry sym, ist=0, irqtrace=1
SAVE_ALL
cld
movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
cli
+ .if \irqtrace
+ TRACE_IRQS_OFF
+ .endif
.endm
-
+
+ /*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ *
+ * "trace" is 0 for the NMI handler only, because irq-tracing
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+ .macro paranoidexit trace=1
+ /* ebx: no swapgs flag */
+paranoid_exit\trace:
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz paranoid_restore\trace
+ testl $3,CS(%rsp)
+ jnz paranoid_userspace\trace
+paranoid_swapgs\trace:
+ TRACE_IRQS_IRETQ 0
+ swapgs
+paranoid_restore\trace:
+ RESTORE_ALL 8
+ iretq
+paranoid_userspace\trace:
+ GET_THREAD_INFO(%rcx)
+ movl threadinfo_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz paranoid_swapgs\trace
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz paranoid_schedule\trace
+ movl %ebx,%edx /* arg3: thread flags */
+ .if \trace
+ TRACE_IRQS_ON
+ .endif
+ sti
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ cli
+ .if \trace
+ TRACE_IRQS_OFF
+ .endif
+ jmp paranoid_userspace\trace
+paranoid_schedule\trace:
+ .if \trace
+ TRACE_IRQS_ON
+ .endif
+ sti
+ call schedule
+ cli
+ .if \trace
+ TRACE_IRQS_OFF
+ .endif
+ jmp paranoid_userspace\trace
+ CFI_ENDPROC
+ .endm
+
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
@@ -748,6 +864,7 @@ error_exit:
movl %ebx,%eax
RESTORE_REST
cli
+ TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
@@ -755,6 +872,10 @@ error_exit:
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
+ /*
+ * The iret might restore flags:
+ */
+ TRACE_IRQS_IRETQ
swapgs
RESTORE_ARGS 0,8,0
jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
- jmp paranoid_exit
- CFI_ENDPROC
+ paranoidexit
END(debug)
.previous .text
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
INTR_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- */
- /* ebx: no swapgs flag */
-paranoid_exit:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore
- testl $3,CS(%rsp)
- jnz paranoid_userspace
-paranoid_swapgs:
- swapgs
-paranoid_restore:
- RESTORE_ALL 8
- iretq
-paranoid_userspace:
- GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule
- movl %ebx,%edx /* arg3: thread flags */
- sti
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- cli
- jmp paranoid_userspace
-paranoid_schedule:
- sti
- call schedule
- cli
- jmp paranoid_userspace
- CFI_ENDPROC
+ paranoidentry do_nmi, 0, 0
+#ifdef CONFIG_TRACE_IRQFLAGS
+ paranoidexit 0
+#else
+ jmp paranoid_exit1
+ CFI_ENDPROC
+#endif
END(nmi)
.previous .text
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
- jmp paranoid_exit
+ jmp paranoid_exit1
CFI_ENDPROC
END(int3)
.previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
ENTRY(double_fault)
XCPT_FRAME
paranoidentry do_double_fault
- jmp paranoid_exit
+ jmp paranoid_exit1
CFI_ENDPROC
END(double_fault)
@@ -1022,7 +1106,7 @@ END(segment_not_present)
ENTRY(stack_segment)
XCPT_FRAME
paranoidentry do_stack_segment
- jmp paranoid_exit
+ jmp paranoid_exit1
CFI_ENDPROC
END(stack_segment)
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
- jmp paranoid_exit
+ jmp paranoid_exit1
CFI_ENDPROC
END(machine_check)
#endif
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e6a71c9556d..36647ce6aec 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
/*
+ * This must be called really, really early:
+ */
+ lockdep_init();
+
+ /*
* switch to init_level4_pgt from boot_level4_pgt
*/
memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bf..5221a53e90c 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
local_irq_save(flags);
pending = local_softirq_pending();
/* Switch to interrupt stack */
- if (pending)
+ if (pending) {
call_softirq();
+ WARN_ON_ONCE(softirq_count());
+ }
local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 476c1472fc0..5baa0c726e9 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
static __init void nmi_cpu_busy(void *data)
{
volatile int *endflag = data;
- local_irq_enable();
+ local_irq_enable_in_hardirq();
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
even if there is a simulator or similar that catches the
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index ca56e19b8b6..bb6745d13b8 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs)
system_utsname.version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
- printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
regs->eflags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->rax, regs->rbx, regs->rcx);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9705a6a384f..b7c70596979 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
};
DECLARE_WORK(work, do_fork_idle, &c_idle);
+ lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
+
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 00000000000..32cf55eb9af
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
+/*
+ * arch/x86_64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/smp.h>
+
+static inline int
+in_range(unsigned long start, unsigned long addr, unsigned long end)
+{
+ return addr >= start && addr <= end;
+}
+
+static unsigned long
+get_stack_end(struct task_struct *task, unsigned long stack)
+{
+ unsigned long stack_start, stack_end, flags;
+ int i, cpu;
+
+ /*
+ * The most common case is that we are in the task stack:
+ */
+ stack_start = (unsigned long)task->thread_info;
+ stack_end = stack_start + THREAD_SIZE;
+
+ if (in_range(stack_start, stack, stack_end))
+ return stack_end;
+
+ /*
+ * We are in an interrupt if irqstackptr is set:
+ */
+ raw_local_irq_save(flags);
+ cpu = safe_smp_processor_id();
+ stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
+
+ if (stack_end) {
+ stack_start = stack_end & ~(IRQSTACKSIZE-1);
+ if (in_range(stack_start, stack, stack_end))
+ goto out_restore;
+ /*
+ * We get here if we are in an IRQ context but we
+ * are also in an exception stack.
+ */
+ }
+
+ /*
+ * Iterate over all exception stacks, and figure out whether
+ * 'stack' is in one of them:
+ */
+ for (i = 0; i < N_EXCEPTION_STACKS; i++) {
+ /*
+ * set 'end' to the end of the exception stack.
+ */
+ stack_end = per_cpu(init_tss, cpu).ist[i];
+ stack_start = stack_end - EXCEPTION_STKSZ;
+
+ /*
+ * Is 'stack' above this exception frame's end?
+ * If yes then skip to the next frame.
+ */
+ if (stack >= stack_end)
+ continue;
+ /*
+ * Is 'stack' above this exception frame's start address?
+ * If yes then we found the right frame.
+ */
+ if (stack >= stack_start)
+ goto out_restore;
+
+ /*
+ * If this is a debug stack, and if it has a larger size than
+ * the usual exception stacks, then 'stack' might still
+ * be within the lower portion of the debug stack:
+ */
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+ if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
+ /*
+ * Black magic. A large debug stack is composed of
+ * multiple exception stack entries, which we
+ * iterate through now. Dont look:
+ */
+ do {
+ stack_end -= EXCEPTION_STKSZ;
+ stack_start -= EXCEPTION_STKSZ;
+ } while (stack < stack_start);
+
+ goto out_restore;
+ }
+#endif
+ }
+ /*
+ * Ok, 'stack' is not pointing to any of the system stacks.
+ */
+ stack_end = 0;
+
+out_restore:
+ raw_local_irq_restore(flags);
+
+ return stack_end;
+}
+
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer:
+ */
+static inline unsigned long
+save_context_stack(struct stack_trace *trace, unsigned int skip,
+ unsigned long stack, unsigned long stack_end)
+{
+ unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+ unsigned long prev_stack = 0;
+
+ while (in_range(prev_stack, stack, stack_end)) {
+ pr_debug("stack: %p\n", (void *)stack);
+ addr = (unsigned long)(((unsigned long *)stack)[1]);
+ pr_debug("addr: %p\n", (void *)addr);
+ if (!skip)
+ trace->entries[trace->nr_entries++] = addr-1;
+ else
+ skip--;
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (!addr)
+ return 0;
+ /*
+ * Stack frames must go forwards (otherwise a loop could
+ * happen if the stackframe is corrupted), so we move
+ * prev_stack forwards:
+ */
+ prev_stack = stack;
+ stack = (unsigned long)(((unsigned long *)stack)[0]);
+ }
+ pr_debug("invalid: %p\n", (void *)stack);
+#else
+ while (stack < stack_end) {
+ addr = ((unsigned long *)stack)[0];
+ stack += sizeof(long);
+ if (__kernel_text_address(addr)) {
+ if (!skip)
+ trace->entries[trace->nr_entries++] = addr-1;
+ else
+ skip--;
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ }
+ }
+#endif
+ return stack;
+}
+
+#define MAX_STACKS 10
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * If all_contexts is set, all contexts (hardirq, softirq and process)
+ * are saved. If not set then only the current context is saved.
+ */
+void save_stack_trace(struct stack_trace *trace,
+ struct task_struct *task, int all_contexts,
+ unsigned int skip)
+{
+ unsigned long stack = (unsigned long)&stack;
+ int i, nr_stacks = 0, stacks_done[MAX_STACKS];
+
+ WARN_ON(trace->nr_entries || !trace->max_entries);
+
+ if (!task)
+ task = current;
+
+ pr_debug("task: %p, ti: %p\n", task, task->thread_info);
+
+ if (!task || task == current) {
+ /* Grab rbp right from our regs: */
+ asm ("mov %%rbp, %0" : "=r" (stack));
+ pr_debug("rbp: %p\n", (void *)stack);
+ } else {
+ /* rbp is the last reg pushed by switch_to(): */
+ stack = task->thread.rsp;
+ pr_debug("other task rsp: %p\n", (void *)stack);
+ stack = (unsigned long)(((unsigned long *)stack)[0]);
+ pr_debug("other task rbp: %p\n", (void *)stack);
+ }
+
+ while (1) {
+ unsigned long stack_end = get_stack_end(task, stack);
+
+ pr_debug("stack: %p\n", (void *)stack);
+ pr_debug("stack end: %p\n", (void *)stack_end);
+
+ /*
+ * Invalid stack addres?
+ */
+ if (!stack_end)
+ return;
+ /*
+ * Were we in this stack already? (recursion)
+ */
+ for (i = 0; i < nr_stacks; i++)
+ if (stacks_done[i] == stack_end)
+ return;
+ stacks_done[nr_stacks] = stack_end;
+
+ stack = save_context_stack(trace, skip, stack, stack_end);
+ if (!all_contexts || !stack ||
+ trace->nr_entries >= trace->max_entries)
+ return;
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+ if (trace->nr_entries >= trace->max_entries)
+ return;
+ if (++nr_stacks >= MAX_STACKS)
+ return;
+ }
+}
+
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5a5311d3de0..79d05c48207 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12;
static int call_trace = 1;
#ifdef CONFIG_KALLSYMS
-#include <linux/kallsyms.h>
-int printk_address(unsigned long address)
-{
+# include <linux/kallsyms.h>
+void printk_address(unsigned long address)
+{
unsigned long offset = 0, symsize;
const char *symname;
char *modname;
- char *delim = ":";
+ char *delim = ":";
char namebuf[128];
- symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
- if (!symname)
- return printk("[<%016lx>]", address);
- if (!modname)
+ symname = kallsyms_lookup(address, &symsize, &offset,
+ &modname, namebuf);
+ if (!symname) {
+ printk(" [<%016lx>]\n", address);
+ return;
+ }
+ if (!modname)
modname = delim = "";
- return printk("<%016lx>{%s%s%s%s%+ld}",
- address, delim, modname, delim, symname, offset);
-}
+ printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
+ address, delim, modname, delim, symname, offset, symsize);
+}
#else
-int printk_address(unsigned long address)
-{
- return printk("[<%016lx>]", address);
-}
+void printk_address(unsigned long address)
+{
+ printk(" [<%016lx>]\n", address);
+}
#endif
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
};
unsigned k;
+ /*
+ * Iterate over all exception stacks, and figure out whether
+ * 'stack' is in one of them:
+ */
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
unsigned long end;
+ /*
+ * set 'end' to the end of the exception stack.
+ */
switch (k + 1) {
+ /*
+ * TODO: this block is not needed i think, because
+ * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
+ * properly too.
+ */
#if DEBUG_STKSZ > EXCEPTION_STKSZ
case DEBUG_STACK:
end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
@@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
end = per_cpu(init_tss, cpu).ist[k];
break;
}
+ /*
+ * Is 'stack' above this exception frame's end?
+ * If yes then skip to the next frame.
+ */
if (stack >= end)
continue;
+ /*
+ * Is 'stack' above this exception frame's start address?
+ * If yes then we found the right frame.
+ */
if (stack >= end - EXCEPTION_STKSZ) {
+ /*
+ * Make sure we only iterate through an exception
+ * stack once. If it comes up for the second time
+ * then there's something wrong going on - just
+ * break out and return NULL:
+ */
if (*usedp & (1U << k))
break;
*usedp |= 1U << k;
*idp = ids[k];
return (unsigned long *)end;
}
+ /*
+ * If this is a debug stack, and if it has a larger size than
+ * the usual exception stacks, then 'stack' might still
+ * be within the lower portion of the debug stack:
+ */
#if DEBUG_STKSZ > EXCEPTION_STKSZ
if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
unsigned j = N_EXCEPTION_STACKS - 1;
+ /*
+ * Black magic. A large debug stack is composed of
+ * multiple exception stack entries, which we
+ * iterate through now. Dont look:
+ */
do {
++j;
end -= EXCEPTION_STKSZ;
@@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
static int show_trace_unwind(struct unwind_frame_info *info, void *context)
{
- int i = 11, n = 0;
+ int n = 0;
while (unwind(info) == 0 && UNW_PC(info)) {
- ++n;
- if (i > 50) {
- printk("\n ");
- i = 7;
- } else
- i += printk(" ");
- i += printk_address(UNW_PC(info));
+ n++;
+ printk_address(UNW_PC(info));
if (arch_unw_user_mode(info))
break;
}
- printk("\n");
return n;
}
@@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
int i = 11;
unsigned used = 0;
- printk("\nCall Trace:");
+ printk("\nCall Trace:\n");
if (!tsk)
tsk = current;
@@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
}
}
+ /*
+ * Print function call entries within a stack. 'cond' is the
+ * "end of stackframe" condition, that the 'stack++'
+ * iteration will eventually trigger.
+ */
#define HANDLE_STACK(cond) \
do while (cond) { \
unsigned long addr = *stack++; \
if (kernel_text_address(addr)) { \
- if (i > 50) { \
- printk("\n "); \
- i = 0; \
- } \
- else \
- i += printk(" "); \
/* \
* If the address is either in the text segment of the \
* kernel, or in the region which contains vmalloc'ed \
@@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
* down the cause of the crash will be able to figure \
* out the call path that was taken. \
*/ \
- i += printk_address(addr); \
+ printk_address(addr); \
} \
} while (0)
- for(; ; ) {
+ /*
+ * Print function call entries in all stacks, starting at the
+ * current stack address. If the stacks consist of nested
+ * exceptions
+ */
+ for ( ; ; ) {
const char *id;
unsigned long *estack_end;
estack_end = in_exception_stack(cpu, (unsigned long)stack,
&used, &id);
if (estack_end) {
- i += printk(" <%s>", id);
+ printk(" <%s>", id);
HANDLE_STACK (stack < estack_end);
- i += printk(" <EOE>");
+ printk(" <EOE>");
+ /*
+ * We link to the next stack via the
+ * second-to-last pointer (index -2 to end) in the
+ * exception stack:
+ */
stack = (unsigned long *) estack_end[-2];
continue;
}
@@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
(IRQSTACKSIZE - 64) / sizeof(*irqstack);
if (stack >= irqstack && stack < irqstack_end) {
- i += printk(" <IRQ>");
+ printk(" <IRQ>");
HANDLE_STACK (stack < irqstack_end);
+ /*
+ * We link to the next stack (which would be
+ * the process stack normally) the last
+ * pointer (index -1 to end) in the IRQ stack:
+ */
stack = (unsigned long *) (irqstack_end[-1]);
irqstack_end = NULL;
- i += printk(" <EOI>");
+ printk(" <EOI>");
continue;
}
}
break;
}
+ /*
+ * This prints the process stack:
+ */
HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
#undef HANDLE_STACK
+
printk("\n");
}
@@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned
break;
}
if (i && ((i % 4) == 0))
- printk("\n ");
- printk("%016lx ", *stack++);
+ printk("\n");
+ printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
show_trace(tsk, regs, rsp);
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index e49af0032e9..332ea5dff91 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -47,6 +47,11 @@
thunk_retrax __down_failed_interruptible,__down_interruptible
thunk_retrax __down_failed_trylock,__down_trylock
thunk __up_wakeup,__up
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ thunk trace_hardirqs_on_thunk,trace_hardirqs_on
+ thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+#endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
CFI_STARTPROC
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 5afcf6eb00f..ac8ea66ccb9 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -570,7 +570,6 @@ no_context:
printk(KERN_ALERT "Unable to handle kernel paging request");
printk(" at %016lx RIP: \n" KERN_ALERT,address);
printk_address(regs->rip);
- printk("\n");
dump_pagetable(address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;