From b556b35e98ad2b9174a7a623d152cdf412d1a999 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 11 Jan 2006 22:43:00 +0100 Subject: [PATCH] x86_64: Move int 3 handler to debug stack and allow to increase it. This - switches the INT3 handler to run on an IST stack (to cope with breakpoints set by a kernel debugger on places where the kernel's %gs base hasn't been set up, yet); the IST stack used is shared with the INT1 handler's [AK: this also allows setting a kprobe on the interrupt/exception entry points] - allows nesting of INT1/INT3 handlers so that one can, with a kernel debugger, debug (at least) the user-mode portions of the INT1/INT3 handling; the nesting isn't actively enabled here since a kernel- debugger-free kernel doesn't need it Signed-Off-By: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/asm-offsets.c | 4 ++++ arch/x86_64/kernel/entry.S | 26 ++++++++++++++++++++++---- arch/x86_64/kernel/setup64.c | 22 ++++++++++++++++++---- arch/x86_64/kernel/traps.c | 35 ++++++++++++++++++++++++++++++++--- 4 files changed, 76 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index aaa6d383351..00a08d13230 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c @@ -64,5 +64,9 @@ int main(void) DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); DEFINE(pbe_next, offsetof(struct pbe, next)); + BLANK(); +#if DEBUG_STKSZ > EXCEPTION_STKSZ + DEFINE(DEBUG_IST, DEBUG_STACK); +#endif return 0; } diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index bd21ebafaf6..28b3c8e6b04 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -673,7 +673,10 @@ ENTRY(spurious_interrupt) /* error code is on the stack already */ /* handle NMI like exceptions that can happen everywhere */ - .macro paranoidentry sym +#ifndef DEBUG_IST +# define DEBUG_IST 0 +#endif + .macro paranoidentry sym, ist=0 SAVE_ALL cld movl $1,%ebx @@ -683,10 +686,20 @@ ENTRY(spurious_interrupt) js 1f swapgs xorl %ebx,%ebx -1: movq %rsp,%rdi +1: + .if \ist + movq %gs:pda_data_offset, %rbp + .endif + movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi movq $-1,ORIG_RAX(%rsp) + .if \ist + subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + .endif call \sym + .if \ist + addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + .endif cli .endm @@ -904,7 +917,7 @@ KPROBE_ENTRY(debug) INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_debug + paranoidentry do_debug, DEBUG_IST jmp paranoid_exit CFI_ENDPROC .previous .text @@ -959,7 +972,12 @@ paranoid_schedule: CFI_ENDPROC KPROBE_ENTRY(int3) - zeroentry do_int3 + INTR_FRAME + pushq $0 + CFI_ADJUST_CFA_OFFSET 8 + paranoidentry do_int3, DEBUG_IST + jmp paranoid_exit + CFI_ENDPROC .previous .text ENTRY(overflow) diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 7b7131d8bc9..39e728cfe6a 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -145,7 +145,7 @@ void pda_init(int cpu) pda->irqstackptr += IRQSTACKSIZE-64; } -char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] +char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ] __attribute__((section(".bss.page_aligned"))); /* May not be marked __init: used by software suspend */ @@ -236,13 +236,27 @@ void __cpuinit cpu_init (void) */ for (v = 0; v < N_EXCEPTION_STACKS; v++) { if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, - EXCEPTION_STACK_ORDER); + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); if (!estacks) panic("Cannot allocate exception stack %ld %d\n", v, cpu); } - estacks += EXCEPTION_STKSZ; + switch (v + 1) { +#if DEBUG_STKSZ > EXCEPTION_STKSZ + case DEBUG_STACK: + cpu_pda[cpu].debugstack = (unsigned long)estacks; + estacks += DEBUG_STKSZ; + break; +#endif + default: + estacks += EXCEPTION_STKSZ; + break; + } t->ist[v] = (unsigned long)estacks; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index bd71ddac0dc..1a9094dab68 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -121,19 +121,31 @@ int printk_address(unsigned long address) static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, const char **idp) { - static const char ids[N_EXCEPTION_STACKS][8] = { + static char ids[][8] = { [DEBUG_STACK - 1] = "#DB", [NMI_STACK - 1] = "NMI", [DOUBLEFAULT_STACK - 1] = "#DF", [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", +#if DEBUG_STKSZ > EXCEPTION_STKSZ + [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" +#endif }; unsigned k; for (k = 0; k < N_EXCEPTION_STACKS; k++) { unsigned long end; - end = per_cpu(init_tss, cpu).ist[k]; + switch (k + 1) { +#if DEBUG_STKSZ > EXCEPTION_STKSZ + case DEBUG_STACK: + end = cpu_pda[cpu].debugstack + DEBUG_STKSZ; + break; +#endif + default: + end = per_cpu(init_tss, cpu).ist[k]; + break; + } if (stack >= end) continue; if (stack >= end - EXCEPTION_STKSZ) { @@ -143,6 +155,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, *idp = ids[k]; return (unsigned long *)end; } +#if DEBUG_STKSZ > EXCEPTION_STKSZ + if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { + unsigned j = N_EXCEPTION_STACKS - 1; + + do { + ++j; + end -= EXCEPTION_STKSZ; + ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); + } while (stack < end - EXCEPTION_STKSZ); + if (*usedp & (1U << j)) + break; + *usedp |= 1U << j; + *idp = ids[j]; + return (unsigned long *)end; + } +#endif } return NULL; } @@ -613,6 +641,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } +/* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { @@ -894,7 +923,7 @@ void __init trap_init(void) set_intr_gate(0,÷_error); set_intr_gate_ist(1,&debug,DEBUG_STACK); set_intr_gate_ist(2,&nmi,NMI_STACK); - set_system_gate(3,&int3); + set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ set_system_gate(4,&overflow); /* int4 can be called from all */ set_intr_gate(5,&bounds); set_intr_gate(6,&invalid_op); -- cgit v1.2.3