diff options
-rw-r--r-- | arch/x86_64/Kconfig.debug | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 19 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 188 | ||||
-rw-r--r-- | arch/x86_64/kernel/irq.c | 4 | ||||
-rw-r--r-- | include/asm-x86_64/irqflags.h | 61 | ||||
-rw-r--r-- | include/asm-x86_64/system.h | 38 |
6 files changed, 223 insertions, 91 deletions
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index 1d92ab56c0f..775d211a5cf 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -1,5 +1,9 @@ menu "Kernel hacking" +config TRACE_IRQFLAGS_SUPPORT + bool + default y + source "lib/Kconfig.debug" config DEBUG_RODATA diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c536fa98ea3..9b5bb413a6e 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -13,6 +13,7 @@ #include <asm/thread_info.h> #include <asm/segment.h> #include <asm/vsyscall32.h> +#include <asm/irqflags.h> #include <linux/linkage.h> #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) @@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target) swapgs movq %gs:pda_kernelstack, %rsp addq $(PDA_STACKOFFSET),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: + */ sti movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS @@ -118,6 +123,7 @@ sysenter_do_call: movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call andl $~TS_COMPAT,threadinfo_status(%r10) @@ -132,6 +138,7 @@ sysenter_do_call: CFI_REGISTER rsp,rcx movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ CFI_REGISTER rip,rdx + TRACE_IRQS_ON swapgs sti /* sti only takes effect after the next instruction */ /* sysexit */ @@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target) movl %esp,%r8d CFI_REGISTER rsp,r8 movq %gs:pda_kernelstack,%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ @@ -220,6 +231,7 @@ cstar_do_call: movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call andl $~TS_COMPAT,threadinfo_status(%r10) @@ -228,6 +240,7 @@ cstar_do_call: CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ + TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp swapgs @@ -286,7 +299,11 @@ ENTRY(ia32_syscall) /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP - swapgs + swapgs + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti movl %eax,%eax pushq %rax diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ed92c298360..d464dded68c 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -41,13 +41,24 @@ #include <asm/thread_info.h> #include <asm/hw_irq.h> #include <asm/page.h> +#include <asm/irqflags.h> .code64 #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif - + + +.macro TRACE_IRQS_IRETQ offset=ARGOFFSET +#ifdef CONFIG_TRACE_IRQFLAGS + bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ + jnc 1f + TRACE_IRQS_ON +1: +#endif +.endm + /* * C code is not supposed to know about undefined top of stack. Every time * a C function with an pt_regs argument is called from the SYSCALL based @@ -194,6 +205,10 @@ ENTRY(system_call) swapgs movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp + /* + * No need to follow this irqs off/on section - it's straight + * and short: + */ sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) @@ -219,10 +234,15 @@ ret_from_sys_call: sysret_check: GET_THREAD_INFO(%rcx) cli + TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz sysret_careful + /* + * sysretq will re-enable interrupts: + */ + TRACE_IRQS_ON movq RIP-ARGOFFSET(%rsp),%rcx CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 @@ -237,6 +257,7 @@ sysret_careful: CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal + TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -247,6 +268,7 @@ sysret_careful: /* Handle a signal */ sysret_signal: + TRACE_IRQS_ON sti testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx jz 1f @@ -261,6 +283,7 @@ sysret_signal: /* Use IRET because user could have changed frame. This works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ cli + TRACE_IRQS_OFF jmp int_with_check badsys: @@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call) CFI_REL_OFFSET r10,R10-ARGOFFSET CFI_REL_OFFSET r11,R11-ARGOFFSET cli + TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi @@ -327,6 +351,7 @@ int_with_check: int_careful: bt $TIF_NEED_RESCHED,%edx jnc int_very_careful + TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -334,10 +359,12 @@ int_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 cli + TRACE_IRQS_OFF jmp int_with_check /* handle signals and tracing -- both require a full stack frame */ int_very_careful: + TRACE_IRQS_ON sti SAVE_REST /* Check for syscall exit trace */ @@ -351,6 +378,7 @@ int_very_careful: CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi cli + TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -363,6 +391,7 @@ int_signal: int_restore_rest: RESTORE_REST cli + TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC END(int_ret_from_sys_call) @@ -484,6 +513,10 @@ END(stub_rt_sigreturn) swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count cmoveq %gs:pda_irqstackptr,%rsp + /* + * We entered an interrupt context - irqs are off: + */ + TRACE_IRQS_OFF call \func .endm @@ -493,6 +526,7 @@ ENTRY(common_interrupt) /* 0(%rsp): oldrsp-ARGOFFSET */ ret_from_intr: cli + TRACE_IRQS_OFF decl %gs:pda_irqcount leaveq CFI_DEF_CFA_REGISTER rsp @@ -515,9 +549,21 @@ retint_check: CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: + /* + * The iretq could re-enable interrupts: + */ + cli + TRACE_IRQS_IRETQ swapgs + jmp restore_args + retint_restore_args: cli + /* + * The iretq could re-enable interrupts: + */ + TRACE_IRQS_IRETQ +restore_args: RESTORE_ARGS 0,8,0 iret_label: iretq @@ -530,6 +576,7 @@ iret_label: /* running with kernel gs */ bad_iret: movq $11,%rdi /* SIGSEGV */ + TRACE_IRQS_ON sti jmp do_exit .previous @@ -539,6 +586,7 @@ retint_careful: CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal + TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -547,11 +595,13 @@ retint_careful: CFI_ADJUST_CFA_OFFSET -8 GET_THREAD_INFO(%rcx) cli + TRACE_IRQS_OFF jmp retint_check retint_signal: testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx jz retint_swapgs + TRACE_IRQS_ON sti SAVE_REST movq $-1,ORIG_RAX(%rsp) @@ -560,6 +610,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli + TRACE_IRQS_OFF movl $_TIF_NEED_RESCHED,%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -666,7 +717,7 @@ END(spurious_interrupt) /* error code is on the stack already */ /* handle NMI like exceptions that can happen everywhere */ - .macro paranoidentry sym, ist=0 + .macro paranoidentry sym, ist=0, irqtrace=1 SAVE_ALL cld movl $1,%ebx @@ -691,8 +742,73 @@ END(spurious_interrupt) addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif cli + .if \irqtrace + TRACE_IRQS_OFF + .endif .endm - + + /* + * "Paranoid" exit path from exception stack. + * Paranoid because this is used by NMIs and cannot take + * any kernel state for granted. + * We don't do kernel preemption checks here, because only + * NMI should be common and it does not enable IRQs and + * cannot get reschedule ticks. + * + * "trace" is 0 for the NMI handler only, because irq-tracing + * is fundamentally NMI-unsafe. (we cannot change the soft and + * hard flags at once, atomically) + */ + .macro paranoidexit trace=1 + /* ebx: no swapgs flag */ +paranoid_exit\trace: + testl %ebx,%ebx /* swapgs needed? */ + jnz paranoid_restore\trace + testl $3,CS(%rsp) + jnz paranoid_userspace\trace +paranoid_swapgs\trace: + TRACE_IRQS_IRETQ 0 + swapgs +paranoid_restore\trace: + RESTORE_ALL 8 + iretq +paranoid_userspace\trace: + GET_THREAD_INFO(%rcx) + movl threadinfo_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz paranoid_swapgs\trace + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz paranoid_schedule\trace + movl %ebx,%edx /* arg3: thread flags */ + .if \trace + TRACE_IRQS_ON + .endif + sti + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace +paranoid_schedule\trace: + .if \trace + TRACE_IRQS_ON + .endif + sti + call schedule + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace + CFI_ENDPROC + .endm + /* * Exception entry point. This expects an error code/orig_rax on the stack * and the exception handler in %rax. @@ -748,6 +864,7 @@ error_exit: movl %ebx,%eax RESTORE_REST cli + TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel @@ -755,6 +872,10 @@ error_exit: movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful + /* + * The iret might restore flags: + */ + TRACE_IRQS_IRETQ swapgs RESTORE_ARGS 0,8,0 jmp iret_label @@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK - jmp paranoid_exit - CFI_ENDPROC + paranoidexit END(debug) .previous .text @@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi) INTR_FRAME pushq $-1 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_nmi - /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. - */ - /* ebx: no swapgs flag */ -paranoid_exit: - testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: - swapgs -paranoid_restore: - RESTORE_ALL 8 - iretq -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - sti - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - cli - jmp paranoid_userspace -paranoid_schedule: - sti - call schedule - cli - jmp paranoid_userspace - CFI_ENDPROC + paranoidentry do_nmi, 0, 0 +#ifdef CONFIG_TRACE_IRQFLAGS + paranoidexit 0 +#else + jmp paranoid_exit1 + CFI_ENDPROC +#endif END(nmi) .previous .text @@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(int3) .previous .text @@ -1006,7 +1090,7 @@ END(reserved) ENTRY(double_fault) XCPT_FRAME paranoidentry do_double_fault - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(double_fault) @@ -1022,7 +1106,7 @@ END(segment_not_present) ENTRY(stack_segment) XCPT_FRAME paranoidentry do_stack_segment - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(stack_segment) @@ -1050,7 +1134,7 @@ ENTRY(machine_check) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(machine_check) #endif diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index a1f1df5f7bf..5221a53e90c 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -177,8 +177,10 @@ asmlinkage void do_softirq(void) local_irq_save(flags); pending = local_softirq_pending(); /* Switch to interrupt stack */ - if (pending) + if (pending) { call_softirq(); + WARN_ON_ONCE(softirq_count()); + } local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h new file mode 100644 index 00000000000..22f3c06b247 --- /dev/null +++ b/include/asm-x86_64/irqflags.h @@ -0,0 +1,61 @@ +/* + * include/asm-x86_64/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +/* interrupt control.. */ +#define raw_local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) +#define raw_local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") + +#ifdef CONFIG_X86_VSMP +/* Interrupt control for VSMP architecture */ +#define raw_local_irq_disable() do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) +#define raw_local_irq_enable() do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) + +#define raw_irqs_disabled_flags(flags) \ +({ \ + (flags & (1<<18)) || !(flags & (1<<9)); \ +}) + +/* For spinlocks etc */ +#define raw_local_irq_save(x) do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) +#else /* CONFIG_X86_VSMP */ +#define raw_local_irq_disable() __asm__ __volatile__("cli": : :"memory") +#define raw_local_irq_enable() __asm__ __volatile__("sti": : :"memory") + +#define raw_irqs_disabled_flags(flags) \ +({ \ + !(flags & (1<<9)); \ +}) + +/* For spinlocks etc */ +#define raw_local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) +#endif + +#define raw_irqs_disabled() \ +({ \ + unsigned long flags; \ + raw_local_save_flags(flags); \ + raw_irqs_disabled_flags(flags); \ +}) + +/* used in the idle loop; sti takes one instruction cycle to complete */ +#define raw_safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") +/* used when interrupts are already enabled or to shutdown the processor */ +#define halt() __asm__ __volatile__("hlt": : :"memory") + +#else /* __ASSEMBLY__: */ +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index 68e559f3631..f67f2873a92 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) -/* interrupt control.. */ -#define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) -#define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") - -#ifdef CONFIG_X86_VSMP -/* Interrupt control for VSMP architecture */ -#define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) -#define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - (flags & (1<<18)) || !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) -#else /* CONFIG_X86_VSMP */ -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) -#endif - -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") -/* used when interrupts are already enabled or to shutdown the processor */ -#define halt() __asm__ __volatile__("hlt": : :"memory") +#include <linux/irqflags.h> void cpu_idle_wait(void); |