diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2008-01-30 13:34:11 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 13:34:11 +0100 |
commit | 5b727a3b0158a129827c21ce3bfb0ba997e8ddd0 (patch) | |
tree | e0d43e258fa46e22ac6c4daf82c5005dc0e762c6 /arch/x86/mm | |
parent | b406ac61e94875723540bd56e26f634afdeef489 (diff) |
x86: ignore spurious faults
When changing a kernel page from RO->RW, it's OK to leave stale TLB
entries around, since doing a global flush is expensive and they pose
no security problem. They can, however, generate a spurious fault,
which we should catch and simply return from (which will have the
side-effect of reloading the TLB to the current PTE).
This can occur when running under Xen, because it frequently changes
kernel pages from RW->RO->RW to implement Xen's pagetable semantics.
It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids
doing a global TLB flush after changing page permissions.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 99d273dbc75..1c836527dde 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -435,6 +435,51 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, #endif /* + * Handle a spurious fault caused by a stale TLB entry. This allows + * us to lazily refresh the TLB when increasing the permissions of a + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very + * expensive since that implies doing a full cross-processor TLB + * flush, even if no stale TLB entries exist on other processors. + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +static int spurious_fault(unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + /* Reserved-bit violation or user access to kernel space? */ + if (error_code & (PF_USER | PF_RSVD)) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + + return 1; +} + +/* * X86_32 * Handle a fault on the vmalloc or module mapping area * @@ -568,6 +613,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; + + /* Can handle a stale RO->RW TLB */ + if (spurious_fault(address, error_code)) + return; + /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -598,6 +648,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (vmalloc_fault(address) >= 0) return; } + + /* Can handle a stale RO->RW TLB */ + if (spurious_fault(address, error_code)) + return; + /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. |