diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 13:54:09 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 13:54:09 -0800 |
commit | 3e6bdf473f489664dac4d7511d26c7ac3dfdc748 (patch) | |
tree | 10cb2e928830b9de8bbc3f6dd47c18c24cd2affa /arch | |
parent | 3d4d4582e5b3f67a68f2cf32fd5b70d8d80f119d (diff) | |
parent | 58d5d0d8dd52cbca988af24b5692a20b00285543 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86:
x86: fix deadlock, make pgd_lock irq-safe
virtio: fix trivial build bug
x86: fix mttr trimming
x86: delay CPA self-test and repeat it
x86: fix 64-bit sections
generic: add __FINITDATA
x86: remove suprious ifdefs from pageattr.c
x86: mark the .rodata section also NX
x86: fix iret exception recovery on 64-bit
cpuidle: dubious one-bit signed bitfield in cpuidle.h
x86: fix sparse warnings in powernow-k8.c
x86: fix sparse error in traps_32.c
x86: trivial sparse/checkpatch in quirks.c
x86 ptrace: disallow null cs/ss
MAINTAINERS: RDC R-321x SoC maintainer
brk randomization: introduce CONFIG_COMPAT_BRK
brk: check the lower bound properly
x86: remove X2 workaround
x86: make spurious fault handler aware of large mappings
x86: make traps on entry code be debuggable in user space, 64-bit
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig.debug | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 15 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/quirks.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/test_nx.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps_32.c | 15 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 28 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 9 | ||||
-rw-r--r-- | arch/x86/mm/pageattr-test.c | 65 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 14 |
13 files changed, 153 insertions, 94 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2e1e3af28c3..fa555148823 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS This option will cause struct boot_params to be exported via debugfs. config CPA_DEBUG - bool "CPA self test code" + bool "CPA self-test code" depends on DEBUG_KERNEL help - Do change_page_attr self tests at boot. + Do change_page_attr() self-tests every 30 seconds. endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a0522735dd9..5affe91ca1e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; - u32 hi = 0, lo = 0; index = data->acpi_data.states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 1e27b69a7a0..b6e136f23d3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -659,7 +659,7 @@ static __init int amd_special_default_mtrr(void) */ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { - unsigned long i, base, size, highest_addr = 0, def, dummy; + unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; u64 trim_start, trim_size; @@ -682,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) mtrr_if->get(i, &base, &size, &type); if (type != MTRR_TYPE_WRBACK) continue; - base <<= PAGE_SHIFT; - size <<= PAGE_SHIFT; - if (highest_addr < base + size) - highest_addr = base + size; + if (highest_pfn < base + size) + highest_pfn = base + size; } /* kvm/qemu doesn't have mtrr set right, don't trim them all */ - if (!highest_addr) { + if (!highest_pfn) { printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); WARN_ON(1); return 0; } - if ((highest_addr >> PAGE_SHIFT) < end_pfn) { + if (highest_pfn < end_pfn) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %LdMB of RAM.\n", - (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20); + " all of memory, losing %luMB of RAM.\n", + (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_addr; + trim_start = highest_pfn; + trim_start <<= PAGE_SHIFT; trim_size = end_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bea8474744f..c7341e81941 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */ TRACE_IRQS_IRETQ restore_args: RESTORE_ARGS 0,8,0 -iret_label: #ifdef CONFIG_PARAVIRT INTERRUPT_RETURN #endif @@ -593,13 +592,22 @@ ENTRY(native_iret) .quad native_iret, bad_iret .previous .section .fixup,"ax" - /* force a signal here? this matches i386 behaviour */ - /* running with kernel gs */ bad_iret: - movq $11,%rdi /* SIGSEGV */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) - jmp do_exit + /* + * The iret traps when the %cs or %ss being restored is bogus. + * We've lost the original trap vector and error code. + * #GPF is the most likely one to get for an invalid selector. + * So pretend we completed the iret and took the #GPF in user mode. + * + * We are now running with the kernel GS after exception recovery. + * But error_entry expects us to have user GS to match the user %cs, + * so swap back. + */ + pushq $0 + + SWAPGS + jmp general_protection + .previous /* edi: workmask, edx: work */ @@ -911,7 +919,7 @@ error_kernelspace: iret run with kernel gs again, so don't set the user space flag. B stepping K8s sometimes report an truncated RIP for IRET exceptions returning to compat mode. Check for these here too. */ - leaq iret_label(%rip),%rbp + leaq native_iret(%rip),%rbp cmpq %rbp,RIP(%rsp) je error_swapgs movl %ebp,%ebp /* zero extend */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 4f283ad215e..09b38d539b0 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -250,18 +250,13 @@ ENTRY(secondary_startup_64) lretq /* SMP bootup changes these two */ -#ifndef CONFIG_HOTPLUG_CPU - .pushsection .init.data -#endif + __CPUINITDATA .align 8 - .globl initial_code -initial_code: + ENTRY(initial_code) .quad x86_64_start_kernel -#ifndef CONFIG_HOTPLUG_CPU - .popsection -#endif - .globl init_rsp -init_rsp: + __FINITDATA + + ENTRY(init_rsp) .quad init_thread_union+THREAD_SIZE-8 bad_address: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 96286df1bb8..702c33efea8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task, if (invalid_selector(value)) return -EIO; - if (offset != offsetof(struct user_regs_struct, gs)) + /* + * For %cs and %ss we cannot permit a null selector. + * We can permit a bogus selector as long as it has USER_RPL. + * Null selectors are fine for other segment registers, but + * we will never get back to user mode with invalid %cs or %ss + * and will take the trap in iret instead. Much code relies + * on user_mode() to distinguish a user trap frame (which can + * safely use invalid selectors) from a kernel trap frame. + */ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ss): + if (unlikely(value == 0)) + return -EIO; + + default: *pt_regs_access(task_pt_regs(task), offset) = value; - else { + break; + + case offsetof(struct user_regs_struct, gs): task->thread.gs = value; if (task == current) /* @@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task, * Can't actually change these in 64-bit mode. */ case offsetof(struct user_regs_struct,cs): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->cs = value; #endif break; case offsetof(struct user_regs_struct,ss): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->ss = value; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 3cd7a2dcd4f..6ba33ca8715 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -380,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, void force_hpet_resume(void) { switch (force_hpet_resume_type) { - case ICH_FORCE_HPET_RESUME: - return ich_force_hpet_resume(); - - case OLD_ICH_FORCE_HPET_RESUME: - return old_ich_force_hpet_resume(); - - case VT8237_FORCE_HPET_RESUME: - return vt8237_force_hpet_resume(); - - case NVIDIA_FORCE_HPET_RESUME: - return nvidia_force_hpet_resume(); - - default: + case ICH_FORCE_HPET_RESUME: + ich_force_hpet_resume(); + return; + case OLD_ICH_FORCE_HPET_RESUME: + old_ich_force_hpet_resume(); + return; + case VT8237_FORCE_HPET_RESUME: + vt8237_force_hpet_resume(); + return; + case NVIDIA_FORCE_HPET_RESUME: + nvidia_force_hpet_resume(); + return; + default: break; } } diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 36c100c323a..10b8a6f69f8 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -139,7 +139,6 @@ static int test_NX(void) * Until then, don't run them to avoid too many people getting scared * by the error message */ -#if 0 #ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ @@ -152,6 +151,7 @@ static int test_NX(void) } #endif +#if 0 /* Test 4: Check if the .data section of a module is executable */ if (test_address(&test_data)) { printk(KERN_ERR "test_nx: .data section is executable\n"); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 3cf72977d01..b22c01e05a1 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1176,17 +1176,12 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generate a build-time error if the alignment is wrong. + */ + BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generates a compile-time "error: zero width for bit-field" if - * the alignment is wrong. - */ - struct fxsrAlignAssert { - int _:!(offsetof(struct task_struct, - thread.i387.fxsave) & 15); - }; - printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); printk("done.\n"); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ad8b9733d6b..621afb6343d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static int spurious_fault_check(unsigned long error_code, pte_t *pte) +{ + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + + return 1; +} + /* * Handle a spurious fault caused by a stale TLB entry. This allows * us to lazily refresh the TLB when increasing the permissions of a @@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address, if (!pud_present(*pud)) return 0; + if (pud_large(*pud)) + return spurious_fault_check(error_code, (pte_t *) pud); + pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; + if (pmd_large(*pmd)) + return spurious_fault_check(error_code, (pte_t *) pmd); + pte = pte_offset_kernel(pmd, address); if (!pte_present(*pte)) return 0; - if ((error_code & PF_WRITE) && !pte_write(*pte)) - return 0; - if ((error_code & PF_INSTR) && !pte_exec(*pte)) - return 0; - - return 1; + return spurious_fault_check(error_code, pte); } /* @@ -947,11 +958,12 @@ void vmalloc_sync_all(void) for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { if (!test_bit(pgd_index(address), insync)) { const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock(&pgd_lock); + spin_lock_irqsave(&pgd_lock, flags); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pgd = (pgd_t *)page_address(page) + pgd_index(address); @@ -960,7 +972,7 @@ void vmalloc_sync_all(void) else BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } - spin_unlock(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); set_bit(pgd_index(address), insync); } if (address == start) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3a98d6f724a..9b61c75a235 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -591,10 +591,17 @@ void mark_rodata_ro(void) if (end <= start) return; - set_memory_ro(start, (end - start) >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); + set_memory_ro(start, (end - start) >> PAGE_SHIFT); + + /* + * The rodata section (but not the kernel text!) should also be + * not-executable. + */ + start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + set_memory_nx(start, (end - start) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 398f3a578dd..ed820160035 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -5,6 +5,7 @@ * and compares page tables forwards and afterwards. */ #include <linux/bootmem.h> +#include <linux/kthread.h> #include <linux/random.h> #include <linux/kernel.h> #include <linux/init.h> @@ -14,8 +15,13 @@ #include <asm/pgtable.h> #include <asm/kdebug.h> +/* + * Only print the results of the first pass: + */ +static __read_mostly int print = 1; + enum { - NTEST = 4000, + NTEST = 400, #ifdef CONFIG_X86_64 LPS = (1 << PMD_SHIFT), #elif defined(CONFIG_X86_PAE) @@ -31,7 +37,7 @@ struct split_state { long min_exec, max_exec; }; -static __init int print_split(struct split_state *s) +static int print_split(struct split_state *s) { long i, expected, missed = 0; int printed = 0; @@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s) s->max_exec = addr; } } - printk(KERN_INFO - "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", - s->spg, s->lpg, s->gpg, s->exec, - s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed); + if (print) { + printk(KERN_INFO + " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", + s->spg, s->lpg, s->gpg, s->exec, + s->min_exec != ~0UL ? s->min_exec : 0, + s->max_exec, missed); + } expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed; if (expected != i) { @@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s) return err; } -static unsigned long __initdata addr[NTEST]; -static unsigned int __initdata len[NTEST]; +static unsigned long addr[NTEST]; +static unsigned int len[NTEST]; /* Change the global bit on random pages in the direct mapping */ -static __init int exercise_pageattr(void) +static int pageattr_test(void) { struct split_state sa, sb, sc; unsigned long *bm; @@ -110,7 +119,8 @@ static __init int exercise_pageattr(void) int i, k; int err; - printk(KERN_INFO "CPA exercising pageattr\n"); + if (print) + printk(KERN_INFO "CPA self-test:\n"); bm = vmalloc((max_pfn_mapped + 7) / 8); if (!bm) { @@ -186,7 +196,6 @@ static __init int exercise_pageattr(void) failed += print_split(&sb); - printk(KERN_INFO "CPA reverting everything\n"); for (i = 0; i < NTEST; i++) { if (!addr[i]) continue; @@ -214,12 +223,40 @@ static __init int exercise_pageattr(void) failed += print_split(&sc); if (failed) { - printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n"); + printk(KERN_ERR "NOT PASSED. Please report.\n"); WARN_ON(1); + return -EINVAL; } else { - printk(KERN_INFO "CPA selftests PASSED\n"); + if (print) + printk(KERN_INFO "ok.\n"); } return 0; } -module_init(exercise_pageattr); + +static int do_pageattr_test(void *__unused) +{ + while (!kthread_should_stop()) { + schedule_timeout_interruptible(HZ*30); + if (pageattr_test() < 0) + break; + if (print) + print--; + } + return 0; +} + +static int start_pageattr_test(void) +{ + struct task_struct *p; + + p = kthread_create(do_pageattr_test, NULL, "pageattr-test"); + if (!IS_ERR(p)) + wake_up_process(p); + else + WARN_ON(1); + + return 0; +} + +module_init(start_pageattr_test); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 16ce841f08d..8493c855582 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -167,8 +167,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext))) pgprot_val(forbidden) |= _PAGE_NX; - -#ifdef CONFIG_DEBUG_RODATA /* The .rodata section needs to be read-only */ if (within(address, (unsigned long)__start_rodata, (unsigned long)__end_rodata)) @@ -179,7 +177,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) if (within(address, virt_to_highmap(__start_rodata), virt_to_highmap(__end_rodata))) pgprot_val(forbidden) |= _PAGE_RW; -#endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); @@ -260,17 +257,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_t old_prot, new_prot; int level, do_split = 1; - /* - * An Athlon 64 X2 showed hard hangs if we tried to preserve - * largepages and changed the PSE entry from RW to RO. - * - * As AMD CPUs have a long series of erratas in this area, - * (and none of the known ones seem to explain this hang), - * disable this code until the hang can be debugged: - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return 1; - spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page |