aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-06 13:54:09 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-06 13:54:09 -0800
commit3e6bdf473f489664dac4d7511d26c7ac3dfdc748 (patch)
tree10cb2e928830b9de8bbc3f6dd47c18c24cd2affa /arch/x86
parent3d4d4582e5b3f67a68f2cf32fd5b70d8d80f119d (diff)
parent58d5d0d8dd52cbca988af24b5692a20b00285543 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: x86: fix deadlock, make pgd_lock irq-safe virtio: fix trivial build bug x86: fix mttr trimming x86: delay CPA self-test and repeat it x86: fix 64-bit sections generic: add __FINITDATA x86: remove suprious ifdefs from pageattr.c x86: mark the .rodata section also NX x86: fix iret exception recovery on 64-bit cpuidle: dubious one-bit signed bitfield in cpuidle.h x86: fix sparse warnings in powernow-k8.c x86: fix sparse error in traps_32.c x86: trivial sparse/checkpatch in quirks.c x86 ptrace: disallow null cs/ss MAINTAINERS: RDC R-321x SoC maintainer brk randomization: introduce CONFIG_COMPAT_BRK brk: check the lower bound properly x86: remove X2 workaround x86: make spurious fault handler aware of large mappings x86: make traps on entry code be debuggable in user space, 64-bit
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c19
-rw-r--r--arch/x86/kernel/entry_64.S24
-rw-r--r--arch/x86/kernel/head_64.S15
-rw-r--r--arch/x86/kernel/ptrace.c25
-rw-r--r--arch/x86/kernel/quirks.c26
-rw-r--r--arch/x86/kernel/test_nx.c2
-rw-r--r--arch/x86/kernel/traps_32.c15
-rw-r--r--arch/x86/mm/fault.c28
-rw-r--r--arch/x86/mm/init_64.c9
-rw-r--r--arch/x86/mm/pageattr-test.c65
-rw-r--r--arch/x86/mm/pageattr.c14
13 files changed, 153 insertions, 94 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af28c3..fa555148823 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
- bool "CPA self test code"
+ bool "CPA self-test code"
depends on DEBUG_KERNEL
help
- Do change_page_attr self tests at boot.
+ Do change_page_attr() self-tests every 30 seconds.
endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a0522735dd9..5affe91ca1e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
- u32 hi = 0, lo = 0;
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1e27b69a7a0..b6e136f23d3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -659,7 +659,7 @@ static __init int amd_special_default_mtrr(void)
*/
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
- unsigned long i, base, size, highest_addr = 0, def, dummy;
+ unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 trim_start, trim_size;
@@ -682,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
mtrr_if->get(i, &base, &size, &type);
if (type != MTRR_TYPE_WRBACK)
continue;
- base <<= PAGE_SHIFT;
- size <<= PAGE_SHIFT;
- if (highest_addr < base + size)
- highest_addr = base + size;
+ if (highest_pfn < base + size)
+ highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
- if (!highest_addr) {
+ if (!highest_pfn) {
printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
WARN_ON(1);
return 0;
}
- if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+ if (highest_pfn < end_pfn) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %LdMB of RAM.\n",
- (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+ " all of memory, losing %luMB of RAM.\n",
+ (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
- trim_start = highest_addr;
+ trim_start = highest_pfn;
+ trim_start <<= PAGE_SHIFT;
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bea8474744f..c7341e81941 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
-iret_label:
#ifdef CONFIG_PARAVIRT
INTERRUPT_RETURN
#endif
@@ -593,13 +592,22 @@ ENTRY(native_iret)
.quad native_iret, bad_iret
.previous
.section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- /* running with kernel gs */
bad_iret:
- movq $11,%rdi /* SIGSEGV */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
- jmp do_exit
+ /*
+ * The iret traps when the %cs or %ss being restored is bogus.
+ * We've lost the original trap vector and error code.
+ * #GPF is the most likely one to get for an invalid selector.
+ * So pretend we completed the iret and took the #GPF in user mode.
+ *
+ * We are now running with the kernel GS after exception recovery.
+ * But error_entry expects us to have user GS to match the user %cs,
+ * so swap back.
+ */
+ pushq $0
+
+ SWAPGS
+ jmp general_protection
+
.previous
/* edi: workmask, edx: work */
@@ -911,7 +919,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
+ leaq native_iret(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4f283ad215e..09b38d539b0 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -250,18 +250,13 @@ ENTRY(secondary_startup_64)
lretq
/* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
- .pushsection .init.data
-#endif
+ __CPUINITDATA
.align 8
- .globl initial_code
-initial_code:
+ ENTRY(initial_code)
.quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
- .popsection
-#endif
- .globl init_rsp
-init_rsp:
+ __FINITDATA
+
+ ENTRY(init_rsp)
.quad init_thread_union+THREAD_SIZE-8
bad_address:
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 96286df1bb8..702c33efea8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task,
if (invalid_selector(value))
return -EIO;
- if (offset != offsetof(struct user_regs_struct, gs))
+ /*
+ * For %cs and %ss we cannot permit a null selector.
+ * We can permit a bogus selector as long as it has USER_RPL.
+ * Null selectors are fine for other segment registers, but
+ * we will never get back to user mode with invalid %cs or %ss
+ * and will take the trap in iret instead. Much code relies
+ * on user_mode() to distinguish a user trap frame (which can
+ * safely use invalid selectors) from a kernel trap frame.
+ */
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ss):
+ if (unlikely(value == 0))
+ return -EIO;
+
+ default:
*pt_regs_access(task_pt_regs(task), offset) = value;
- else {
+ break;
+
+ case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
@@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task,
* Can't actually change these in 64-bit mode.
*/
case offsetof(struct user_regs_struct,cs):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
break;
case offsetof(struct user_regs_struct,ss):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 3cd7a2dcd4f..6ba33ca8715 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -380,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
void force_hpet_resume(void)
{
switch (force_hpet_resume_type) {
- case ICH_FORCE_HPET_RESUME:
- return ich_force_hpet_resume();
-
- case OLD_ICH_FORCE_HPET_RESUME:
- return old_ich_force_hpet_resume();
-
- case VT8237_FORCE_HPET_RESUME:
- return vt8237_force_hpet_resume();
-
- case NVIDIA_FORCE_HPET_RESUME:
- return nvidia_force_hpet_resume();
-
- default:
+ case ICH_FORCE_HPET_RESUME:
+ ich_force_hpet_resume();
+ return;
+ case OLD_ICH_FORCE_HPET_RESUME:
+ old_ich_force_hpet_resume();
+ return;
+ case VT8237_FORCE_HPET_RESUME:
+ vt8237_force_hpet_resume();
+ return;
+ case NVIDIA_FORCE_HPET_RESUME:
+ nvidia_force_hpet_resume();
+ return;
+ default:
break;
}
}
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 36c100c323a..10b8a6f69f8 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -139,7 +139,6 @@ static int test_NX(void)
* Until then, don't run them to avoid too many people getting scared
* by the error message
*/
-#if 0
#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
@@ -152,6 +151,7 @@ static int test_NX(void)
}
#endif
+#if 0
/* Test 4: Check if the .data section of a module is executable */
if (test_address(&test_data)) {
printk(KERN_ERR "test_nx: .data section is executable\n");
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3cf72977d01..b22c01e05a1 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1176,17 +1176,12 @@ void __init trap_init(void)
#endif
set_trap_gate(19,&simd_coprocessor_error);
+ /*
+ * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+ * Generate a build-time error if the alignment is wrong.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
if (cpu_has_fxsr) {
- /*
- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
- * Generates a compile-time "error: zero width for bit-field" if
- * the alignment is wrong.
- */
- struct fxsrAlignAssert {
- int _:!(offsetof(struct task_struct,
- thread.i387.fxsave) & 15);
- };
-
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
printk("done.\n");
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ad8b9733d6b..621afb6343d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
}
#endif
+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+ if ((error_code & PF_WRITE) && !pte_write(*pte))
+ return 0;
+ if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ return 0;
+
+ return 1;
+}
+
/*
* Handle a spurious fault caused by a stale TLB entry. This allows
* us to lazily refresh the TLB when increasing the permissions of a
@@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address,
if (!pud_present(*pud))
return 0;
+ if (pud_large(*pud))
+ return spurious_fault_check(error_code, (pte_t *) pud);
+
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return 0;
+ if (pmd_large(*pmd))
+ return spurious_fault_check(error_code, (pte_t *) pmd);
+
pte = pte_offset_kernel(pmd, address);
if (!pte_present(*pte))
return 0;
- if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
- return 0;
-
- return 1;
+ return spurious_fault_check(error_code, pte);
}
/*
@@ -947,11 +958,12 @@ void vmalloc_sync_all(void)
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock(&pgd_lock);
+ spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -960,7 +972,7 @@ void vmalloc_sync_all(void)
else
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
- spin_unlock(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
set_bit(pgd_index(address), insync);
}
if (address == start)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3a98d6f724a..9b61c75a235 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -591,10 +591,17 @@ void mark_rodata_ro(void)
if (end <= start)
return;
- set_memory_ro(start, (end - start) >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+ /*
+ * The rodata section (but not the kernel text!) should also be
+ * not-executable.
+ */
+ start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ set_memory_nx(start, (end - start) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 398f3a578dd..ed820160035 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
* and compares page tables forwards and afterwards.
*/
#include <linux/bootmem.h>
+#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -14,8 +15,13 @@
#include <asm/pgtable.h>
#include <asm/kdebug.h>
+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
enum {
- NTEST = 4000,
+ NTEST = 400,
#ifdef CONFIG_X86_64
LPS = (1 << PMD_SHIFT),
#elif defined(CONFIG_X86_PAE)
@@ -31,7 +37,7 @@ struct split_state {
long min_exec, max_exec;
};
-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
{
long i, expected, missed = 0;
int printed = 0;
@@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s)
s->max_exec = addr;
}
}
- printk(KERN_INFO
- "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
- s->spg, s->lpg, s->gpg, s->exec,
- s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+ if (print) {
+ printk(KERN_INFO
+ " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+ s->spg, s->lpg, s->gpg, s->exec,
+ s->min_exec != ~0UL ? s->min_exec : 0,
+ s->max_exec, missed);
+ }
expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
if (expected != i) {
@@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s)
return err;
}
-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];
/* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
{
struct split_state sa, sb, sc;
unsigned long *bm;
@@ -110,7 +119,8 @@ static __init int exercise_pageattr(void)
int i, k;
int err;
- printk(KERN_INFO "CPA exercising pageattr\n");
+ if (print)
+ printk(KERN_INFO "CPA self-test:\n");
bm = vmalloc((max_pfn_mapped + 7) / 8);
if (!bm) {
@@ -186,7 +196,6 @@ static __init int exercise_pageattr(void)
failed += print_split(&sb);
- printk(KERN_INFO "CPA reverting everything\n");
for (i = 0; i < NTEST; i++) {
if (!addr[i])
continue;
@@ -214,12 +223,40 @@ static __init int exercise_pageattr(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+ printk(KERN_ERR "NOT PASSED. Please report.\n");
WARN_ON(1);
+ return -EINVAL;
} else {
- printk(KERN_INFO "CPA selftests PASSED\n");
+ if (print)
+ printk(KERN_INFO "ok.\n");
}
return 0;
}
-module_init(exercise_pageattr);
+
+static int do_pageattr_test(void *__unused)
+{
+ while (!kthread_should_stop()) {
+ schedule_timeout_interruptible(HZ*30);
+ if (pageattr_test() < 0)
+ break;
+ if (print)
+ print--;
+ }
+ return 0;
+}
+
+static int start_pageattr_test(void)
+{
+ struct task_struct *p;
+
+ p = kthread_create(do_pageattr_test, NULL, "pageattr-test");
+ if (!IS_ERR(p))
+ wake_up_process(p);
+ else
+ WARN_ON(1);
+
+ return 0;
+}
+
+module_init(start_pageattr_test);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 16ce841f08d..8493c855582 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -167,8 +167,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
pgprot_val(forbidden) |= _PAGE_NX;
-
-#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
@@ -179,7 +177,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
if (within(address, virt_to_highmap(__start_rodata),
virt_to_highmap(__end_rodata)))
pgprot_val(forbidden) |= _PAGE_RW;
-#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -260,17 +257,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pgprot_t old_prot, new_prot;
int level, do_split = 1;
- /*
- * An Athlon 64 X2 showed hard hangs if we tried to preserve
- * largepages and changed the PSE entry from RW to RO.
- *
- * As AMD CPUs have a long series of erratas in this area,
- * (and none of the known ones seem to explain this hang),
- * disable this code until the hang can be debugged:
- */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return 1;
-
spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page