From c513867561eeb07d24a0bdda1a18a8f91921a301 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Oct 2008 18:08:48 -0400 Subject: ftrace: do not enclose logic in WARN_ON In ftrace, logic is defined in the WARN_ON_ONCE, which can become a nop with some configs. This patch fixes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d073d981a73..8821ceabf51 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -62,6 +62,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; + int ret; /* * Note: Due to modules and __init, code can @@ -77,8 +78,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) return 2; - WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE)); + ret = __copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE); + WARN_ON_ONCE(ret); sync_core(); -- cgit v1.2.3 From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:12 -0400 Subject: ftrace: rename FTRACE to FUNCTION_TRACER Due to confusion between the ftrace infrastructure and the gcc profiling tracer "ftrace", this patch renames the config options from FTRACE to FUNCTION_TRACER. The other two names that are offspring from FTRACE DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same. This patch was generated mostly by script, and partially by hand. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/i386_ksyms_32.c | 2 +- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/xen/Makefile | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 40ee8080956..290e21aa774 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,7 +28,7 @@ config X86 select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE + select HAVE_FUNCTION_TRACER select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d41f0343dc..ec3d30136bf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4e4269c73bb..9d49facc21f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1149,7 +1149,7 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -1204,7 +1204,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 09e7145484c..b86f332c96a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,7 +61,7 @@ .code64 -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) retq @@ -138,7 +138,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index dd7ebee446a..43cec6bdda6 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -5,7 +5,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b545f371b5f..695e426aa35 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -12,7 +12,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 313947940a1..6dcefba7836 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg CFLAGS_REMOVE_time.o = -pg -- cgit v1.2.3 From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:32:59 -0400 Subject: ftrace: return error on failed modified text. Have the ftrace_modify_code return error values: -EFAULT on error of reading the address -EINVAL if what is read does not match what it expected -EPERM if the write fails to update after a successful match. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8821ceabf51..428291581cb 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -62,7 +62,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; - int ret; /* * Note: Due to modules and __init, code can @@ -72,15 +71,16 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) - return 1; + if (__copy_from_user_inatomic(replaced, (char __user *)ip, + MCOUNT_INSN_SIZE)) + return -EFAULT; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return 2; + return -EINVAL; - ret = __copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE); - WARN_ON_ONCE(ret); + if (__copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)) + return -EPERM; sync_core(); -- cgit v1.2.3 From 76aefee57657428fb77cbd8624119c1a440bee44 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:00 -0400 Subject: ftrace: comment arch ftrace code Add comments to explain what is happening in the x86 arch ftrace code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 428291581cb..783455454d7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -66,18 +66,23 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting - * as well as code changing. + * as well as code changing. We do this by using the + * __copy_*_user functions. * * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ + + /* read the text we want to modify */ if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) return -EINVAL; + /* replace the text with the new text */ if (__copy_to_user_inatomic((char __user *)ip, new_code, MCOUNT_INSN_SIZE)) return -EPERM; -- cgit v1.2.3 From ab9a0918cbf0fa8883301838df8dbc8fc085ff50 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:01 -0400 Subject: ftrace: use probe_kernel Andrew Morton suggested using the proper API for reading and writing kernel areas that might fault. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 783455454d7..da4fb0deecf 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -67,15 +67,14 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting * as well as code changing. We do this by using the - * __copy_*_user functions. + * probe_kernel_* functions. * * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ /* read the text we want to modify */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, - MCOUNT_INSN_SIZE)) + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -83,8 +82,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (__copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE)) + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) return -EPERM; sync_core(); -- cgit v1.2.3 From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:06 -0400 Subject: ftrace: remove mcount set The arch dependent function ftrace_mcount_set was only used by the daemon start up code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index da4fb0deecf..b399eed2353 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -103,13 +103,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - /* mcount is initialized as a nop */ - *data = 0; - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { extern const unsigned char ftrace_test_p6nop[]; -- cgit v1.2.3 From 15adc048986f6b54b6044f2b6fc4b48f49413e2f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:08 -0400 Subject: ftrace, powerpc, sparc64, x86: remove notrace from arch ftrace file The entire file of ftrace.c in the arch code needs to be marked as notrace. It is much cleaner to do this from the Makefile with CFLAGS_REMOVE_ftrace.o. [ powerpc already had this in its Makefile. ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/ftrace.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f1283fe6072..e489ff9cb3e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg endif # diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b399eed2353..b1e5e2244ec 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -33,17 +33,17 @@ union ftrace_code_union { }; -static int notrace ftrace_calc_offset(long ip, long addr) +static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -notrace unsigned char *ftrace_nop_replace(void) +unsigned char *ftrace_nop_replace(void) { return (char *)ftrace_nop; } -notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -57,7 +57,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } -notrace int +int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -90,7 +90,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } -notrace int ftrace_update_ftrace_func(ftrace_func_t func) +int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); unsigned char old[MCOUNT_INSN_SIZE], *new; -- cgit v1.2.3 From 75bebb7f0c2a709812cccb4d3151a21b012c5cad Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 23 Oct 2008 20:46:55 +0900 Subject: x86: use GFP_DMA for 24bit coherent_dma_mask dma_alloc_coherent (include/asm-x86/dma-mapping.h) avoids GFP_DMA allocation first and if the allocated address is not fit for the device's coherent_dma_mask, then dma_alloc_coherent does GFP_DMA allocation. This is because dma_alloc_coherent avoids precious GFP_DMA zone if possible. This is also how the old dma_alloc_coherent (arch/x86/kernel/pci-dma.c) works. However, if the coherent_dma_mask of a device is 24bit, there is no point to go into the above GFP_DMA retry mechanism. We had better use GFP_DMA in the first place. Signed-off-by: FUJITA Tomonori Tested-by: Takashi Iwai Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 4a5397bfce2..7f225a4b2a2 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -255,9 +255,11 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev, static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { -#ifdef CONFIG_X86_64 unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); + if (dma_mask <= DMA_24BIT_MASK) + gfp |= GFP_DMA; +#ifdef CONFIG_X86_64 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif -- cgit v1.2.3 From 03967c5267b0e7312d1d55dc814d94cf190ca573 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 23 Oct 2008 23:14:29 +0900 Subject: x86: restore the old swiotlb alloc_coherent behavior This restores the old swiotlb alloc_coherent behavior (before the alloc_coherent rewrite): http://lkml.org/lkml/2008/8/12/200 The old alloc_coherent avoids GFP_DMA allocation first and if the allocated address is not fit for the device's coherent_dma_mask, then dma_alloc_coherent does GFP_DMA allocation. If it fails, alloc_coherent calls swiotlb_alloc_coherent (in short, we rarely used swiotlb_alloc_coherent). After the alloc_coherent rewrite, dma_alloc_coherent (include/asm-x86/dma-mapping.h) directly calls swiotlb_alloc_coherent. It means that we possibly can't handle a device having dma_masks > 24bit < 32bits since swiotlb_alloc_coherent doesn't have the above GFP_DMA retry mechanism. This patch fixes x86's swiotlb alloc_coherent to use the GFP_DMA retry mechanism, which dma_generic_alloc_coherent() provides now (pci-nommu.c and GART IOMMU driver also use dma_generic_alloc_coherent). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb_64.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index c4ce0332759..3c539d111ab 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,9 +18,21 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } +static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *vaddr; + + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + if (vaddr) + return vaddr; + + return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); +} + struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = swiotlb_alloc_coherent, + .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, -- cgit v1.2.3 From 3b15e581981b3ad35809f56d8131d5c19b6da1bd Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 23 Oct 2008 16:51:00 -0700 Subject: x86/PCI: build failure at x86/kernel/pci-dma.c with !CONFIG_PCI On Thu, Oct 23, 2008 at 04:09:52PM -0700, Alexander Beregalov wrote: > arch/x86/kernel/built-in.o: In function `iommu_setup': > pci-dma.c:(.init.text+0x36ad): undefined reference to `forbid_dac' > pci-dma.c:(.init.text+0x36cc): undefined reference to `forbid_dac' > pci-dma.c:(.init.text+0x3711): undefined reference to `forbid_dac This patch partially reverts a patch to add IOMMU support to ia64. The forbid_dac variable was incorrectly moved to quirks.c, which isn't built when PCI is disabled. Tested-by: "Alexander Beregalov" Acked-by: FUJITA Tomonori Signed-off-by: Fenghua Yu Signed-off-by: Jesse Barnes --- arch/x86/include/asm/iommu.h | 1 - arch/x86/kernel/pci-dma.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 98e28ea8cd1..e4a552d4446 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -7,7 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; -extern int forbid_dac; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1972266e8ba..19262482021 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,6 +9,8 @@ #include #include +static int forbid_dac __read_mostly; + struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -291,3 +293,17 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); + +#ifdef CONFIG_PCI +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + printk(KERN_INFO "PCI: VIA PCI bridge detected." + "Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +#endif -- cgit v1.2.3 From 9f32d21c981bb638d0991ce5675a20337312066b Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Thu, 23 Oct 2008 17:40:25 -0700 Subject: xen: fix Xen domU boot with batched mprotect Impact: fix guest kernel boot crash on certain configs Recent i686 2.6.27 kernels with a certain amount of memory (between 736 and 855MB) have a problem booting under a hypervisor that supports batched mprotect (this includes the RHEL-5 Xen hypervisor as well as any 3.3 or later Xen hypervisor). The problem ends up being that xen_ptep_modify_prot_commit() is using virt_to_machine to calculate which pfn to update. However, this only works for pages that are in the p2m list, and the pages coming from change_pte_range() in mm/mprotect.c are kmap_atomic pages. Because of this, we can run into the situation where the lookup in the p2m table returns an INVALID_MFN, which we then try to pass to the hypervisor, which then (correctly) denies the request to a totally bogus pfn. The right thing to do is to use arbitrary_virt_to_machine, so that we can be sure we are modifying the right pfn. This unfortunately introduces a performance penalty because of a full page-table-walk, but we can avoid that penalty for pages in the p2m list by checking if virt_addr_valid is true, and if so, just doing the lookup in the p2m table. The attached patch implements this, and allows my 2.6.27 i686 based guest with 768MB of memory to boot on a RHEL-5 hypervisor again. Thanks to Jeremy for the suggestions about how to fix this particular issue. Signed-off-by: Chris Lalancette Signed-off-by: Jeremy Fitzhardinge Cc: Chris Lalancette Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d4d52f5a1cf..aba77b2b7d1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; unsigned int level; - pte_t *pte = lookup_address(address, &level); - unsigned offset = address & ~PAGE_MASK; + pte_t *pte; + unsigned offset; - BUG_ON(pte == NULL); + /* + * if the PFN is in the linear mapped vaddr range, we can just use + * the (quick) virt_to_machine() p2m lookup + */ + if (virt_addr_valid(vaddr)) + return virt_to_machine(vaddr); + /* otherwise we have to do a (slower) full page-table walk */ + + pte = lookup_address(address, &level); + BUG_ON(pte == NULL); + offset = address & ~PAGE_MASK; return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } @@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, xen_mc_batch(); - u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); -- cgit v1.2.3 From ef020ab0109aa5cd6eac2e93519b7641c9862828 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 23 Oct 2008 17:54:05 -0500 Subject: x86/uv: memory allocation at initialization Impact: on SGI UV platforms, fix boot crash UV initialization is currently called too late to call alloc_bootmem_pages(). The current sequence is: start_kernel() mem_init() free_all_bootmem() <--- discard of bootmem rest_init() kernel_init() smp_prepare_cpus() native_smp_prepare_cpus() uv_system_init() <--- uses alloc_bootmem_pages() It should be calling kmalloc(). Signed-off-by: Cliff Wickman Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 680a06557c5..2c7dbdb9827 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -398,16 +397,16 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = alloc_bootmem_pages(bytes); + uv_blade_info = kmalloc(bytes, GFP_KERNEL); get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); - uv_node_to_blade = alloc_bootmem_pages(bytes); + uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_node_to_blade, 255, bytes); bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); - uv_cpu_to_blade = alloc_bootmem_pages(bytes); + uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_cpu_to_blade, 255, bytes); blade = 0; -- cgit v1.2.3 From 8115f3f0c939c5db0fe3c6c6c58911fd3a205b1e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Oct 2008 09:12:17 -0400 Subject: ftrace: use a real variable for ftrace_nop in x86 Impact: avoid section mismatch warning, clean up The dynamic ftrace determines which nop is safe to use at start up. When it finds a safe nop for patching, it sets a pointer called ftrace_nop to point to the code. All call sites are then patched to this nop. Later, when tracing is turned on, this ftrace_nop variable is again used to compare the location to make sure it is a nop before we update it to an mcount call. If this fails just once, a warning is printed and ftrace is disabled. Rakib Mullick noted that the code that sets up the nop is a .init section where as the nop itself is in the .text section. This is needed because the nop is used later on after boot up. The problem is that the test of the nop jumps back to the setup code and causes a "section mismatch" warning. Rakib first recommended to convert the nop to .init.text, but as stated above, this would fail since that text is used later. The real solution is to extend Rabik's patch, and to make the ftrace_nop into an array, and just save the code from the assembly to this array. Now the section can stay as an init section, and we have a nop to use later on. Reported-by: Rakib Mullick Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b1e5e2244ec..50ea0ac8c9b 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -21,8 +21,7 @@ #include -/* Long is fine, even if it is only 4 bytes ;-) */ -static unsigned long *ftrace_nop; +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -40,7 +39,7 @@ static int ftrace_calc_offset(long ip, long addr) unsigned char *ftrace_nop_replace(void) { - return (char *)ftrace_nop; + return ftrace_nop; } unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) @@ -125,9 +124,6 @@ int __init ftrace_dyn_arch_init(void *data) * TODO: check the cpuid to determine the best nop. */ asm volatile ( - "jmp ftrace_test_jmp\n" - /* This code needs to stay around */ - ".section .text, \"ax\"\n" "ftrace_test_jmp:" "jmp ftrace_test_p6nop\n" "nop\n" @@ -138,8 +134,6 @@ int __init ftrace_dyn_arch_init(void *data) "jmp 1f\n" "ftrace_test_nop5:" ".byte 0x66,0x66,0x66,0x66,0x90\n" - "jmp 1f\n" - ".previous\n" "1:" ".section .fixup, \"ax\"\n" "2: movl $1, %0\n" @@ -154,15 +148,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); - ftrace_nop = (unsigned long *)ftrace_test_p6nop; + memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); - ftrace_nop = (unsigned long *)ftrace_test_nop5; + memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: pr_info("ftrace: converting mcount calls to jmp . + 5\n"); - ftrace_nop = (unsigned long *)ftrace_test_jmp; + memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } -- cgit v1.2.3 From 3afa39493de510c33c56ddc76e6e1af7f87c5392 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 25 Oct 2008 22:58:21 -0700 Subject: x86: keep the /proc/meminfo page count correct Impact: get correct page count in /proc/meminfo found page count in /proc/meminfo is nor correct on 1G system in VirtualBox 2.0.4 # cat /proc/meminfo MemTotal: 1017508 kB MemFree: 822700 kB Buffers: 1456 kB Cached: 26632 kB SwapCached: 0 kB ... Hugepagesize: 2048 kB DirectMap4k: 4032 kB DirectMap2M: 18446744073709549568 kB with this patch get: ... DirectMap4k: 4032 kB DirectMap2M: 1044480 kB which is consistent to kernel_page_tables ---[ Low Kernel Mapping ]--- 0xffff880000000000-0xffff880000001000 4K RW PCD GLB x pte 0xffff880000001000-0xffff88000009f000 632K RW GLB x pte 0xffff88000009f000-0xffff8800000a0000 4K RW PCD GLB x pte 0xffff8800000a0000-0xffff880000200000 1408K RW GLB x pte 0xffff880000200000-0xffff88003fe00000 1020M RW PSE GLB x pmd 0xffff88003fe00000-0xffff88003fff0000 1984K RW GLB NX pte 0xffff88003fff0000-0xffff880040000000 64K pte 0xffff880040000000-0xffff888000000000 511G pud 0xffff888000000000-0xffffc20000000000 58880G pgd Signed-off-by: Yinghai Lu Acked-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b8e461d4941..c7a4c5a9a21 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. */ - if (pte_val(*pte)) + if (pte_val(*pte)) { + pages++; continue; + } if (0) printk(" pte=%p addr=%lx pte=%016lx\n", @@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * not differ with respect to page frame and * attributes. */ - if (page_size_mask & (1 << PG_LEVEL_2M)) + if (page_size_mask & (1 << PG_LEVEL_2M)) { + pages++; continue; + } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); } @@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * not differ with respect to page frame and * attributes. */ - if (page_size_mask & (1 << PG_LEVEL_1G)) + if (page_size_mask & (1 << PG_LEVEL_1G)) { + pages++; continue; + } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); } -- cgit v1.2.3 From 60817c9b31ef7897d60bca2f384cbc316a3fdd8b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Oct 2008 13:03:18 -0700 Subject: x86, memory hotplug: remove wrong -1 in calling init_memory_mapping() Impact: fix crash with memory hotplug Shuahua Li found: | I just did some experiments on a desktop for memory hotplug and this bug | triggered a crash in my test. | | Yinghai's suggestion also fixed the bug. We don't need to round it, just remove that extra -1 Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c7a4c5a9a21..f79a02f64d1 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -837,7 +837,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - last_mapped_pfn = init_memory_mapping(start, start + size-1); + last_mapped_pfn = init_memory_mapping(start, start + size); if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; -- cgit v1.2.3 From 6ad9f15c94822c3f067a7d443f3b414e08b34460 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 15 Oct 2008 07:45:08 -0200 Subject: KVM: MMU: sync root on paravirt TLB flush The pvmmu TLB flush handler should request a root sync, similarly to a native read-write CR3. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 99c239c5c0a..2a5e64881d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2634,6 +2634,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { kvm_x86_ops->tlb_flush(vcpu); + set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); return 1; } -- cgit v1.2.3 From 5550af4df179e52753d3a43a788a113ad8cd95cd Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 15 Oct 2008 20:15:06 +0800 Subject: KVM: Fix guest shared interrupt with in-kernel irqchip Every call of kvm_set_irq() should offer an irq_source_id, which is allocated by kvm_request_irq_source_id(). Based on irq_source_id, we identify the irq source and implement logical OR for shared level interrupts. The allocated irq_source_id can be freed by kvm_free_irq_source_id(). Currently, we support at most sizeof(unsigned long) different irq sources. [Amit: - rebase to kvm.git HEAD - move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file - move kvm_request_irq_source_id to the update_irq ioctl] [Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests] Signed-off-by: Sheng Yang Signed-off-by: Amit Shah Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/i8254.c | 11 +++++++++-- arch/x86/kvm/i8254.h | 1 + arch/x86/kvm/x86.c | 6 +++++- 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 65679d00633..8346be87cfa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -364,6 +364,9 @@ struct kvm_arch{ struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11c6725fb79..8772dc94682 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -545,6 +545,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; + mutex_lock(&kvm->lock); + pit->irq_source_id = kvm_request_irq_source_id(kvm); + mutex_unlock(&kvm->lock); + if (pit->irq_source_id < 0) + return NULL; + mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); spin_lock_init(&pit->pit_state.inject_lock); @@ -587,6 +593,7 @@ void kvm_free_pit(struct kvm *kvm) mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); + kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); mutex_unlock(&kvm->arch.vpit->pit_state.lock); kfree(kvm->arch.vpit); } @@ -595,8 +602,8 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, 0, 1); - kvm_set_irq(kvm, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index e436d4983aa..4178022b97a 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -44,6 +44,7 @@ struct kvm_pit { struct kvm_io_device speaker_dev; struct kvm *kvm; struct kvm_kpit_state pit_state; + int irq_source_id; }; #define KVM_PIT_BASE_ADDRESS 0x40 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f0677d1eae..f1f8ff2f1fa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, irq_event.irq, irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } @@ -4013,6 +4014,9 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + return kvm; } -- cgit v1.2.3 From 531f6ed7de911e975352fbb2b228367121da630a Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 17 Oct 2008 09:09:27 +0200 Subject: x86, bts: improve help text for BTS config Improve the help text of the X86_PTRACE_BTS config. Make X86_DS invisible and depend on X86_PTRACE_BTS. Reported-by: Roland Dreier Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 0b7c4a3f065..b815664fe37 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -513,19 +513,19 @@ config CPU_SUP_UMC_32 If unsure, say N. config X86_DS - bool "Debug Store support" - default y - help - Add support for Debug Store. - This allows the kernel to provide a memory buffer to the hardware - to store various profiling and tracing events. + def_bool X86_PTRACE_BTS + depends on X86_DEBUGCTLMSR config X86_PTRACE_BTS - bool "ptrace interface to Branch Trace Store" + bool "Branch Trace Store" default y - depends on (X86_DS && X86_DEBUGCTLMSR) + depends on X86_DEBUGCTLMSR help - Add a ptrace interface to allow collecting an execution trace - of the traced task. - This collects control flow changes in a (cyclic) buffer and allows - debuggers to fill in the gaps and show an execution trace of the debuggee. + This adds a ptrace interface to the hardware's branch trace store. + + Debuggers may use it to collect an execution trace of the debugged + application in order to answer the question 'how did I get here?'. + Debuggers may trace user mode as well as kernel mode. + + Say Y unless there is no application development on this machine + and you want to save a small amount of code size. -- cgit v1.2.3 From 36b75da27bb51dc34e358d0b7487406132806c46 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 17 Oct 2008 15:30:37 +0200 Subject: x86: microcode patch loader author update Removed one author's email address from module init message. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 936d8d55f23..82fb2809ce3 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -480,8 +480,8 @@ static int __init microcode_init(void) printk(KERN_INFO "Microcode Update Driver: v" MICROCODE_VERSION - " " - " \n"); + " ," + " Peter Oruba\n"); return 0; } -- cgit v1.2.3 From 3c52204bb90834bca8e9e78a3628d886ad6d4db5 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 17 Oct 2008 15:30:38 +0200 Subject: x86: AMD microcode patch loader author update Removed author's email address from MODULE_AUTHOR. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7a1f8eeac2c..5f8e5d75a25 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -39,7 +39,7 @@ #include MODULE_DESCRIPTION("AMD Microcode Update Driver"); -MODULE_AUTHOR("Peter Oruba "); +MODULE_AUTHOR("Peter Oruba"); MODULE_LICENSE("GPL v2"); #define UCODE_MAGIC 0x00414d44 -- cgit v1.2.3 From 1281675e9c0d4d42d993697f4daab45ef22d49da Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 14 Oct 2008 18:59:17 -0700 Subject: x86: fix APIC_DEBUG with inquire_remote_apic APIC_DEBUG is always 2. need to update inquire_remote_apic to check apic_verbosity with it instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 9 ++++----- arch/x86/include/asm/mach-default/mach_wakecpu.h | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 3ffc5a7bf66..39849346191 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -50,10 +50,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index d5c0b826a4f..9d80db91e99 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -33,10 +33,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ -- cgit v1.2.3 From e7706fc691513b0f06adb3de3d6ac04293180146 Mon Sep 17 00:00:00 2001 From: Ken'ichi Ohmichi Date: Mon, 20 Oct 2008 13:51:52 +0900 Subject: x86, kdump: fix invalid access on i386 sparsemem Impact: fix kdump crash on 32-bit sparsemem kernels Since linux-2.6.27, kdump has failed on i386 sparsemem kernel. 1st-kernel gets a panic just before switching to 2nd-kernel. The cause is that a kernel accesses invalid mem_section by page_to_pfn(image->swap_page) at machine_kexec(). image->swap_page is allocated if kexec for hibernation, but it is not allocated if kdump. So if kdump, a kernel should not access the mem_section corresponding to image->swap_page. The attached patch fixes this invalid access. Signed-off-by: Ken'ichi Ohmichi Cc: kexec-ml Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/machine_kexec_32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 0732adba05c..7a385746509 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -162,7 +162,10 @@ void machine_kexec(struct kimage *image) page_list[VA_PTE_0] = (unsigned long)kexec_pte0; page_list[PA_PTE_1] = __pa(kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); + + if (image->type == KEXEC_TYPE_DEFAULT) + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) + << PAGE_SHIFT); /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is -- cgit v1.2.3 From 11a6b0c933b55654a58afd84f63a5dde1607d78f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 14 Oct 2008 18:59:18 -0700 Subject: x86: 64 bit print out absent pages num too so users are not confused with memhole causing big total ram we don't need to worry about 32 bit, because memhole is always above max_low_pfn. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f79a02f64d1..ad38648bddb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -884,6 +884,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, void __init mem_init(void) { long codesize, reservedpages, datasize, initsize; + unsigned long absent_pages; start_periodic_check_for_corruption(); @@ -899,8 +900,9 @@ void __init mem_init(void) #else totalram_pages = free_all_bootmem(); #endif - reservedpages = max_pfn - totalram_pages - - absent_pages_in_range(0, max_pfn); + + absent_pages = absent_pages_in_range(0, max_pfn); + reservedpages = max_pfn - totalram_pages - absent_pages; after_bootmem = 1; codesize = (unsigned long) &_etext - (unsigned long) &_text; @@ -917,10 +919,11 @@ void __init mem_init(void) VSYSCALL_END - VSYSCALL_START); printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init)\n", + "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), max_pfn << (PAGE_SHIFT-10), codesize >> 10, + absent_pages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10); -- cgit v1.2.3 From 87c6f40128f92621698f97a62d2ead5184d1dd97 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 28 Oct 2008 16:13:54 +0100 Subject: x86, gart: fix gart detection for Fam11h CPUs Impact: fix AMD Family 11h boot hangs / USB device problems The AMD Fam11h CPUs have a K8 northbridge. This northbridge is different from other family's because it lacks GART support (as I just learned). But the kernel implicitly expects a GART if it finds an AMD northbridge. Fix this by removing the Fam11h northbridge id from the scan list of K8 northbridges. This patch also changes the message in the GART driver about missing K8 northbridges to tell that the GART is missing which is the correct information in this case. Reported-by: Jouni Malinen Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/k8.c | 1 - arch/x86/kernel/pci-gart_64.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 304d8bad655..cbc4332a77b 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -18,7 +18,6 @@ static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e3f75bbcede..a42b02b4df6 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -744,7 +744,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { - printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); + printk(KERN_INFO "PCI-GART: No AMD GART found.\n"); return; } -- cgit v1.2.3 From f96f57d91c2df75011d1e260c23edca429f37361 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 28 Oct 2008 12:39:23 -0700 Subject: x86: fix init_memory_mapping for [dc000000 - e0000000) - v2 Impact: change over-mapping to precise mapping, fix /proc/meminfo output v2: fix less than 1G ram system handling when gart aperture is 0xdc000000 - 0xe0000000 it return 0xc0000000 - 0xe0000000 that is not right. this patch fix that will get exact mapping on 256g sytem with that aperture after patch LBSuse:~ # cat /proc/meminfo MemTotal: 264742432 kB MemFree: 263920628 kB Buffers: 1416 kB Cached: 24468 kB ... DirectMap4k: 5760 kB DirectMap2M: 3205120 kB DirectMap1G: 265289728 kB it is consistent to LBSuse:~ # cat /sys/kernel/debug/kernel_page_tables .. ---[ Low Kernel Mapping ]--- 0xffff880000000000-0xffff880000200000 2M RW GLB x pte 0xffff880000200000-0xffff880040000000 1022M RW PSE GLB x pmd 0xffff880040000000-0xffff8800c0000000 2G RW PSE GLB NX pud 0xffff8800c0000000-0xffff8800d7e00000 382M RW PSE GLB NX pmd 0xffff8800d7e00000-0xffff8800d7fa0000 1664K RW GLB NX pte 0xffff8800d7fa0000-0xffff8800d8000000 384K pte 0xffff8800d8000000-0xffff8800dc000000 64M pmd 0xffff8800dc000000-0xffff8800e0000000 64M RW PSE GLB NX pmd 0xffff8800e0000000-0xffff880100000000 512M pmd 0xffff880100000000-0xffff880800000000 28G RW PSE GLB NX pud 0xffff880800000000-0xffff880824600000 582M RW PSE GLB NX pmd 0xffff880824600000-0xffff8808247f0000 1984K RW GLB NX pte 0xffff8808247f0000-0xffff880824800000 64K RW PCD GLB NX pte 0xffff880824800000-0xffff880840000000 440M RW PSE GLB NX pmd 0xffff880840000000-0xffff884000000000 223G RW PSE GLB NX pud 0xffff884000000000-0xffff884028000000 640M RW PSE GLB NX pmd 0xffff884028000000-0xffff884040000000 384M pmd 0xffff884040000000-0xffff888000000000 255G pud 0xffff888000000000-0xffffc20000000000 58880G pgd Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 50 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ad38648bddb..ebe1811e5b1 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -671,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long last_map_addr = 0; unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long pos; struct map_range mr[NR_RANGE_MR]; int nr_range, i; int use_pse, use_gbpages; - printk(KERN_INFO "init_memory_mapping\n"); + printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); /* * Find space for the kernel direct mapping tables. @@ -710,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* head if not big page alignment ?*/ start_pfn = start >> PAGE_SHIFT; - end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) + pos = start_pfn << PAGE_SHIFT; + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } /* big page (2M) range*/ - start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1< ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & ((1<>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PAGE_SHIFT; end_pfn = end>>PAGE_SHIFT; nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); -- cgit v1.2.3 From 9352f5698db2c6d7f2789f6cd37e3996d49ac4b5 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 23:05:22 -0700 Subject: x86: two trivial sparse annotations Impact: fewer sparse warnings, no functional changes arch/x86/kernel/vsmp_64.c:87:14: warning: incorrect type in argument 1 (different address spaces) arch/x86/kernel/vsmp_64.c:87:14: expected void const volatile [noderef] *addr arch/x86/kernel/vsmp_64.c:87:14: got void *[assigned] address arch/x86/kernel/vsmp_64.c:88:22: warning: incorrect type in argument 1 (different address spaces) arch/x86/kernel/vsmp_64.c:88:22: expected void const volatile [noderef] *addr arch/x86/kernel/vsmp_64.c:88:22: got void * arch/x86/kernel/vsmp_64.c:100:23: warning: incorrect type in argument 2 (different address spaces) arch/x86/kernel/vsmp_64.c:100:23: expected void volatile [noderef] *addr arch/x86/kernel/vsmp_64.c:100:23: got void * arch/x86/kernel/vsmp_64.c:101:23: warning: incorrect type in argument 1 (different address spaces) arch/x86/kernel/vsmp_64.c:101:23: expected void const volatile [noderef] *addr arch/x86/kernel/vsmp_64.c:101:23: got void * arch/x86/mm/gup.c:235:6: warning: incorrect type in argument 1 (different base types) arch/x86/mm/gup.c:235:6: expected void const volatile [noderef] * arch/x86/mm/gup.c:235:6: got unsigned long [unsigned] [assigned] start Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 2 +- arch/x86/mm/gup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 7766d36983f..a688f3bfaec 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -78,7 +78,7 @@ static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, static void __init set_vsmp_pv_ops(void) { - void *address; + void __iomem *address; unsigned int cap, ctl, cfg; /* set vSMP magic bits to indicate vSMP capable kernel */ diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 4ba373c5b8c..be54176e9eb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) + (void __user *)start, len))) goto slow_irqon; /* -- cgit v1.2.3 From 1d6cf1feb854c53c6d59e0d879603692b379e208 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 22:46:04 -0700 Subject: x86: start annotating early ioremap pointers with __iomem Impact: some new sparse warnings in e820.c etc, but no functional change. As with regular ioremap, iounmap etc, annotate with __iomem. Fixes the following sparse warnings, will produce some new ones elsewhere in arch/x86 that will get worked out over time. arch/x86/mm/ioremap.c:402:9: warning: cast removes address space of expression arch/x86/mm/ioremap.c:406:10: warning: cast adds address space to expression () arch/x86/mm/ioremap.c:782:19: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 6 +++--- arch/x86/mm/ioremap.c | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 5618a103f39..ac2abc88cd9 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -82,9 +82,9 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); extern void early_ioremap_init(void); extern void early_ioremap_clear(void); extern void early_ioremap_reset(void); -extern void *early_ioremap(unsigned long offset, unsigned long size); -extern void *early_memremap(unsigned long offset, unsigned long size); -extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); +extern void __iomem *early_memremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void __iomem *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ae71e11eb3e..d4c4307ff3e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -387,7 +387,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, unsigned long size) { unsigned long flags; - void *ret; + void __iomem *ret; int err; /* @@ -399,11 +399,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, if (err < 0) return NULL; - ret = (void *) __ioremap_caller(phys_addr, size, flags, - __builtin_return_address(0)); + ret = __ioremap_caller(phys_addr, size, flags, + __builtin_return_address(0)); free_memtype(phys_addr, phys_addr + size); - return (void __iomem *)ret; + return ret; } void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, @@ -622,7 +622,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) __early_set_fixmap(idx, 0, __pgprot(0)); } -static void *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; static int __init check_early_ioremap_leak(void) { @@ -645,7 +645,7 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -713,23 +713,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, if (early_ioremap_debug) printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); - prev_map[slot] = (void *) (offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); return prev_map[slot]; } /* Remap an IO device */ -void __init *early_ioremap(unsigned long phys_addr, unsigned long size) +void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); } /* Remap memory */ -void __init *early_memremap(unsigned long phys_addr, unsigned long size) +void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL); } -void __init early_iounmap(void *addr, unsigned long size) +void __init early_iounmap(void __iomem *addr, unsigned long size) { unsigned long virt_addr; unsigned long offset; @@ -779,7 +779,7 @@ void __init early_iounmap(void *addr, unsigned long size) --idx; --nrpages; } - prev_map[slot] = 0; + prev_map[slot] = NULL; } void __this_fixmap_does_not_exist(void) -- cgit v1.2.3 From fe8b868eccb9f85a0e231e35f0abac5b39bac801 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 28 Oct 2008 16:43:14 -0700 Subject: x86: remove debug code from arch_add_memory() Impact: remove incorrect WARN_ON(1) Gets rid of dmesg spam created during physical memory hot-add which will very likely confuse users. The change removes what appears to be debugging code which I assume was unintentionally included in: x86: arch/x86/mm/init_64.c printk fixes commit 10f22dde556d1ed41d55355d1fb8ad495f9810c8 Signed-off-by: Gary Hade Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ebe1811e5b1..9db01db6e3c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -858,7 +858,7 @@ int arch_add_memory(int nid, u64 start, u64 size) max_pfn_mapped = last_mapped_pfn; ret = __add_pages(zone, start_pfn, nr_pages); - WARN_ON(1); + WARN_ON_ONCE(ret); return ret; } -- cgit v1.2.3 From ab00fee30cddf975200b3c97aef25bea144a0d89 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Oct 2008 10:37:21 +0000 Subject: i386/PAE: fix pud_page() Impact: cleanup To the unsuspecting user it is quite annoying that this broken and inconsistent with x86-64 definition still exists. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-3level.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index fb16cec702e..52597aeadff 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -120,13 +120,13 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) +#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) /* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \ +#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ pmd_index(address)) #ifdef CONFIG_SMP -- cgit v1.2.3 From ae9b9403644f3ecc76867af042e7e1cfd5c099d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 30 Oct 2008 17:43:57 +0100 Subject: AMD IOMMU: fix detection of NP capable IOMMUs This patch changes the code to use IOMMU_CAP_NPCACHE as a shift and not as a mask. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3b346c6f551..38e88d40ab1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { - return iommu->cap & IOMMU_CAP_NPCACHE; + return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); } /**************************************************************************** -- cgit v1.2.3 From c17dad6905fc82d8f523399e5c3f014e81d61df6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 29 Oct 2008 14:00:50 -0700 Subject: .gitignore updates Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/boot/compressed/.gitignore | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index be0ed065249..63eff3b04d0 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -1 +1,3 @@ relocs +vmlinux.bin.all +vmlinux.relocs -- cgit v1.2.3 From c08b6acc9b996ba6231105cb12a4125c957e0c97 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 30 Oct 2008 11:33:19 -0700 Subject: x86, uv: fix compile error in uv_hub.h Impact: include file dependency cleanup Fix compile errors of files that include asm/uv/uv_hub.h but do not include linux/timer.h. [ such files are not mainline right now. ] Signed-of-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index c6ad93e315c..7a5782610b2 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -13,6 +13,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 08c33308575b370c89b4ed1198ece5f93145a2aa Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 30 Oct 2008 16:08:38 -0500 Subject: x86/voyager: fix boot breakage caused by x86: boot secondary cpus through initial_code Impact: boot up secondary CPUs as well on x86/Voyager systems This commit: | commit 3e9704739daf46a8ba6593d749c67b5f7cd633d2 | Author: Glauber Costa | Date: Wed May 28 13:01:54 2008 -0300 | | x86: boot secondary cpus through initial_code removed the use of initialize_secondary. However, it didn't update voyager, so the secondary cpus no longer boot. Fix this by adding the initial_code switch to voyager as well. Signed-off-by: James Bottomley Cc: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/mach-voyager/voyager_smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 0f6e8a6523a..9cd327a278a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -90,6 +90,7 @@ static void ack_vic_irq(unsigned int irq); static void vic_enable_cpi(void); static void do_boot_cpu(__u8 cpuid); static void do_quad_bootstrap(void); +static void initialize_secondary(void); int hard_smp_processor_id(void); int safe_smp_processor_id(void); @@ -650,6 +651,8 @@ void __init smp_boot_cpus(void) smp_tune_scheduling(); */ smp_store_cpu_info(boot_cpu_id); + /* setup the jump vector */ + initial_code = (unsigned long)initialize_secondary; printk("CPU%d: ", boot_cpu_id); print_cpu_info(&cpu_data(boot_cpu_id)); @@ -702,7 +705,7 @@ void __init smp_boot_cpus(void) /* Reload the secondary CPUs task structure (this function does not * return ) */ -void __init initialize_secondary(void) +static void __init initialize_secondary(void) { #if 0 // AC kernels only -- cgit v1.2.3 From 017d9d20d88cacb0a6a29f343b23c95e203f6645 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 30 Oct 2008 16:05:39 -0500 Subject: x86: use CONFIG_X86_SMP instead of CONFIG_SMP Impact: fix x86/Voyager boot CONFIG_SMP is used for features which work on *all* x86 boxes. CONFIG_X86_SMP is used for standard PC like x86 boxes (for things like multi core and apics) Signed-off-by: James Bottomley Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/tsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 0d9c993aa93..ef8f831af82 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 161bb850fc4..62348e4fd8d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -759,7 +759,7 @@ __cpuinit int unsynchronized_tsc(void) if (!cpu_has_tsc || tsc_unstable) return 1; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP if (apic_is_clustered_box()) return 1; #endif -- cgit v1.2.3 From ee477524b461324ed8fc950f451c3671dc79f12e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 30 Oct 2008 16:28:35 -0500 Subject: x86/voyager: fix compile breakage casued by x86: move prefill_possible_map calling early Impact: fix build failure on x86/Voyager Before: | commit 329513a35d1a2b6b28d54f5c2c0dde4face8200b | Author: Yinghai Lu | Date: Wed Jul 2 18:54:40 2008 -0700 | | x86: move prefill_possible_map calling early prefill_possible_mask() was hidden under CONFIG_HOTPLUG_CPU rendering it invisitble to voyager. Since this commit it's exposed, but not provided by the voyager subarch, so add a dummy stub to fix the link breakage. Signed-off-by: James Bottomley Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mach-voyager/voyager_smp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9cd327a278a..01285af5782 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -345,6 +345,12 @@ static void do_quad_bootstrap(void) } } +void prefill_possible_map(void) +{ + /* This is empty on voyager because we need a much + * earlier detection which is done in find_smp_config */ +} + /* Set up all the basic stuff: read the SMP config and make all the * SMP information reflect only the boot cpu. All others will be * brought on-line later. */ -- cgit v1.2.3 From 9e41bff2708e420e61e6b89a54c15232857069b1 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 30 Oct 2008 13:59:21 -0700 Subject: x86: fix /dev/mem mmap breakage when PAT is disabled Impact: allow /dev/mem mmaps on non-PAT CPUs/platforms Fix mmap to /dev/mem when CONFIG_X86_PAT is off and CONFIG_STRICT_DEVMEM is off mmap to /dev/mem on kernel memory has been failing since the introduction of PAT (CONFIG_STRICT_DEVMEM=n case). Seems like the check to avoid cache aliasing with PAT is kicking in even when PAT is disabled. The bug seems to have crept in 2.6.26. This patch makes sure that the mmap to regular kernel memory succeeds if CONFIG_STRICT_DEVMEM=n and PAT is disabled, and the checks to avoid cache aliasing still happens if PAT is enabled. Signed-off-by: Ravikiran Thirumalai Tested-by: Tim Sirianni Cc: Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 738fd0f2495..eb1bf000d12 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; } #else +/* This check is needed to avoid cache aliasing when PAT is enabled */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { u64 from = ((u64)pfn) << PAGE_SHIFT; u64 to = from + size; u64 cursor = from; + if (!pat_enabled) + return 1; + while (cursor < to) { if (!devmem_is_allowed(pfn)) { printk(KERN_INFO -- cgit v1.2.3 From b3572e361b6b2ac5e724bc4bb932b7774b720b95 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 30 Oct 2008 16:00:59 -0500 Subject: x86/voyager: fix compile breakage caused by dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Impact: build fix on x86/Voyager Given commits like this: | Author: Suresh Siddha | Date: Tue Jul 29 10:29:19 2008 -0700 | | x86, xsave: enable xsave/xrstor on cpus with xsave support Which deliberately expose boot cpu dependence to pieces of the system, I think it's time to explicitly have a variable for it to prevent this continual misassumption that the boot CPU is zero. Signed-off-by: James Bottomley Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ arch/x86/include/asm/smp.h | 6 ++++++ arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 350bee1d54d..2a40c4c6dd7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -231,6 +231,10 @@ config SMP If you don't know what to do here, say N. +config X86_HAS_BOOT_CPU_ID + def_bool y + depends on X86_VOYAGER + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2766021aef8..d12811ce51d 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -225,5 +225,11 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25581dcb280..93e9393ea64 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1134,7 +1134,7 @@ void __cpuinit cpu_init(void) /* * Boot processor to setup the FP and extended state context info. */ - if (!smp_processor_id()) + if (smp_processor_id() == boot_cpu_id) init_thread_xstate(); xsave_init(); -- cgit v1.2.3 From bfcb4c1becf93b1592f4a03a4d6e00a3ab89d5ec Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 30 Oct 2008 16:13:37 -0500 Subject: x86/voyager: fix missing cpu_index initialisation Impact: fix /proc/cpuinfo output on x86/Voyager Ever since | commit 92cb7612aee39642d109b8d935ad265e602c0563 | Author: Mike Travis | Date: Fri Oct 19 20:35:04 2007 +0200 | | x86: convert cpuinfo_x86 array to a per_cpu array We've had an extra field in cpuinfo_x86 which is cpu_index. Unfortunately, voyager has never initialised this, although the only noticeable impact seems to be that /proc/cpuinfo shows all zeros for the processor ids. Anyway, fix this by initialising the boot CPU properly and setting the index when the secondaries update. Signed-off-by: James Bottomley Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/mach-voyager/voyager_smp.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 93e9393ea64..da8f15ac7a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -549,6 +549,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_early_init(c); validate_pat_support(c); + + c->cpu_index = boot_cpu_id; } void __init early_cpu_init(void) diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 01285af5782..7f4c6af1435 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -420,6 +420,7 @@ void __init smp_store_cpu_info(int id) struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; identify_secondary_cpu(c); } -- cgit v1.2.3 From 1c4acdb467f8a6704855a5670ff3d82e3c18eb0b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 31 Oct 2008 00:43:03 +0100 Subject: x86: cpu_index build fix fix: arch/x86/kernel/cpu/common.c: In function 'early_identify_cpu': arch/x86/kernel/cpu/common.c:553: error: 'struct cpuinfo_x86' has no member named 'cpu_index' as cpu_index is only available on SMP. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index da8f15ac7a6..003a65395bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -550,7 +550,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); +#ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; +#endif } void __init early_cpu_init(void) -- cgit v1.2.3 From ad5173ff8a387191dbacf889becb92c59aba5d59 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 31 Oct 2008 11:24:27 -0500 Subject: lguest: fix early_ioremap. dmi_scan_machine breaks under lguest: lguest: unhandled trap 14 at 0xc04edeae (0xffa00000) This is because we use current_cr3 for the read_cr3() paravirt function, and it isn't set until the first cr3 change. We got away with it until this happened. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 48ee4f9435f..4e22fa08d62 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -367,10 +367,9 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * lazily after a task switch, and Linux uses that gratefully, but wouldn't a * name like "FPUTRAP bit" be a little less cryptic? * - * We store cr0 (and cr3) locally, because the Host never changes it. The - * Guest sometimes wants to read it and we'd prefer not to bother the Host - * unnecessarily. */ -static unsigned long current_cr0, current_cr3; + * We store cr0 locally because the Host never changes it. The Guest sometimes + * wants to read it and we'd prefer not to bother the Host unnecessarily. */ +static unsigned long current_cr0; static void lguest_write_cr0(unsigned long val) { lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0); @@ -399,17 +398,23 @@ static unsigned long lguest_read_cr2(void) return lguest_data.cr2; } +/* See lguest_set_pte() below. */ +static bool cr3_changed = false; + /* cr3 is the current toplevel pagetable page: the principle is the same as - * cr0. Keep a local copy, and tell the Host when it changes. */ + * cr0. Keep a local copy, and tell the Host when it changes. The only + * difference is that our local copy is in lguest_data because the Host needs + * to set it upon our initial hypercall. */ static void lguest_write_cr3(unsigned long cr3) { + lguest_data.pgdir = cr3; lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); - current_cr3 = cr3; + cr3_changed = true; } static unsigned long lguest_read_cr3(void) { - return current_cr3; + return lguest_data.pgdir; } /* cr4 is used to enable and disable PGE, but we don't care. */ @@ -498,13 +503,13 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * to forget all of them. Fortunately, this is very rare. * * ... except in early boot when the kernel sets up the initial pagetables, - * which makes booting astonishingly slow. So we don't even tell the Host - * anything changed until we've done the first page table switch. */ + * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell + * the Host anything changed until we've done the first page table switch, + * which brings boot back to 0.25 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { *ptep = pteval; - /* Don't bother with hypercall before initial setup. */ - if (current_cr3) + if (cr3_changed) lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); } @@ -521,7 +526,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) static void lguest_flush_tlb_single(unsigned long addr) { /* Simply set it to zero: if it was not, it will fault back in. */ - lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); + lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); } /* This is what happens after the Guest has removed a large number of entries. -- cgit v1.2.3 From 526e5ab200ce483dcdf146806f4936bd58daa800 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 31 Oct 2008 11:24:27 -0500 Subject: lguest: fix irq vectors. do_IRQ: cannot handle IRQ -1 vector 0x20 cpu 0 ------------[ cut here ]------------ kernel BUG at arch/x86/kernel/irq_32.c:219! We're not ISA: we have a 1:1 mapping from vectors to irqs. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4e22fa08d62..a5d8e1ace1c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -586,6 +586,9 @@ static void __init lguest_init_IRQ(void) for (i = 0; i < LGUEST_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; + /* Some systems map "vectors" to interrupts weirdly. Lguest has + * a straightforward 1 to 1 mapping, so force that here. */ + __get_cpu_var(vector_irq)[vector] = i; if (vector != SYSCALL_VECTOR) { set_intr_gate(vector, interrupt[vector]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, -- cgit v1.2.3 From b342797c1e5116a130841527b47dfaa462ed0968 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 31 Oct 2008 09:31:38 +0100 Subject: x86: build fix Impact: build fix on certain UP configs fix: arch/x86/kernel/cpu/common.c: In function 'cpu_init': arch/x86/kernel/cpu/common.c:1141: error: 'boot_cpu_id' undeclared (first use in this function) arch/x86/kernel/cpu/common.c:1141: error: (Each undeclared identifier is reported only once arch/x86/kernel/cpu/common.c:1141: error: for each function it appears in.) Pull in asm/smp.h on UP, so that we get the definition of boot_cpu_id. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 003a65395bd..b9c9ea0217a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include -- cgit v1.2.3 From fd9409343521eac22b6ed51686128a643c7c976b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:37:09 -0700 Subject: x86: add iomap_atomic*()/iounmap_atomic() on 32-bit using fixmaps Impact: introduce new APIs, separate kmap code from CONFIG_HIGHMEM This takes the code used for CONFIG_HIGHMEM memory mappings except that it's designed for dynamic IO resource mapping. These fixmaps are available even with CONFIG_HIGHMEM turned off. Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 4 +++ arch/x86/include/asm/fixmap_32.h | 4 --- arch/x86/include/asm/highmem.h | 5 +--- arch/x86/mm/Makefile | 2 +- arch/x86/mm/init_32.c | 3 +- arch/x86/mm/iomap_32.c | 59 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 arch/x86/mm/iomap_32.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8668a94f850..23696d44a0a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -9,6 +9,10 @@ extern int fixmaps_set; +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags); diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index 09f29ab5c13..c7115c1d721 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -28,10 +28,8 @@ extern unsigned long __FIXADDR_TOP; #include #include #include -#ifdef CONFIG_HIGHMEM #include #include -#endif /* * Here we define all the compile-time 'special' virtual @@ -75,10 +73,8 @@ enum fixed_addresses { #ifdef CONFIG_X86_CYCLONE_TIMER FIX_CYCLONE_TIMER, /*cyclone timer register*/ #endif -#ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a3b3b7c3027..bf9276bea66 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -25,14 +25,11 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; -extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; -extern pte_t *pkmap_page_table; - /* * Right now we initialize only a single pte table. It can be extended * easily, subsequent pte tables have to be allocated in one physical diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 59f89b434b4..fea4565ff57 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o -obj-$(CONFIG_X86_32) += pgtable_32.o +obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8396868e82c..c483f424207 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -334,7 +334,6 @@ int devmem_is_allowed(unsigned long pagenr) return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -357,6 +356,7 @@ static void __init kmap_init(void) kmap_prot = PAGE_KERNEL; } +#ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { unsigned long vaddr; @@ -436,7 +436,6 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) # define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c new file mode 100644 index 00000000000..d0151d8ce45 --- /dev/null +++ b/arch/x86/mm/iomap_32.c @@ -0,0 +1,59 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include + +/* Map 'pfn' using fixed map 'type' and protections 'prot' + */ +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +} +EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); + +void +iounmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. + */ + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + kpte_clear_flush(kmap_pte-idx, vaddr); + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); +} +EXPORT_SYMBOL_GPL(iounmap_atomic); -- cgit v1.2.3 From 2576c9991758e431b73e374f6019d6e1e12a8d36 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Tue, 7 Oct 2008 13:33:12 -0700 Subject: x86: fix AMDC1E and XTOPOLOGY conflict in cpufeature Impact: fix xsave slowdown regression Fix two features from conflicting in feature bits. Fixes this performance regression: Subject: cpu2000(both float and int) 13% regression with 2.6.28-rc1 http://lkml.org/lkml/2008/10/28/36 Reported-by: "Zhang, Yanmin" Bisected-by: "Zhang, Yanmin" Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f73e95d75b4..cfdf8c2c5c3 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -91,7 +91,7 @@ #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ -#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ +#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ -- cgit v1.2.3 From 1f98757776eafe31065be9118db6051afcf8643c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 1 Nov 2008 10:17:22 -0700 Subject: x86: Clean up late e820 resource allocation This makes the late e820 resources use 'insert_resource_expand_to_fit()' instead of doing a 'reserve_region_with_split()', and also avoids marking them as IORESOURCE_BUSY. This results in us being perfectly happy to use pre-existing PCI resources even if they were marked as being in a reserved region, while still avoiding any _new_ allocations in the reserved regions. It also makes for a simpler and more accurate resource tree. Example resource allocation from Jonathan Corbet, who has firmware that has an e820 reserved entry that covered a big range (e0000000-fed003ff), and that had various PCI resources in it set up by firmware. With old kernels, the reserved range would force us to re-allocate all pre-existing PCI resources, and his reserved range would end up looking like this: e0000000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 where only the pre-allocated special regions (IOAPIC and HPET) were kept around. With 2.6.28-rc2, which uses 'reserve_region_with_split()', Jonathan's resource tree looked like this: e0000000-fe7fffff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe800000-fe8fffff : reserved fe900000-fe9d9aff : reserved fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9b00-fe9d9bff : reserved fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : reserved fe9da000-fe9dafff : 0000:00:03.3 fe9da000-fe9dafff : reserved fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : reserved fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : reserved fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : reserved fea00000-fea7ffff : 0000:00:02.0 fea00000-fea7ffff : reserved fea80000-feafffff : 0000:00:02.1 fea80000-feafffff : reserved feb00000-febfffff : 0000:00:02.0 feb00000-febfffff : reserved fec00000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 and because the reserved entry had been split and moved into the individual resources, and because it used the IORESOURCE_BUSY flag, the drivers that actually wanted to _use_ those resources couldn't actually attach to them: e1000e 0000:00:19.0: BAR 0: can't reserve mem region [0xfe9e0000-0xfe9fffff] HDA Intel 0000:00:1b.0: BAR 0: can't reserve mem region [0xfe9dc000-0xfe9dffff] with this patch, the resource tree instead becomes e0000000-fed003ff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : ehci_hcd fe9da000-fe9dafff : 0000:00:03.3 fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : e1000e fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : ICH HD audio fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : e1000e fea00000-fea7ffff : 0000:00:02.0 fea80000-feafffff : 0000:00:02.1 feb00000-febfffff : 0000:00:02.0 fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 ie the one reserved region now ends up surrounding all the PCI resources that were allocated inside of it by firmware, and because it is not marked BUSY, drivers have no problem attaching to the pre-allocated resources. Reported-and-tested-by: Jonathan Corbet Cc: Yinghai Lu Cc: Ingo Molnar Cc: Robert Hancock Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ce97bf3bed1..7aafeb5263e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM; /* * don't register the region that could be conflicted with * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); + } res++; } @@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); + insert_resource_expand_to_fit(&iomem_resource, res); res++; } } -- cgit v1.2.3 From 73557af5bf32c3db973050de1fb73423e8fc873e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 31 Oct 2008 13:59:49 -0400 Subject: x86, voyager: fix smp_intr_init() compile breakage Impact: fix x86/Voyager build Looks like this became static on the rest of x86. Fix it up by adding an external definition to mach-voyager/setup.c Signed-off-by: Ingo Molnar --- arch/x86/include/asm/voyager.h | 1 + arch/x86/mach-voyager/setup.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index 9c811d2e6f9..b3e64730762 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -520,6 +520,7 @@ extern void voyager_restart(void); extern void voyager_cat_power_off(void); extern void voyager_cat_do_common_interrupt(void); extern void voyager_handle_nmi(void); +extern void voyager_smp_intr_init(void); /* Commands for the following are */ #define VOYAGER_PSI_READ 0 #define VOYAGER_PSI_WRITE 1 diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 6bbdd633864..a580b9562e7 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -27,7 +27,7 @@ static struct irqaction irq2 = { void __init intr_init_hook(void) { #ifdef CONFIG_SMP - smp_intr_init(); + voyager_smp_intr_init(); #endif setup_irq(2, &irq2); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7f4c6af1435..0e331652681 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1258,7 +1258,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) #define QIC_SET_GATE(cpi, vector) \ set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) -void __init smp_intr_init(void) +void __init voyager_smp_intr_init(void) { int i; -- cgit v1.2.3 From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:45 +0100 Subject: io mapping: clean up #ifdefs Impact: cleanup clean up ifdefs: change #ifdef CONFIG_X86_32/64 to CONFIG_HAVE_ATOMIC_IOMAP. flip around the #ifdef sections to clean up the structure. Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d315..e60c59b81bd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1894,6 +1894,10 @@ config SYSVIPC_COMPAT endmenu +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + source "net/Kconfig" source "drivers/Kconfig" -- cgit v1.2.3 From 70de9a97049e0ba79dc040868564408d5ce697f9 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 11:18:47 -0800 Subject: x86: don't use tsc_khz to calculate lpj if notsc is passed Impact: fix udelay when "notsc" boot parameter is passed With notsc passed on commandline, tsc may not be used for udelays, make sure that we do not use tsc_khz to calculate the lpj value in such cases. Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Alok N Kataria Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 62348e4fd8d..2ef80e30192 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -813,10 +813,6 @@ void __init tsc_init(void) cpu_khz = calibrate_cpu(); #endif - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -836,6 +832,10 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + use_tsc_delay(); /* Check and install the TSC clocksource */ dmi_check_system(bad_tsc_dmi_table); -- cgit v1.2.3 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469..4850e4b02b6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } -- cgit v1.2.3 From c78d0cf2925bffae8a6f00e7d9b8e971b0392edd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Nov 2008 12:04:46 +0000 Subject: x86: don't allow nr_irqs > NR_IRQS Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can return a value larger than NR_IRQS. This will lead to probe_irq_on() overrunning the irq_desc array. I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a Supermicro dual Xeon system. NR_IRQS is 224 but probe_nr_irqs() detects 5 IOAPICs and returns 240. Here are the log messages: Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0]) Tue Nov 4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24]) Tue Nov 4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48]) Tue Nov 4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72]) Tue Nov 4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96]) Tue Nov 4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119 Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge) Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) Tue Nov 4 16:53:47 2008 Enabling APIC mode: Flat. Using 5 I/O APICs Signed-off-by: Ben Hutchings Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b764d7429c6..7a3f2028e2e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void) /* something wrong ? */ if (nr < nr_min) nr = nr_min; + if (WARN_ON(nr > NR_IRQS)) + nr = NR_IRQS; return nr; } -- cgit v1.2.3 From 1b4897688011cd05e07f00dcfe6af3331eb36a3c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Nov 2008 14:10:13 -0800 Subject: x86: size NR_IRQS on 32-bit systems the same way as 64-bit Impact: make NR_IRQS big enough for system with lots of apic/pins If lots of IO_APIC's are there (or can be there), size the same way as 64-bit, depending on MAX_IO_APICS and NR_CPUS. This fixes the boot problem reported by Ben Hutchings on a 32-bit server with 5 IO-APICs and 240 IO-APIC pins. Signed-off-by: Yinghai Tested-by: Ben Hutchings Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d843ed0e9b2..503aadc4ad3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,30 +101,22 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif !defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) -# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) - -# define NR_IRQS 224 - -# else /* IO_APIC || PARAVIRT */ - -# define NR_IRQS 16 - -# endif +# define NR_IRQS 224 -#else /* !VISWS && !VOYAGER */ +#else /* IO_APIC || PARAVIRT */ -# define NR_IRQS 224 +# define NR_IRQS 16 -#endif /* VISWS */ +#endif /* Voyager specific defines */ /* These define the CPIs we use in linux */ -- cgit v1.2.3 From da85f865b1dcec0853c48b763ed312441ce0c7df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 13:37:27 -0600 Subject: x86: mention ACPI in top-level Kconfig menu Impact: clarify menuconfig text Mention ACPI in the top-level menu to give a clue as to where it lives. This matches what ia64 does. Signed-off-by: Bjorn Helgaas Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d315..5d6aa4013dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1494,7 +1494,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -menu "Power management options" +menu "Power management and ACPI options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER -- cgit v1.2.3 From 7db282fa67b58daff8a57f9e1c93d4474b5908ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 5 Nov 2008 23:36:48 -0800 Subject: x86: remove VISWS and PARAVIRT around NR_IRQS puzzle Impact: fix warning message when PARAVIRT is set in config Remove stale #ifdef components from our IRQ sizing logic. x86/Voyager is the only holdout. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 503aadc4ad3..0005adb0f94 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,18 +101,18 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_X86_VOYAGER) # define NR_IRQS 224 -#else /* IO_APIC || PARAVIRT */ +#else /* IO_APIC || VOYAGER */ # define NR_IRQS 16 -- cgit v1.2.3 From d6f0f39b7d05e62b347c4352d070e4afb3ade4b5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Nov 2008 13:53:04 -0800 Subject: x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR Impact: fix rare x2apic hang On x86, x2apic mode accesses for sending IPI's don't have serializing semantics. If the IPI receivner refers(in lock-free fashion) to some memory setup by the sender, the need for smp_mb() before sending the IPI becomes critical in x2apic mode. Add the smp_mb() in native_flush_tlb_others() before sending the IPI. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 6 ++++++ arch/x86/kernel/tlb_64.c | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e00534b3353..f4049f3513b 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, flush_mm = mm; flush_va = va; cpus_or(flush_cpumask, cpumask, flush_cpumask); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index dcbf7a1159e..8f919ca6949 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -182,6 +182,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_va = va; cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.3 From 80be308dfa3798c7bad0fc81760b2faf83870e91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 6 Nov 2008 14:59:05 +0100 Subject: AMD IOMMU: fix lazy IO/TLB flushing in unmap path Lazy flushing needs to take care of the unmap path too which is not yet implemented and leads to stale IO/TLB entries. This is fixed by this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 38e88d40ab1..4755bbc7ae5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -526,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); + + if (address + pages >= dom->next_bit) + dom->need_flush = true; } /**************************************************************************** @@ -981,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (amd_iommu_unmap_flush) + if (amd_iommu_unmap_flush || dma_dom->need_flush) { iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + dma_dom->need_flush = false; + } } /* -- cgit v1.2.3 From b9c3bfc24e1088d260de4091b2b41808c7398355 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 6 Nov 2008 12:05:40 +0000 Subject: x86: align DirectMap in /proc/meminfo Impact: right-align /proc/meminfo consistent with other fields When the split-LRU patches added Inactive(anon) and Inactive(file) lines to /proc/meminfo, all counts were moved two columns rightwards to fit in. Now move x86's DirectMap lines two columns rightwards to line up. Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f1dc1b75d16..e89d24815f2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -67,18 +67,18 @@ static void split_page_count(int level) void arch_report_meminfo(struct seq_file *m) { - seq_printf(m, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - seq_printf(m, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif } -- cgit v1.2.3 From 8d00450d296dedec9ada38d43b83e79cca6fd5a3 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 4 Nov 2008 12:52:44 -0200 Subject: Revert "x86: default to reboot via ACPI" This reverts commit c7ffa6c26277b403920e2255d10df849bd613380. the assumptio of this change was that this would not break any existing machine. Andrey Borzenkov reported troubles with the ACPI reboot method: the system would hang on reboot, necessiating a power cycle. Probably more systems are affected as well. Also, there are patches queued up for v2.6.29 to disable virtualization on emergency_restart() - which was the original motivation of this change. Reported-by: Andrey Borzenkov Bisected-by: Andrey Borzenkov Signed-off-by: Eduardo Habkost Acked-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f4c93f1cfc1..724adfc63cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -/* - * Keyboard reset and triple fault may result in INIT, not RESET, which - * doesn't work when we're in vmx root mode. Try ACPI first. - */ -enum reboot_type reboot_type = BOOT_ACPI; +enum reboot_type reboot_type = BOOT_KBD; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.3 From 47cb2ed9df2789fc4a3fe1201e475078f93c4839 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 6 Nov 2008 13:48:24 -0800 Subject: x86, xen: fix use of pgd_page now that it really does return a page Impact: fix 32-bit Xen guest boot crash On 32-bit PAE, pud_page, for no good reason, didn't really return a struct page *. Since Jan Beulich's fix "i386/PAE: fix pud_page()", pud_page does return a struct page *. Because PAE has 3 pagetable levels, the pud level is folded into the pgd level, so pgd_page() is the same as pud_page(), and now returns a struct page *. Update the xen/mmu.c code which uses pgd_page() accordingly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aba77b2b7d1..49697d86c6a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -874,7 +874,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) #else /* CONFIG_X86_32 */ #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); @@ -991,7 +991,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif -- cgit v1.2.3 From d05fdf316067cd311d5e7add08da26ded8a58080 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 28 Oct 2008 19:23:06 +1100 Subject: xen: make sure stray alias mappings are gone before pinning Xen requires that all mappings of pagetable pages are read-only, so that they can't be updated illegally. As a result, if a page is being turned into a pagetable page, we need to make sure all its mappings are RO. If the page had been used for ioremap or vmalloc, it may still have left over mappings as a result of not having been lazily unmapped. This change makes sure we explicitly mop them all up before pinning the page. Unlike aliases created by kmap, the there can be vmalloc aliases even for non-high pages, so we must do the flush unconditionally. Signed-off-by: Jeremy Fitzhardinge Cc: Linux Memory Management List Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 5 +++-- arch/x86/xen/mmu.c | 9 ++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b61534c7a4c..5e4686d70f6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -863,15 +863,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l if (PagePinned(virt_to_page(mm->pgd))) { SetPagePinned(page); + vm_unmap_aliases(); if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); if (level == PT_PTE && USE_SPLIT_PTLOCKS) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else + } else { /* make sure there are no stray mappings of this page */ kmap_flush_unused(); - vm_unmap_aliases(); + } } } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aba77b2b7d1..89f3b6edc65 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -850,13 +850,16 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. */ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + vm_unmap_aliases(); + xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for kmap_flush_unused */ + if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { + /* re-enable interrupts for flushing */ xen_mc_issue(0); + kmap_flush_unused(); - vm_unmap_aliases(); + xen_mc_batch(); } -- cgit v1.2.3 From 7c64ade53a6f977d73f16243865c42ceae999aea Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 7 Nov 2008 14:02:49 +0100 Subject: oprofile: Fix p6 counter overflow check Fix the counter overflow check for CPUs with counter width > 32 I had a similar change in a different patch that I didn't submit and I didn't notice the problem earlier because it was always tested together. Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 0620d6d45f7..3f1b81a83e2 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -27,8 +27,7 @@ static int num_counters = 2; static int counter_width = 32; #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) +#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) @@ -124,14 +123,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) static int ppro_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { + rdmsrl(msrs->counters[i].addr, val); + if (CTR_OVERFLOWED(val)) { oprofile_add_sample(regs, i); wrmsrl(msrs->counters[i].addr, -reset_value[i]); } -- cgit v1.2.3 From 0d12cdd5f883f508d33b85c1bae98fa28987c8c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 16:19:55 +0100 Subject: sched: improve sched_clock() performance in scheduler-intense workloads native_read_tsc() overhead accounts for 20% of the system overhead: 659567 system_call 41222.9375 686796 schedule 435.7843 718382 __switch_to 665.1685 823875 switch_mm 4526.7857 1883122 native_read_tsc 55385.9412 9761990 total 2.8468 this is large part due to the rdtsc_barrier() that is done before and after reading the TSC. But sched_clock() is not a precise clock in the GTOD sense, using such barriers is completely pointless. So remove the barriers and only use them in vget_cycles(). This improves lat_ctx performance by about 5%. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 2 -- arch/x86/include/asm/tsc.h | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 46be2fa7ac2..c2a812ebde8 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); - rdtsc_barrier(); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - rdtsc_barrier(); return EAX_EDX_VAL(val, low, high); } diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163cc91..9cd83a8e40d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t vget_cycles(void) { + cycles_t cycles; + /* * We only do VDSOs on TSC capable CPUs, so this shouldnt * access boot_cpu_data (which is not VDSO-safe): @@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - return (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + cycles = (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + + return cycles; } extern void tsc_init(void); -- cgit v1.2.3 From 7cbaef9c83e58bbd4bdd534b09052b6c5ec457d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 17:05:38 +0100 Subject: sched: optimize sched_clock() a bit sched_clock() uses cycles_2_ns() needlessly - which is an irq-disabling variant of __cycles_2_ns(). Most of the time sched_clock() is called with irqs disabled already. The few places that call it with irqs enabled need to be updated. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2ef80e30192..424093b157d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -55,7 +55,7 @@ u64 native_sched_clock(void) rdtscll(this_offset); /* return the value in ns */ - return cycles_2_ns(this_offset); + return __cycles_2_ns(this_offset); } /* We need to define a real function for sched_clock, to override the -- cgit v1.2.3 From 3044646148cdfa83a311bf1c146a70e550280159 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 Nov 2008 10:07:58 -0800 Subject: x86: move iomap.h to the new include location a new file was accidentally added to include/asm-x86; move it to the new arch/x86/include/asm location Signed-off-by: Arjan van de Ven --- arch/x86/include/asm/iomap.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 arch/x86/include/asm/iomap.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h new file mode 100644 index 00000000000..c1f06289b14 --- /dev/null +++ b/arch/x86/include/asm/iomap.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include +#include +#include +#include +#include + +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + +void +iounmap_atomic(void *kvaddr, enum km_type type); -- cgit v1.2.3 From 6c2e94033df5ca11149e52dd179b8dde3172e9bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 12:33:49 +0100 Subject: x86: apic honour irq affinity which was set in early boot setup_ioapic_dest() is called after the non boot cpus have been brought up. It sets the irq affinity of all already configured interrupts to all cpus and ignores affinity settings which were done by the early bootup code. If the IRQ_NO_BALANCING or IRQ_AFFINITY_SET flags are set then use the affinity mask from the irq descriptor and not TARGET_CPUS. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2e..988ee89467d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3761,7 +3761,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; struct irq_cfg *cfg; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -3778,16 +3780,30 @@ void __init setup_ioapic_dest(void) * cpu is online. */ cfg = irq_cfg(irq); - if (!cfg->vector) + if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); + continue; + + } + + /* + * Honour affinities which have been set in early boot + */ + desc = irq_to_desc(irq); + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = TARGET_CPUS; + #ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, mask); else - set_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + set_ioapic_affinity_irq(irq, mask); } } -- cgit v1.2.3 From 1de5b0854623d30d01d72cd4ea323eb5f39d1f16 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:18 +0000 Subject: x86: HPET: convert WARN_ON to WARN_ON_ONCE It is possible to flood the console with call traces if the WARN_ON condition is true because of the frequency with which this function is called. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 77017e834cf..f10f9461a43 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 89d77a1eb60be916d85d9394bedbfa2037af89c5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:20 +0000 Subject: x86: HPET: read from HPET_Tn_CMP() not HPET_T0_CMP In hpet_next_event() we check that the value we just wrote to HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if timer is anything other than timer 0, is likely to fail. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f10f9461a43..cfe6aa56f71 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 5ceb1a04187553e08c6ab60d30cee7c454ee139a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 22:23:13 +0000 Subject: x86: HPET: enter hpet_interrupt_handler with interrupts disabled Some functions that may be called from this handler require that interrupts are disabled. Also, combining IRQF_DISABLED and IRQF_SHARED does not reliably disable interrupts in a handler, so remove IRQF_SHARED from the irq flags (this irq is not shared anyway). Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Cc: "Will Newton" Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cfe6aa56f71..067d8de913f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) { if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) return -1; disable_irq(dev->irq); -- cgit v1.2.3 From 4694516d1987303dd83bfd0efdd36fa5b65d701b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 10 Nov 2008 21:52:47 +0100 Subject: x86: Make NUMA on 32-bit depend on BROKEN While investigating the failure of hibernation on 32-bit x86 with CONFIG_NUMA set, as described in this message http://marc.info/?l=linux-kernel&m=122634118116226&w=4 I asked some people for help and I was told that it wasn't really worth the effort, because CONFIG_NUMA was generally broken on 32-bit x86 systems and it shouldn't be used in such configs. For this reason, make CONFIG_NUMA depend on BROKEN instead of EXPERIMENTAL on x86-32. Signed-off-by: Rafael J. Wysocki Cc: Andi Kleen Cc: Pavel Machek Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4cf0ab13d18..93224b56918 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help -- cgit v1.2.3 From 6cd10f8db385ba547811baa5b26f672fdff232e6 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 9 Nov 2008 11:53:14 -0600 Subject: x86, voyager: fix smp generic helper voyager breakage Impact: build/boot fix for x86/Voyager This change: | commit 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1 | Author: Jens Axboe | Date: Thu Jun 26 11:21:34 2008 +0200 | | Add generic helpers for arch IPI function calls didn't wire up the voyager smp call function correctly, so do that here. Also make CONFIG_USE_GENERIC_SMP_HELPERS a def_bool y again, since we now use the generic helpers for every x86 architecture. Signed-off-by: James Bottomley Cc: Jens Axboe Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 ++++- arch/x86/mach-voyager/voyager_smp.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4cf0ab13d18..ac22bb7719f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -167,9 +167,12 @@ config GENERIC_PENDING_IRQ config X86_SMP bool depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) - select USE_GENERIC_SMP_HELPERS default y +config USE_GENERIC_SMP_HELPERS + def_bool y + depends on SMP + config X86_32_SMP def_bool y depends on X86_32 && SMP diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 0e331652681..52145007bd7 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -7,6 +7,7 @@ * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ +#include #include #include #include @@ -1790,6 +1791,17 @@ void __init smp_setup_processor_id(void) x86_write_percpu(cpu_number, hard_smp_processor_id()); } +static void voyager_send_call_func(cpumask_t callmask) +{ + __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id()); + send_CPI(mask, VIC_CALL_FUNCTION_CPI); +} + +static void voyager_send_call_func_single(int cpu) +{ + send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); +} + struct smp_ops smp_ops = { .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, .smp_prepare_cpus = voyager_smp_prepare_cpus, @@ -1799,6 +1811,6 @@ struct smp_ops smp_ops = { .smp_send_stop = voyager_smp_send_stop, .smp_send_reschedule = voyager_smp_send_reschedule, - .send_call_func_ipi = native_send_call_func_ipi, - .send_call_func_single_ipi = native_send_call_func_single_ipi, + .send_call_func_ipi = voyager_send_call_func, + .send_call_func_single_ipi = voyager_send_call_func_single, }; -- cgit v1.2.3 From c41ef344de212bd918f7765af21b5008628c03e0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 28 Oct 2008 18:16:58 -0200 Subject: KVM: MMU: increase per-vcpu rmap cache alloc size The page fault path can use two rmap_desc structures, if: - walk_addr's dirty pte update allocates one rmap_desc. - mmu_lock is dropped, sptes are zapped resulting in rmap_desc being freed. - fetch->mmu_set_spte allocates another rmap_desc. Increase to 4 for safety. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a5e64881d9..f1983d9477c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 1); + rmap_desc_cache, 4); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); -- cgit v1.2.3 From a29a2af378f3f6362b68e126e2541c8bde885ead Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Wed, 29 Oct 2008 14:13:39 -0700 Subject: x86: KVM guest: fix section mismatch warning in kvmclock.c WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch in reference from the function kvm_setup_secondary_clock() to the function .devinit.text:setup_secondary_APIC_clock() The function kvm_setup_secondary_clock() references the function __devinit setup_secondary_APIC_clock(). This is often because kvm_setup_secondary_clock lacks a __devinit annotation or the annotation of setup_secondary_APIC_clock is wrong. Signed-off-by: Md.Rakib H. Mullick Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 774ac499156..1c9cc431ea4 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void kvm_setup_secondary_clock(void) +static void __devinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.3 From ca93e992fdfdc6569ac2845d7560eeb5de4a4e0b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 Nov 2008 11:25:17 +0200 Subject: KVM: Require the PCI subsystem PCI device assignment makes calls to pci code, so require it to be built into the kernel. Signed-off-by: Avi Kivity --- arch/x86/kvm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ce3251ce550..b81125f0bde 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,8 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + # for device assignment: + depends on PCI select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES -- cgit v1.2.3 From 928d4bf747e9c290b690ff515d8f81e8ee226d97 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 6 Nov 2008 14:55:45 +0800 Subject: KVM: VMX: Set IGMT bit in EPT entry There is a potential issue that, when guest using pagetable without vmexit when EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory, which would be inconsistent with host side and would cause host MCE due to inconsistent cache attribute. The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default memory type to protect host (notice that all memory mapped by KVM should be WB). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- arch/x86/kvm/vmx.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2643b430d83..d06b4dc0e2e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3564,7 +3564,8 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | + VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 3e010d21fdd..ec5edc339da 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul -- cgit v1.2.3 From e17d1dc0863767bab8fde4ba9be92c7f79e7fe50 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 11 Nov 2008 13:09:36 +0200 Subject: KVM: Fix pit memory leak if unable to allocate irq source id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-By: Daniel Marjamäki Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 8772dc94682..59ebd37ad79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) mutex_lock(&kvm->lock); pit->irq_source_id = kvm_request_irq_source_id(kvm); mutex_unlock(&kvm->lock); - if (pit->irq_source_id < 0) + if (pit->irq_source_id < 0) { + kfree(pit); return NULL; + } mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); -- cgit v1.2.3 From 32836259ff25ce97010569706cd33ba94de81d62 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 16:17:52 -0700 Subject: ACPI: pci_link: remove acpi_irq_balance_set() interface This removes the acpi_irq_balance_set() interface from the PCI interrupt link driver. x86 used acpi_irq_balance_set() to tell the PCI interrupt link driver to configure links to minimize IRQ sharing. But the link driver can easily figure out whether to turn on IRQ balancing based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of that external interface. It's better for the driver to figure this out at init-time. If we set it externally via the x86 code, the interface reduces modularity, and we depend on the fact that acpi_process_madt() happens before we process the kernel command line. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - arch/x86/kernel/acpi/boot.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d676d8ecde..9830681446a 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void) acpi_pci_disabled = 1; acpi_noirq_set(); } -extern int acpi_irq_balance_set(char *str); /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c1f76abae9..4c51a2f8fd3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void) error = acpi_parse_madt_ioapic_entries(); if (!error) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_irq_balance_set(NULL); acpi_ioapic = 1; smp_found_config = 1; -- cgit v1.2.3 From 97a70e548bd97d5a46ae9d44f24aafcc013fd701 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Nov 2008 23:22:35 +0100 Subject: x86, hibernate: fix breakage on x86_32 with CONFIG_NUMA set Impact: fix crash during hibernation on 32-bit NUMA The NUMA code on x86_32 creates special memory mapping that allows each node's pgdat to be located in this node's memory. For this purpose it allocates a memory area at the end of each node's memory and maps this area so that it is accessible with virtual addresses belonging to low memory. As a result, if there is high memory, these NUMA-allocated areas are physically located in high memory, although they are mapped to low memory addresses. Our hibernation code does not take that into account and for this reason hibernation fails on all x86_32 systems with CONFIG_NUMA=y and with high memory present. Fix this by adding a special mapping for the NUMA-allocated memory areas to the temporary page tables created during the last phase of resume. Signed-off-by: Rafael J. Wysocki Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmzone_32.h | 4 ++++ arch/x86/mm/numa_32.c | 35 +++++++++++++++++++++++++++++++++++ arch/x86/power/hibernate_32.c | 4 ++++ 3 files changed, 43 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 485bdf059ff..07f1af494ca 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void) extern int early_pfn_to_nid(unsigned long pfn); +extern void resume_map_numa_kva(pgd_t *pgd); + #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat +static inline void resume_map_numa_kva(pgd_t *pgd) {} + #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 847c164725f..8518c678d83 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -222,6 +222,41 @@ static void __init remap_numa_kva(void) } } +#ifdef CONFIG_HIBERNATION +/** + * resume_map_numa_kva - add KVA mapping to the temporary page tables created + * during resume from hibernation + * @pgd_base - temporary resume page directory + */ +void resume_map_numa_kva(pgd_t *pgd_base) +{ + int node; + + for_each_online_node(node) { + unsigned long start_va, start_pfn, size, pfn; + + start_va = (unsigned long)node_remap_start_vaddr[node]; + start_pfn = node_remap_start_pfn[node]; + size = node_remap_size[node]; + + printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node); + + for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { + unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); + pgd_t *pgd = pgd_base + pgd_index(vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pmd_t *pmd = pmd_offset(pud, vaddr); + + set_pmd(pmd, pfn_pmd(start_pfn + pfn, + PAGE_KERNEL_LARGE_EXEC)); + + printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", + __FUNCTION__, vaddr, start_pfn + pfn); + } + } +} +#endif + static unsigned long calculate_numa_remap_pages(void) { int nid; diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index f2b6e3f11bf..81197c62d5b 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -12,6 +12,7 @@ #include #include #include +#include /* Defined in hibernate_asm_32.S */ extern int restore_image(void); @@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) } } } + + resume_map_numa_kva(pgd_base); + return 0; } -- cgit v1.2.3 From 604d20554883cf03f888440d58ea7c6d36899839 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Nov 2008 23:26:14 +0100 Subject: x86: make NUMA on 32-bit depend on EXPERIMENTAL again My previous patch to make CONFIG_NUMA on x86_32 depend on BROKEN turned out to be unnecessary, after all, since the source of the hibernation vs CONFIG_NUMA problem turned out to be the fact that we didn't take the NUMA KVA remapping into account in the hibernation code. Signed-off-by: Rafael J. Wysocki Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 93224b56918..4cf0ab13d18 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help -- cgit v1.2.3 From 52168e60f7d86d83124903098ac8c2dba93cd1c4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 14 Nov 2008 13:47:31 +0000 Subject: Revert "x86: blacklist DMAR on Intel G31/G33 chipsets" This reverts commit e51af6630848406fc97adbd71443818cdcda297b, which was wrongly hoovered up and submitted about a month after a better fix had already been merged. The better fix is commit cbda1ba898647aeb4ee770b803c922f595e97731 ("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do this blacklisting based on the DMI identification for the offending motherboard, since sometimes this chipset (or at least a chipset with the same PCI ID) apparently _does_ actually have an IOMMU. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- arch/x86/include/asm/iommu.h | 1 - arch/x86/kernel/early-quirks.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index e4a552d4446..0b500c5b644 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,7 +6,6 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -extern int dmar_disabled; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3ce029ffaa5..1b894b72c0f 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -#ifdef CONFIG_DMAR -static void __init intel_g33_dmar(int num, int slot, int func) -{ - struct acpi_table_header *dmar_tbl; - acpi_status status; - - status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); - if (ACPI_SUCCESS(status)) { - printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); - dmar_disabled = 1; - } -} -#endif - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, -#ifdef CONFIG_DMAR - { PCI_VENDOR_ID_INTEL, 0x29c0, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, -#endif {} }; -- cgit v1.2.3 From d1f1e9c01006b4b050e090055c75278f80c2a5c5 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Sat, 15 Nov 2008 11:00:17 +0100 Subject: x86, bts: fix unlock problem in ds.c Fix a problem where ds_request() returned an error without releasing the ds lock. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2b69994fd3a..ac1d5b0586b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -384,8 +384,9 @@ static int ds_request(struct task_struct *task, void *base, size_t size, spin_lock(&ds_lock); + error = -EPERM; if (!check_tracer(task)) - return -EPERM; + goto out_unlock; error = -ENOMEM; context = ds_alloc_context(task); -- cgit v1.2.3 From d3c6aa1e69f705ac3ab64584101b1d38435b1353 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 16 Nov 2008 00:49:31 -0800 Subject: x86: fix es7000 compiling Impact: fix es7000 build CC arch/x86/kernel/es7000_32.o arch/x86/kernel/es7000_32.c: In function find_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:255: error: implicit declaration of function acpi_get_table_with_size arch/x86/kernel/es7000_32.c:261: error: implicit declaration of function early_acpi_os_unmap_memory arch/x86/kernel/es7000_32.c: In function unmap_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:277: error: implicit declaration of function __acpi_unmap_table make[1]: *** [arch/x86/kernel/es7000_32.o] Error 1 we applied one patch out of order... | commit a73aaedd95703bd49f4c3f9df06fb7b7373ba905 | Author: Yinghai Lu | Date: Sun Sep 14 02:33:14 2008 -0700 | | x86: check dsdt before find oem table for es7000, v2 | | v2: use __acpi_unmap_table() that patch need: x86: use early_ioremap in __acpi_map_table x86: always explicitly map acpi memory acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap acpi/x86: introduce __apci_map_table, v4 submitted to the ACPI tree but not upstream yet. fix it until those patches applied, need to revert this one Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index f454c78fcef..0aa2c443d60 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; - acpi_size tbl_size; - while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; - early_acpi_os_unmap_memory(header, tbl_size); *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); return 0; } - early_acpi_os_unmap_memory(header, tbl_size); } return -1; } void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { - if (!oem_addr) - return; - - __acpi_unmap_table((char *)oem_addr, oem_size); } #endif -- cgit v1.2.3 From a4a16beadea041ab601e65b264b568e8b6b4f68d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Nov 2008 09:05:37 +0100 Subject: oprofile: fix an overflow in ppro code reset_value was changed from long to u64 in commit b99170288421c79f0c2efa8b33e26e65f4bb7fb8 (oprofile: Implement Intel architectural perfmon support) But dynamic allocation of this array use a wrong type (long instead of u64) Cc: Andi Kleen Signed-off-by: Eric Dumazet Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 3f1b81a83e2..716d26f0e5d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) int i; if (!reset_value) { - reset_value = kmalloc(sizeof(unsigned) * num_counters, + reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; -- cgit v1.2.3 From 93ce99e849433ede4ce8b410b749dc0cad1100b2 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 17 Nov 2008 14:43:58 -0800 Subject: x86: add rdtsc barrier to TSC sync check Impact: fix incorrectly marked unstable TSC clock Patch (commit 0d12cdd "sched: improve sched_clock() performance") has a regression on one of the test systems here. With the patch, I see: checking TSC synchronization [CPU#0 -> CPU#1]: Measured 28 cycles TSC warp between CPUs, turning off TSC clock. Marking TSC unstable due to check_tsc_sync_source failed Whereas, without the patch syncs pass fine on all CPUs: checking TSC synchronization [CPU#0 -> CPU#1]: passed. Due to this, TSC is marked unstable, when it is not actually unstable. This is because syncs in check_tsc_wrap() goes away due to this commit. As per the discussion on this thread, correct way to fix this is to add explicit syncs as below? Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_sync.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9ffb01c31c4..1c0dfbca87c 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void) cycles_t start, now, prev, end; int i; + rdtsc_barrier(); start = get_cycles(); + rdtsc_barrier(); /* * The measurement runs for 20 msecs: */ @@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void) */ __raw_spin_lock(&sync_lock); prev = last_tsc; + rdtsc_barrier(); now = get_cycles(); + rdtsc_barrier(); last_tsc = now; __raw_spin_unlock(&sync_lock); -- cgit v1.2.3 From 10db4ef7b9a65b86e4d047671a1886f4c101a859 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 18 Nov 2008 15:23:08 +0100 Subject: x86, PEBS/DS: fix code flow in ds_request() this compiler warning: arch/x86/kernel/ds.c: In function 'ds_request': arch/x86/kernel/ds.c:368: warning: 'context' may be used uninitialized in this function Shows that the code flow in ds_request() is buggy - it goes into the unlock+release-context path even when the context is not allocated yet. First allocate the context, then do the other checks. Also, take care with GFP allocations under the ds_lock spinlock. Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ac1d5b0586b..d1a121443bd 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -236,17 +236,33 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) struct ds_context *context = *p_context; if (!context) { + spin_unlock(&ds_lock); + context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) + if (!context) { + spin_lock(&ds_lock); return NULL; + } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); + spin_lock(&ds_lock); return NULL; } + spin_lock(&ds_lock); + /* + * Check for race - another CPU could have allocated + * it meanwhile: + */ + if (*p_context) { + kfree(context->ds); + kfree(context); + return *p_context; + } + *p_context = context; context->this = p_context; @@ -384,15 +400,15 @@ static int ds_request(struct task_struct *task, void *base, size_t size, spin_lock(&ds_lock); - error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - error = -ENOMEM; context = ds_alloc_context(task); if (!context) goto out_unlock; + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + error = -EALREADY; if (context->owner[qual] == current) goto out_unlock; -- cgit v1.2.3 From e5e1f606ecbf67e52ebe2df5d14f8b94ec6544d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:07:17 +0100 Subject: AMD IOMMU: add parameter to disable device isolation Impact: add a new AMD IOMMU kernel command line parameter Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0cdcda35a05..838a2e1d5bb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1213,6 +1213,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "share", 5) == 0) + amd_iommu_isolate = 0; if (strncmp(str, "fullflush", 11) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From 3ce1f93c6d53c3f91c3846cf66b018276c8ac2e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:09:20 +0100 Subject: AMD IOMMU: enable device isolation per default Impact: makes device isolation the default for AMD IOMMU Some device drivers showed double-free bugs of DMA memory while testing them with AMD IOMMU. If all devices share the same protection domain this can lead to data corruption and data loss. Prevent this by putting each device into its own protection domain per default. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 838a2e1d5bb..595edd2befc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate; /* if 1, device isolation is enabled */ +int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the -- cgit v1.2.3 From 695b5676c727d80921a2dc8737d5b3322222db85 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:16:43 +0100 Subject: AMD IOMMU: fix fullflush comparison length Impact: fix comparison length for 'fullflush' Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 595edd2befc..30ae2701b3d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1215,7 +1215,7 @@ static int __init parse_amd_iommu_options(char *str) amd_iommu_isolate = 1; if (strncmp(str, "share", 5) == 0) amd_iommu_isolate = 0; - if (strncmp(str, "fullflush", 11) == 0) + if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From 8501c45cc32c311ae755a2d5ac8c4a5f04908d42 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 19:11:46 +0100 Subject: AMD IOMMU: check for next_bit also in unmapped area Impact: fix possible use of stale IO/TLB entries Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 331b318304e..e4899e0e878 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -537,7 +537,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); - if (address + pages >= dom->next_bit) + if (address >= dom->next_bit) dom->need_flush = true; } -- cgit v1.2.3 From 0af40a4b1050c050e62eb1dc30b82d5ab22bf221 Mon Sep 17 00:00:00 2001 From: Philipp Kohlbecher Date: Sun, 16 Nov 2008 12:11:01 +0100 Subject: x86: more general identifier for Phoenix BIOS Impact: widen the reach of the low-memory-protect DMI quirk Phoenix BIOSes variously identify their vendor as "Phoenix Technologies, LTD" or "Phoenix Technologies LTD" (without the comma.) This patch makes the identification string in the bad_bios_dmi_table more general (following a suggestion by Ingo Molnar), so that both versions are handled. Again, the patched file compiles cleanly and the patch has been tested successfully on my machine. Signed-off-by: Philipp Kohlbecher Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa6790c1dd..9d5674f7b6c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { .callback = dmi_low_memory_corruption, .ident = "Phoenix BIOS", .matches = { - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, #endif -- cgit v1.2.3 From 093bac154c142fa1fb31a3ac69ae1bc08930231b Mon Sep 17 00:00:00 2001 From: Steve Conklin Date: Fri, 14 Nov 2008 00:55:51 -0600 Subject: x86: quirk for reboot stalls on a Dell Optiplex 330 Dell Optiplex 330 appears to hang on reboot. This is resolved by adding a quirk to set bios reboot. Signed-off-by: Leann Ogasawara Signed-off-by: Steve Conklin Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb..cc5a2545dd4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -169,6 +169,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KW626"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 330", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"), + DMI_MATCH(DMI_BOARD_NAME, "0KP561"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", -- cgit v1.2.3 From 20a4a236c7de5c915551cdc562482aa53eaff40e Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 13 Nov 2008 18:06:04 -0800 Subject: x86: uaccess_64: fix return value in __copy_from_user() __copy_from_user() will return invalid value 16 when it fails to access user space and the size is 10. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 664f15280f1..f8cfd00db45 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,7 +46,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) return ret; case 10: __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); + ret, "q", "", "=r", 10); if (unlikely(ret)) return ret; __get_user_asm(*(u16 *)(8 + (char *)dst), -- cgit v1.2.3 From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 19 Nov 2008 15:36:14 -0800 Subject: reintroduce accept4 Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: Ulrich Drepper Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/unistd_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 834b2c1d89f..d2e415e6666 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_paccept 288 -__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_accept4 288 +__SYSCALL(__NR_accept4, sys_accept4) #define __NR_signalfd4 289 __SYSCALL(__NR_signalfd4, sys_signalfd4) #define __NR_eventfd2 290 -- cgit v1.2.3 From 9bc646f163b136684390081262fab0fd8f5343ca Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:08:45 +0600 Subject: x86: fix __cpuinit/__init tangle in init_thread_xstate() Impact: fix incorrect __init annotation This patch removes the following section mismatch warning. A patch set was send previously (http://lkml.org/lkml/2008/11/10/407). But introduce some other problem, reported by Rufus (http://lkml.org/lkml/2008/11/11/46). Then Ingo Molnar suggest that, it's best to remove __init from xsave_cntxt_init(void). Which is the second patch in this series. Now, this one removes the following warning. WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2237): Section mismatch in reference from the function cpu_init() to the function .init.text:init_thread_xstate() The function __cpuinit cpu_init() references a function __init init_thread_xstate(). If init_thread_xstate is only used by cpu_init then annotate init_thread_xstate with a matching annotation. Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608d4ca..b0f61f0dcd0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __init init_thread_xstate(void) +void __cpuinit init_thread_xstate(void) { if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); -- cgit v1.2.3 From bfe085f62f98a49e1b864e4950389c7205174e4f Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:12:50 +0600 Subject: x86: fixing __cpuinit/__init tangle, xsave_cntxt_init() Annotate xsave_cntxt_init() as "can be called outside of __init". Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb75e82..15c3e699918 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __init xsave_cntxt_init(void) +void __ref xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; -- cgit v1.2.3 From 0ca4b6b00113b064c080d26d803d0d7c80fb5dc8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 20 Nov 2008 14:09:33 -0700 Subject: x86: Fix interrupt leak due to migration When we migrate an interrupt from one CPU to another, we set the move_in_progress flag and clean up the vectors later once they're not being used. If you're unlucky and call destroy_irq() before the vectors become un-used, the move_in_progress flag is never cleared, which causes the interrupt to become unusable. This was discovered by Jesse Brandeburg for whom it manifested as an MSI-X device refusing to use MSI-X mode when the driver was unloaded and reloaded repeatedly. Signed-off-by: Matthew Wilcox Signed-off-by: Linus Torvalds --- arch/x86/kernel/io_apic.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2e..c9513e1ff28 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1140,6 +1140,20 @@ static void __clear_irq_vector(int irq) cfg->vector = 0; cpus_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) + return; + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) + continue; + per_cpu(vector_irq, cpu)[vector] = -1; + break; + } + } + cfg->move_in_progress = 0; } void __setup_vector_irq(int cpu) -- cgit v1.2.3 From a1967d64414dab500e86cbbddf8eae6ad2047903 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Nov 2008 11:16:48 -0800 Subject: x86: revert irq number limitation Impact: fix MSIx not enough irq numbers available regression The manual revert of the sparse_irq patches missed to bring the number of possible irqs back to the .27 status. This resulted in a regression when two multichannel network cards were placed in a system with only one IO_APIC - causing the networking driver to not have the right IRQ and the device not coming up. Remove the dynamic allocation logic leftovers and simply return NR_IRQS in probe_nr_irqs() for now. Fixes: http://lkml.org/lkml/2008/11/19/354 Reported-by: Jesper Dangaard Brouer Signed-off-by: Thomas Gleixner Tested-by: Jesper Dangaard Brouer Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index c9513e1ff28..1fec0f9b150 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic) int __init probe_nr_irqs(void) { - int idx; - int nr = 0; -#ifndef CONFIG_XEN - int nr_min = 32; -#else - int nr_min = NR_IRQS; -#endif - - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < nr_min) - nr = nr_min; - if (WARN_ON(nr > NR_IRQS)) - nr = NR_IRQS; - - return nr; + return NR_IRQS; } /* -------------------------------------------------------------------------- -- cgit v1.2.3 From 86bbc2c235e500957b213e7e64ce2e0ccb8bc131 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 21 Nov 2008 10:21:33 +0000 Subject: xen: pin correct PGD on suspend Impact: fix Xen guest boot failure commit eefb47f6a1e855653d275cb90592a3587ea93a09 ("xen: use spin_lock_nest_lock when pinning a pagetable") changed xen_pgd_walk to walk over mm->pgd rather than taking pgd as an argument. This breaks xen_mm_(un)pin_all() because it makes init_mm.pgd readonly instead of the pgd we are interested in and therefore the pin subsequently fails. (XEN) mm.c:2280:d15 Bad type (saw 00000000e8000001 != exp 0000000060000000) for mfn bc464 (pfn 21ca7) (XEN) mm.c:2665:d15 Error while pinning mfn bc464 [ 14.586913] 1 multicall(s) failed: cpu 0 [ 14.586926] Pid: 14, comm: kstop/0 Not tainted 2.6.28-rc5-x86_32p-xenU-00172-gee2f6cc #200 [ 14.586940] Call Trace: [ 14.586955] [] ? printk+0x18/0x1e [ 14.586972] [] xen_mc_flush+0x163/0x1d0 [ 14.586986] [] __xen_pgd_pin+0xa1/0x110 [ 14.587000] [] ? stop_cpu+0x0/0xf0 [ 14.587015] [] xen_mm_pin_all+0x4b/0x70 [ 14.587029] [] xen_suspend+0x39/0xe0 [ 14.587042] [] ? stop_cpu+0x0/0xf0 [ 14.587054] [] stop_cpu+0x9d/0xf0 [ 14.587067] [] run_workqueue+0x8d/0x150 [ 14.587080] [] ? _spin_unlock_irqrestore+0x23/0x40 [ 14.587094] [] ? prepare_to_wait+0x3a/0x70 [ 14.587107] [] worker_thread+0x88/0xf0 [ 14.587120] [] ? autoremove_wake_function+0x0/0x50 [ 14.587133] [] ? worker_thread+0x0/0xf0 [ 14.587146] [] kthread+0x3c/0x70 [ 14.587157] [] ? kthread+0x0/0x70 [ 14.587170] [] kernel_thread_helper+0x7/0x10 [ 14.587181] call 1/3: op=14 arg=[c0415000] result=0 [ 14.587192] call 2/3: op=14 arg=[e1ca2000] result=0 [ 14.587204] call 3/3: op=26 arg=[c1808860] result=-22 Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 688936044dc..636ef4caa52 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) * For 64-bit, we must skip the Xen hole in the middle of the address * space, just after the big x86-64 virtual hole. */ -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - pgd_t *pgd = mm->pgd; int flush = 0; unsigned hole_low, hole_high; unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; @@ -753,6 +752,14 @@ out: return flush; } +static int xen_pgd_walk(struct mm_struct *mm, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) +{ + return __xen_pgd_walk(mm, mm->pgd, func, limit); +} + /* If we're using split pte locks, then take the page's lock and return a pointer to it. Otherwise return NULL. */ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) @@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { + if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { /* re-enable interrupts for flushing */ xen_mc_issue(0); @@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) PT_PMD); #endif - xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); + __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); xen_mc_issue(0); } -- cgit v1.2.3 From bd2b3ca7686d9470b1b58df631daa03179486182 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 20 Nov 2008 11:47:18 +0200 Subject: KVM: VMX: Fix interrupt loss during race with NMI If an interrupt cannot be injected for some reason (say, page fault when fetching the IDT descriptor), the interrupt is marked for reinjection. However, if an NMI is queued at this time, the NMI will be injected instead and the NMI will be lost. Fix by deferring the NMI injection until the interrupt has been injected successfully. Analyzed by Jan Kiszka. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d06b4dc0e2e..a4018b01e1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { -- cgit v1.2.3 From 0c0f40bdbe4ddb48ebecfb5c2b56eeb175a57c45 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 21 Nov 2008 19:13:58 +0100 Subject: KVM: MMU: fix sync of ptes addressed at owner pagetable During page sync, if a pagetable contains a self referencing pte (that points to the pagetable), the corresponding spte may be marked as writable even though all mappings are supposed to be write protected. Fix by clearing page unsync before syncing individual sptes. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1983d9477c..410ddbc1aa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) } rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } kvm_mmu_flush_tlb(vcpu); - kvm_unlink_unsync_page(vcpu->kvm, sp); return 0; } -- cgit v1.2.3 From 5cf02b7bafddb6c3c16ddfb23d3ce187f70528ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 25 Nov 2008 00:42:37 -0500 Subject: x86: use limited register constraint for setnz Impact: build fix with certain compilers GCC can decide to use %dil when "r" is used, which is not valid for setnz. This bug was brought out by Stephen Rothwell's merging of the branch tracer into linux-next. [ Thanks to Uros Bizjak for recommending 'q' over 'Q' ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/boot/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 0be77b39328..7e8e8b25f5f 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -74,7 +74,7 @@ static int kbd_pending(void) { u8 pending; asm volatile("int $0x16; setnz %0" - : "=rm" (pending) + : "=qm" (pending) : "a" (0x0100)); return pending; } -- cgit v1.2.3 From eff79aee91dd07e944df65fa448c8baeee7709d8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 25 Nov 2008 14:13:03 +0100 Subject: arch/x86/kernel/pci-calgary_64.c: change simple_strtol to simple_strtoul Impact: fix theoretical option string parsing overflow Since bridge is unsigned, it would seem better to use simple_strtoul that simple_strtol. A simplified version of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r2@ long e; position p; @@ e = simple_strtol@p(...) @@ position p != r2.p; type T; T e; @@ e = - simple_strtol@p + simple_strtoul (...) // Signed-off-by: Julia Lawall Cc: muli@il.ibm.com Cc: jdmason@kudzu.us Cc: discuss@x86-64.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d78f3..d28bbdc35e4 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) ++p; if (*p == '\0') break; - bridge = simple_strtol(p, &endp, 0); + bridge = simple_strtoul(p, &endp, 0); if (p == endp) break; -- cgit v1.2.3 From 292c669cd7087a090d6420e223eb1072f3e3c50b Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:45:13 +0100 Subject: x86, bts: exclude ds.c from build when disabled Impact: cleanup Move the CONFIG guard from the .c file into the makefile. Reported-by: Andi Kleen Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/ds.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e..b62a7667828 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-y += ds.o +obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d1a121443bd..4c8d57ec966 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -21,8 +21,6 @@ */ -#ifdef CONFIG_X86_DS - #include #include @@ -878,4 +876,3 @@ void ds_free(struct ds_context *context) while (leftovers--) ds_put_context(context); } -#endif /* CONFIG_X86_DS */ -- cgit v1.2.3 From e5e8ca633bbe972eff6f84e064a63c0c08ed6c3d Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:47:19 +0100 Subject: x86, bts: turn macro into static inline function Impact: cleanup Replace a macro with a static inline function. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 72c5a190bf4..a95008457ea 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -23,12 +23,13 @@ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H -#ifdef CONFIG_X86_DS #include #include +#ifdef CONFIG_X86_DS + struct task_struct; /* @@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context); #else /* CONFIG_X86_DS */ -#define ds_init_intel(config) do {} while (0) +struct cpuinfo_x86; +static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ -- cgit v1.2.3 From c4858ffc8f2dc850cb1f609c679b1ac1ad36ef0c Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:49:06 +0100 Subject: x86, pebs: fix PEBS record size configuration Impact: fix DS hw enablement on 64-bit x86 Fix the PEBS record size in the DS configuration. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 4c8d57ec966..04e38ef646a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -816,13 +816,21 @@ static const struct ds_configuration ds_cfg_var = { .sizeof_ds = sizeof(long) * 12, .sizeof_field = sizeof(long), .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = sizeof(long) * 10 +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18 +#endif }; static const struct ds_configuration ds_cfg_64 = { .sizeof_ds = 8 * 12, .sizeof_field = 8, .sizeof_rec[ds_bts] = 8 * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = 8 * 10 +#else + .sizeof_rec[ds_pebs] = 8 * 18 +#endif }; static inline void -- cgit v1.2.3 From de90add30e79261c3b5be68bb0f22d2ef98e8113 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:52:56 +0100 Subject: x86, bts: fix wrmsr and spinlock over kmalloc Impact: fix sleeping-with-spinlock-held bugs/crashes - Turn a wrmsr to write the DS_AREA MSR into a wrmsrl. - Use irqsave variants of spinlocks. - Do not allocate memory while holding spinlocks. Reported-by: Stephane Eranian Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 77 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 04e38ef646a..a2d1176c38e 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -209,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context); static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context *context; + unsigned long irq; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); context = (task ? task->thread.ds_ctx : this_system_context); if (context) context->count++; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); return context; } @@ -224,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) /* * Same as ds_get_context, but allocates the context and it's DS * structure, if necessary; returns NULL; if out of memory. - * - * pre: requires ds_lock to be held */ static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; + unsigned long irq; if (!context) { - spin_unlock(&ds_lock); - context = kzalloc(sizeof(*context), GFP_KERNEL); - - if (!context) { - spin_lock(&ds_lock); + if (!context) return NULL; - } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - spin_lock(&ds_lock); return NULL; } - spin_lock(&ds_lock); - /* - * Check for race - another CPU could have allocated - * it meanwhile: - */ + spin_lock_irqsave(&ds_lock, irq); + if (*p_context) { kfree(context->ds); kfree(context); - return *p_context; - } - - *p_context = context; - context->this = p_context; - context->task = task; + context = *p_context; + } else { + *p_context = context; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + context->this = p_context; + context->task = task; - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - get_tracer(task); + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, + (unsigned long)context->ds); + } + spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@ -286,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) */ static inline void ds_put_context(struct ds_context *context) { + unsigned long irq; + if (!context) return; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); if (--context->count) goto out; @@ -311,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context) kfree(context->ds); kfree(context); out: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); } @@ -382,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, struct ds_context *context; unsigned long buffer, adj; const unsigned long alignment = (1 << 3); + unsigned long irq; int error = 0; if (!ds_cfg.sizeof_ds) @@ -396,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return -EOPNOTSUPP; - spin_lock(&ds_lock); - - error = -ENOMEM; context = ds_alloc_context(task); if (!context) - goto out_unlock; + return -ENOMEM; + + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; if (context->owner[qual] == current) - goto out_unlock; + goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) - goto out_unlock; + goto out_put_tracer; context->owner[qual] = current; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; @@ -463,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size, out_release: context->owner[qual] = NULL; ds_put_context(context); + put_tracer(task); + return error; + + out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); + put_tracer(task); return error; out_unlock: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); return error; } -- cgit v1.2.3 From a266d9f1253a38ec2d5655ebcd6846298b0554f4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 21 Nov 2008 14:49:25 +0100 Subject: [CPUFREQ] powernow-k8: ignore out-of-range PstateStatus value A workaround for AMD CPU family 11h erratum 311 might cause that the P-state Status Register shows a "current P-state" which is larger than the "current P-state limit" in P-state Current Limit Register. For the wrong P-state value there is no ACPI _PSS object defined and powernow-k8/cpufreq can't determine the proper CPU frequency for that state. As a consequence this can cause a panic during boot (potentially with all recent kernel versions -- at least I have reproduced it with various 2.6.27 kernels and with the current .28 series), as an example: powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 \ ) powernow-k8: 0 : pstate 0 (2200 MHz) powernow-k8: 1 : pstate 1 (1100 MHz) powernow-k8: 2 : pstate 2 (600 MHz) BUG: unable to handle kernel paging request at ffff88086e7528b8 IP: [] cpufreq_stats_update+0x4a/0x5f PGD 202063 PUD 0 Oops: 0002 [#1] SMP last sysfs file: CPU 1 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.28-rc3-dirty #16 RIP: 0010:[] [] cpufreq_stats_update+0x4a/0\ f Synaptics claims to have extended capabilities, but I'm not able to read them.<6\ 6 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88006e7528c0 RDX: 00000000ffffffff RSI: ffff88006e54af00 RDI: ffffffff808f056c RBP: 00000000fffee697 R08: 0000000000000003 R09: ffff88006e73f080 R10: 0000000000000001 R11: 00000000002191c0 R12: ffff88006fb83c10 R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006fb50740(0000) knlGS:0000000000000000 Unable to initialize Synaptics hardware. CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: ffff88086e7528b8 CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff88006fb82000, task ffff88006fb816d0) Stack: ffff88006e74da50 0000000000000000 ffff88006e54af00 ffffffff804863c7 ffff88006e74da50 0000000000000000 00000000ffffffff 0000000000000000 ffff88006fb83c10 ffffffff8024b46c ffffffff808f0560 ffff88006fb83c10 Call Trace: [] ? cpufreq_stat_notifier_trans+0x51/0x83 [] ? notifier_call_chain+0x29/0x4c [] ? __srcu_notifier_call_chain+0x46/0x61 [] ? cpufreq_notify_transition+0x93/0xa9 [] ? powernowk8_target+0x1e8/0x5f3 [] ? cpufreq_governor_performance+0x1b/0x20 [] ? __cpufreq_governor+0x71/0xa8 [] ? __cpufreq_set_policy+0x101/0x13e [] ? cpufreq_add_dev+0x3f0/0x4cd [] ? handle_update+0x0/0x8 [] ? sysdev_driver_register+0xb6/0x10d [] ? powernowk8_init+0x0/0x7e [] ? cpufreq_register_driver+0x8f/0x140 [] ? _stext+0x56/0x14f [] ? proc_register+0x122/0x17d [] ? create_proc_entry+0x73/0x8a [] ? register_irq_proc+0x92/0xaa [] ? init_irq_proc+0x57/0x69 [] ? kernel_init+0x116/0x169 [] ? child_rip+0xa/0x11 [] ? kernel_init+0x0/0x169 [] ? child_rip+0x0/0x11 Code: 05 c5 83 36 00 48 c7 c2 48 5d 86 80 48 8b 04 d8 48 8b 40 08 48 8b 34 02 48\ RIP [] cpufreq_stats_update+0x4a/0x5f RSP CR2: ffff88086e7528b8 ---[ end trace 0678bac75e67a2f7 ]--- Kernel panic - not syncing: Attempted to kill init! In short, aftereffect of the wrong P-state is that cpufreq_stats_update() uses "-1" as index for some array in cpufreq_stats_update (unsigned int cpu) { ... if (stat->time_in_state) stat->time_in_state[stat->last_index] = cputime64_add(stat->time_in_state[stat->last_index], cputime_sub(cur_time, stat->last_time)); ... } Fortunately, the wrong P-state value is returned only if the core is in P-state 0. This fix solves the problem by detecting the out-of-range P-state, ignoring it, and using "0" instead. Cc: Mark Langsdorf Signed-off-by: Andreas Herrmann Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18 +++++++++++++++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58b87c..7f05f44b97e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; + if (data->currpstate == HW_PSTATE_INVALID) { + /* read (initial) hw pstate if not yet set */ + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if (i >= data->numps) + data->currpstate = HW_PSTATE_0; + else + data->currpstate = i; + } return 0; } do { @@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; + data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d9..65cfb5d7f77 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -5,6 +5,19 @@ * http://www.gnu.org/licenses/gpl.html */ + +enum pstate { + HW_PSTATE_INVALID = 0xff, + HW_PSTATE_0 = 0, + HW_PSTATE_1 = 1, + HW_PSTATE_2 = 2, + HW_PSTATE_3 = 3, + HW_PSTATE_4 = 4, + HW_PSTATE_5 = 5, + HW_PSTATE_6 = 6, + HW_PSTATE_7 = 7, +}; + struct powernow_k8_data { unsigned int cpu; @@ -23,7 +36,9 @@ struct powernow_k8_data { u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid or pstate */ - u32 currvid, currfid, currpstate; + u32 currvid; + u32 currfid; + enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. -- cgit v1.2.3 From ffd565a8b817d1eb4b25184e8418e8d96c3f56f6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 25 Nov 2008 17:18:03 +0100 Subject: x86: fixup config space size of CPU functions for AMD family 11h Impact: extend allowed configuration space access on 11h CPUs from 256 to 4K Signed-off-by: Andreas Herrmann Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/pci/fixup.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 3c27a809393..2051dc96b8e 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config - * have 4096 bytes. Even if the device is capable, that doesn't mean we can - * access it. Maybe we don't have a way to generate extended config space - * accesses. So check it + * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have + * 4096 bytes configuration space for each function of their processor + * configuration space. */ -static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) { dev->cfg_size = pci_cfg_space_size_ext(dev); } - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); /* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from -- cgit v1.2.3 From 6c475352e87224a8f0b8cc6f6cc96b30563dc5b4 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 25 Nov 2008 15:33:10 +0100 Subject: KVM: MMU: avoid creation of unreachable pages in the shadow It is possible for a shadow page to have a parent link pointing to a freed page. When zapping a high level table, kvm_mmu_page_unlink_children fails to remove the parent_pte link. For that to happen, the child must be unreachable via the shadow tree, which can happen in shadow_walk_entry if the guest pte was modified in between walk() and fetch(). Remove the parent pte reference in such case. Possible cause for oops in bug #2217430. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 613ec9aa674..84eee43bbe7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); kvm_release_pfn_clean(sw->pfn); sw->sptep = NULL; return 1; -- cgit v1.2.3 From b627c8b17ccacba38c975bc0f69a49fc4e5261c9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 20 Nov 2008 20:49:56 +0100 Subject: x86: always define DECLARE_PCI_UNMAP* macros Impact: fix boot crash on AMD IOMMU if CONFIG_GART_IOMMU is off Currently these macros evaluate to a no-op except the kernel is compiled with GART or Calgary support. But we also need these macros when we have SWIOTLB, VT-d or AMD IOMMU in the kernel. Since we always compile at least with SWIOTLB we can define these macros always. This patch is also for stable backport for the same reason the SWIOTLB default selection patch is. Signed-off-by: Joerg Roedel Cc: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pci_64.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index 5b28995d664..d02d936840a 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void); #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -#else -/* No IOMMU */ - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_X86_PCI_64_H */ -- cgit v1.2.3 From 7b1dedca42ac0d0d0be01e39d8461bb53a2389b3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 29 Nov 2008 13:46:27 +0100 Subject: x86: fix dma_mapping_error for 32bit x86 Devices like b44 ethernet can't dma from addresses above 1GB. The driver handles this cases by falling back to GFP_DMA allocation. But for detecting the problem it needs to get an indication from dma_mapping_error. The bug is triggered by using a VMSPLIT option of 2G/2G. Signed-off-by: Thomas Bogendoerfer Acked-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 7f225a4b2a2..097794ff6b7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -#ifdef CONFIG_X86_32 - return 0; -#else +#ifdef CONFIG_X86_64 struct dma_mapping_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); #endif + return (dma_addr == bad_dma_address); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -- cgit v1.2.3 From 2236d252e001ea57d53cec1954f680e503f3b8bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:34 +0000 Subject: enable_IR_x2apic() needs to be __init calls __init, called only from __init Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f960bbc..16f94879b52 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1315,7 +1315,7 @@ void enable_x2apic(void) } } -void enable_IR_x2apic(void) +void __init enable_IR_x2apic(void) { #ifdef CONFIG_INTR_REMAP int ret; -- cgit v1.2.3 From 23a14b9e9db49ed5f7683857557c26c874d4abb6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:44 +0000 Subject: kvm_setup_secondary_clock() is cpuinit Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1c9cc431ea4..e169ae9b6a6 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void __devinit kvm_setup_secondary_clock(void) +static void __cpuinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.3 From 37af46efa5413c6f4c25d9a24b4c43f2cc718eed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:54 +0000 Subject: xen_setup_vcpu_info_placement() is not init on x86 ... so get xen-ops.h in agreement with xen/smp.c Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/xen/xen-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d7422dc2a55..9e1afae8461 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_setup_vcpu_info_placement(void); +void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); -- cgit v1.2.3 From df6b07949b6cab9d119363d02ef63379160f6c82 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:38:04 +0000 Subject: xen_play_dead() is __cpuinit Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/xen/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d77da613b1d..acd9b6705e0 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu) alternatives_smp_switch(0); } -static void xen_play_dead(void) +static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ { play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); -- cgit v1.2.3 From 96b8936a9ed08746e47081458a5eb9e43a751e24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Nov 2008 08:10:03 +0100 Subject: remove __ARCH_WANT_COMPAT_SYS_PTRACE All architectures now use the generic compat_sys_ptrace, as should every new architecture that needs 32bit compat (if we'll ever get another). Remove the now superflous __ARCH_WANT_COMPAT_SYS_PTRACE define, and also kill a comment about __ARCH_SYS_PTRACE that was added after __ARCH_SYS_PTRACE was already gone. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- arch/x86/include/asm/ptrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d1531c8480b..eefb0594b05 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#define __ARCH_WANT_COMPAT_SYS_PTRACE - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 43714539eab42b2fa3653ea7bd667b36c2291b11 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sat, 29 Nov 2008 16:50:12 +0530 Subject: sched: don't export sched_mc_power_savings in laptops Impact: do not expose a control that has no effect Fix to prevent sched_mc_power_saving from being exported through sysfs on single-socket systems. (Say multicore single socket (Laptop)) CPU core map of the boot cpu should be equal to possible number of cpus for single socket system. This fix has been developed at FOSS.in kernel workout. Signed-off-by: Mahesh Salgaonkar Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4850e4b02b6..ff386ff50ed 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -239,7 +239,7 @@ struct pci_bus; void set_pci_bus_resources_arch_default(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) #define smt_capable() (smp_num_siblings > 1) #endif -- cgit v1.2.3 From 70d7d357578245f1993fd2d3ccd26088bcd38941 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:16:03 +0100 Subject: x86: fix broken flushing in GART nofullflush path Impact: remove stale IOTLB entries In the non-default nofullflush case the GART is only flushed when next_bit wraps around. But it can happen that an unmap operation unmaps memory which is behind the current next_bit location. If these addresses are reused it may result in stale GART IO/TLB entries. Fix this by setting the GART next_bit always behind an unmapped location. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a42b02b4df6..ba7ad83e20a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); iommu_area_free(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From eac9fbc6a90ab3440f4d98a8c52b15a724fc6f4f Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 24 Nov 2008 13:53:24 +0000 Subject: AMD IOMMU: struct amd_iommu remove padding on 64 bit Remove 16 bytes of padding from struct amd_iommu on 64bit builds reducing its size to 120 bytes, allowing it to span one fewer cachelines. Signed-off-by: Richard Kennedy Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1a30c0440c6..ac302a2fa33 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -251,13 +251,6 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ @@ -266,6 +259,13 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + /* pci domain of this IOMMU */ u16 pci_seg; @@ -284,19 +284,19 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; /* MSI number for event interrupt */ u16 evt_msi_num; - /* if one, we need to send a completion wait command */ - int need_sync; - /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; + /* if one, we need to send a completion wait command */ + int need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; -- cgit v1.2.3 From f91ba190648be4ff127d6aaf3993ac19d66dc2c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Nov 2008 12:56:12 +0100 Subject: AMD IOMMU: set device table entry for aliased devices In some rare cases a request can arrive an IOMMU with its originial requestor id even it is aliased. Handle this by setting the device table entry to the same protection domain for the original and the aliased requestor id. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4899e0e878..a232e5a85d4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev, print_devid(_bdf, 1); } + if (domain_for_device(_bdf) == NULL) + set_device_domain(*iommu, *domain, _bdf); + return 1; } -- cgit v1.2.3 From 09ee17eb8ea89514c13980c4010bdbbaea8630c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 12:19:27 +0100 Subject: AMD IOMMU: fix possible race while accessing iommu->need_sync The access to the iommu->need_sync member needs to be protected by the iommu->lock. Otherwise this is a possible race condition. Fix it with this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a232e5a85d4..5662e226b0c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); + if (!ret) + iommu->need_sync = 1; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); - iommu->need_sync = 0; - spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->need_sync) + goto out; + + iommu->need_sync = 0; + ret = __iommu_queue_command(iommu, &cmd); if (ret) @@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu, write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); iommu_queue_inv_dev_entry(iommu, devid); - - iommu->need_sync = 1; } /***************************************************************************** @@ -1034,8 +1033,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1063,8 +1061,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1130,8 +1127,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1176,8 +1172,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1228,8 +1223,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1260,8 +1254,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 9ea84ad77d635bdb76c9a08f44f21a9af98359ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Dec 2008 07:21:21 +0100 Subject: oprofile: fix CPU unplug panic in ppro_stop() If oprofile statically compiled in kernel, a cpu unplug triggers a panic in ppro_stop(), because a NULL pointer is dereferenced. Signed-off-by: Eric Dumazet Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 716d26f0e5d..e9f80c744cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); @@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; -- cgit v1.2.3 From 3d337c653c94be50f11a45fb14a2afa8a8a1a618 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Sun, 30 Nov 2008 15:39:10 -0500 Subject: x86/oprofile: fix Intel cpu family 6 detection Alan Jenkins wrote: > This is on an EeePC 701, /proc/cpuinfo as attached. > > Is this expected? Will the next release work? > > Thanks, Alan > > # opcontrol --setup --no-vmlinux > cpu_type 'unset' is not valid > you should upgrade oprofile or force the use of timer mode > > # opcontrol -v > opcontrol: oprofile 0.9.4 compiled on Nov 29 2008 22:44:10 > > # cat /dev/oprofile/cpu_type > i386/p6 > # uname -r > 2.6.28-rc6eeepc Hi Alan, Looking at the kernel driver code for oprofile it can return the "i386/p6" for the cpu_type. However, looking at the user-space oprofile code there isn't the matching entry in libop/op_cpu_type.c or the events/unit_mask files in events/i386 directory. The Intel AP-485 says this is a "Intel Pentium M processor model D". Seems like the oprofile kernel driver should be identifying the processor as "i386/p6_mobile" The driver identification code doesn't look quite right in nmi_init.c http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=blob;f=arch/x86/oprofile/nmi_int.c;h=022cd41ea9b4106e5884277096e80e9088a7c7a9;hb=HEAD has: 409 case 10 ... 13: 410 *cpu_type = "i386/p6"; 411 break; Referring to the Intel AP-485: case 10 and 11 should produce "i386/piii" case 13 should produce "i386/p6_mobile" I didn't see anything for case 12. Something like the attached patch. I don't have a celeron machine to verify that changes in this area of the kernel fix thing. -Will Signed-off-by: William Cohen Tested-by: Alan Jenkins Acked-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 022cd41ea9b..202864ad49a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/pii"; break; case 6 ... 8: + case 10 ... 11: *cpu_type = "i386/piii"; break; case 9: + case 13: *cpu_type = "i386/p6_mobile"; break; - case 10 ... 13: - *cpu_type = "i386/p6"; - break; case 14: *cpu_type = "i386/core"; break; -- cgit v1.2.3 From 9adc13867ec5fe0cd35434f92954d90e42381f0b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Dec 2008 13:33:35 +0100 Subject: x86: fix early panic with boot option "nosmp" Impact: fix boot crash with numcpus=0 on certain systems Fix early exception in __get_smp_config with nosmp. Bail out early when there is no MP table. Reported-by: Wu Fengguang Signed-off-by: Andi Kleen Tested-by: Wu Fengguang Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1dba0..0f4c1fd5a1f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early) printk(KERN_INFO "Using ACPI for processor (LAPIC) " "configuration information\n"); + if (!mpf) + return; + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -- cgit v1.2.3 From bb9d4ff80bc032d7961815c2ff5eaf458ae3adff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 15:59:48 +0100 Subject: AMD IOMMU: fix iommu_map_page function Impact: bugfix in iommu_map_page function Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5662e226b0c..67970a8c2ee 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -344,7 +344,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) -- cgit v1.2.3 From 3cc3d84bffbd93bdb671ac7961b12cd98fbb9266 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 16:44:31 +0100 Subject: AMD IOMMU: fix loop counter in free_pagetable function Impact: bugfix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 67970a8c2ee..0637e65ca0c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -600,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); -- cgit v1.2.3 From 24f811603e8ef4b9173f47fa263230168e237128 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:25:39 +0100 Subject: AMD IOMMU: fix typo in comment Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0637e65ca0c..2fde073b6be 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -922,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, -- cgit v1.2.3 From 8ad909c4c1b91bd35ba6a2af5e7ab3fc8d9fe283 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:37:20 +0100 Subject: AMD IOMMU: fix WARN_ON in dma_ops unmap path Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2fde073b6be..3133a0ea09f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -910,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); -- cgit v1.2.3 From b8d9905d025d80a2357e8ce4704fde2923f6a1bd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:40:26 +0100 Subject: AMD IOMMU: __unmap_single: check for bad_dma_address instead of 0 Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3133a0ea09f..a7b6dec6fc3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -983,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); -- cgit v1.2.3 From 087052b02f42b50316c6e4d7f2d8dfba3de6fc2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 16:09:57 +0200 Subject: x86: fix default_spin_lock_flags() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these warnings: arch/x86/kernel/paravirt-spinlocks.c: In function ‘default_spin_lock_flags’: arch/x86/kernel/paravirt-spinlocks.c:12: warning: passing argument 1 of ‘__raw_spin_lock’ from incompatible pointer type arch/x86/kernel/paravirt-spinlocks.c: At top level: arch/x86/kernel/paravirt-spinlocks.c:11: warning: ‘default_spin_lock_flags’ defined but not used showed that the prototype of default_spin_lock_flags() was confused about what type spinlocks have. the proper type on UP is raw_spinlock_t. Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1d..95777b0faa7 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } -- cgit v1.2.3 From ae8d04e2ecbb233926860e9ce145eac19c7835dc Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 13 Dec 2008 12:36:58 -0800 Subject: x86 Fix VMI crash on boot in 2.6.28-rc8 VMI initialiation can relocate the fixmap, causing early_ioremap to malfunction if it is initialized before the relocation. To fix this, VMI activation is split into two phases; the detection, which must happen before setting up ioremap, and the activation, which must happen after parsing early boot parameters. This fixes a crash on boot when VMI is enabled under VMware. Signed-off-by: Zachary Amsden Signed-off-by: Linus Torvalds --- arch/x86/include/asm/vmi.h | 8 +++++++- arch/x86/kernel/setup.c | 12 +++++------- arch/x86/kernel/smpboot.c | 2 -- arch/x86/kernel/vmi_32.c | 16 +++++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h index b7c0dea119f..61e08c0a290 100644 --- a/arch/x86/include/asm/vmi.h +++ b/arch/x86/include/asm/vmi.h @@ -223,9 +223,15 @@ struct pci_header { } __attribute__((packed)); /* Function prototypes for bootstrapping */ +#ifdef CONFIG_VMI extern void vmi_init(void); +extern void vmi_activate(void); extern void vmi_bringup(void); -extern void vmi_apply_boot_page_allocations(void); +#else +static inline void vmi_init(void) {} +static inline void vmi_activate(void) {} +static inline void vmi_bringup(void) {} +#endif /* State needed to start an application processor in an SMP system. */ struct vmi_ap_state { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9d5674f7b6c..bdec76e5559 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + /* VMI may relocate the fixmap; do this before touching ioremap area */ + vmi_init(); + early_cpu_init(); early_ioremap_init(); @@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be before kernel pagetables are setup - * or fixmap area is touched. - */ - vmi_init(); -#endif + /* Must be before kernel pagetables are setup */ + vmi_activate(); /* after early param, so could get panic from serial */ reserve_early_setup_data(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7b109339731..f71f96fc9e6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -294,9 +294,7 @@ static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ -#ifdef CONFIG_VMI vmi_bringup(); -#endif cpu_init(); preempt_disable(); smp_callin(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 8b6c393ab9f..22fd6577156 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -960,8 +960,6 @@ static inline int __init activate_vmi(void) void __init vmi_init(void) { - unsigned long flags; - if (!vmi_rom) probe_vmi_rom(); else @@ -973,13 +971,21 @@ void __init vmi_init(void) reserve_top_address(-vmi_rom->virtual_top); - local_irq_save(flags); - activate_vmi(); - #ifdef CONFIG_X86_IO_APIC /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif +} + +void vmi_activate(void) +{ + unsigned long flags; + + if (!vmi_rom) + return; + + local_irq_save(flags); + activate_vmi(); local_irq_restore(flags & X86_EFLAGS_IF); } -- cgit v1.2.3