From 6c2e94033df5ca11149e52dd179b8dde3172e9bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 12:33:49 +0100 Subject: x86: apic honour irq affinity which was set in early boot setup_ioapic_dest() is called after the non boot cpus have been brought up. It sets the irq affinity of all already configured interrupts to all cpus and ignores affinity settings which were done by the early bootup code. If the IRQ_NO_BALANCING or IRQ_AFFINITY_SET flags are set then use the affinity mask from the irq descriptor and not TARGET_CPUS. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2e..988ee89467d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3761,7 +3761,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; struct irq_cfg *cfg; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -3778,16 +3780,30 @@ void __init setup_ioapic_dest(void) * cpu is online. */ cfg = irq_cfg(irq); - if (!cfg->vector) + if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); + continue; + + } + + /* + * Honour affinities which have been set in early boot + */ + desc = irq_to_desc(irq); + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = TARGET_CPUS; + #ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, mask); else - set_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + set_ioapic_affinity_irq(irq, mask); } } -- cgit v1.2.3 From a4a16beadea041ab601e65b264b568e8b6b4f68d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Nov 2008 09:05:37 +0100 Subject: oprofile: fix an overflow in ppro code reset_value was changed from long to u64 in commit b99170288421c79f0c2efa8b33e26e65f4bb7fb8 (oprofile: Implement Intel architectural perfmon support) But dynamic allocation of this array use a wrong type (long instead of u64) Cc: Andi Kleen Signed-off-by: Eric Dumazet Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 3f1b81a83e2..716d26f0e5d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) int i; if (!reset_value) { - reset_value = kmalloc(sizeof(unsigned) * num_counters, + reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; -- cgit v1.2.3 From 9bc646f163b136684390081262fab0fd8f5343ca Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:08:45 +0600 Subject: x86: fix __cpuinit/__init tangle in init_thread_xstate() Impact: fix incorrect __init annotation This patch removes the following section mismatch warning. A patch set was send previously (http://lkml.org/lkml/2008/11/10/407). But introduce some other problem, reported by Rufus (http://lkml.org/lkml/2008/11/11/46). Then Ingo Molnar suggest that, it's best to remove __init from xsave_cntxt_init(void). Which is the second patch in this series. Now, this one removes the following warning. WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2237): Section mismatch in reference from the function cpu_init() to the function .init.text:init_thread_xstate() The function __cpuinit cpu_init() references a function __init init_thread_xstate(). If init_thread_xstate is only used by cpu_init then annotate init_thread_xstate with a matching annotation. Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608d4ca..b0f61f0dcd0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __init init_thread_xstate(void) +void __cpuinit init_thread_xstate(void) { if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); -- cgit v1.2.3 From bfe085f62f98a49e1b864e4950389c7205174e4f Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:12:50 +0600 Subject: x86: fixing __cpuinit/__init tangle, xsave_cntxt_init() Annotate xsave_cntxt_init() as "can be called outside of __init". Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb75e82..15c3e699918 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __init xsave_cntxt_init(void) +void __ref xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; -- cgit v1.2.3 From a1967d64414dab500e86cbbddf8eae6ad2047903 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Nov 2008 11:16:48 -0800 Subject: x86: revert irq number limitation Impact: fix MSIx not enough irq numbers available regression The manual revert of the sparse_irq patches missed to bring the number of possible irqs back to the .27 status. This resulted in a regression when two multichannel network cards were placed in a system with only one IO_APIC - causing the networking driver to not have the right IRQ and the device not coming up. Remove the dynamic allocation logic leftovers and simply return NR_IRQS in probe_nr_irqs() for now. Fixes: http://lkml.org/lkml/2008/11/19/354 Reported-by: Jesper Dangaard Brouer Signed-off-by: Thomas Gleixner Tested-by: Jesper Dangaard Brouer Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index c9513e1ff28..1fec0f9b150 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic) int __init probe_nr_irqs(void) { - int idx; - int nr = 0; -#ifndef CONFIG_XEN - int nr_min = 32; -#else - int nr_min = NR_IRQS; -#endif - - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < nr_min) - nr = nr_min; - if (WARN_ON(nr > NR_IRQS)) - nr = NR_IRQS; - - return nr; + return NR_IRQS; } /* -------------------------------------------------------------------------- -- cgit v1.2.3 From 86bbc2c235e500957b213e7e64ce2e0ccb8bc131 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 21 Nov 2008 10:21:33 +0000 Subject: xen: pin correct PGD on suspend Impact: fix Xen guest boot failure commit eefb47f6a1e855653d275cb90592a3587ea93a09 ("xen: use spin_lock_nest_lock when pinning a pagetable") changed xen_pgd_walk to walk over mm->pgd rather than taking pgd as an argument. This breaks xen_mm_(un)pin_all() because it makes init_mm.pgd readonly instead of the pgd we are interested in and therefore the pin subsequently fails. (XEN) mm.c:2280:d15 Bad type (saw 00000000e8000001 != exp 0000000060000000) for mfn bc464 (pfn 21ca7) (XEN) mm.c:2665:d15 Error while pinning mfn bc464 [ 14.586913] 1 multicall(s) failed: cpu 0 [ 14.586926] Pid: 14, comm: kstop/0 Not tainted 2.6.28-rc5-x86_32p-xenU-00172-gee2f6cc #200 [ 14.586940] Call Trace: [ 14.586955] [] ? printk+0x18/0x1e [ 14.586972] [] xen_mc_flush+0x163/0x1d0 [ 14.586986] [] __xen_pgd_pin+0xa1/0x110 [ 14.587000] [] ? stop_cpu+0x0/0xf0 [ 14.587015] [] xen_mm_pin_all+0x4b/0x70 [ 14.587029] [] xen_suspend+0x39/0xe0 [ 14.587042] [] ? stop_cpu+0x0/0xf0 [ 14.587054] [] stop_cpu+0x9d/0xf0 [ 14.587067] [] run_workqueue+0x8d/0x150 [ 14.587080] [] ? _spin_unlock_irqrestore+0x23/0x40 [ 14.587094] [] ? prepare_to_wait+0x3a/0x70 [ 14.587107] [] worker_thread+0x88/0xf0 [ 14.587120] [] ? autoremove_wake_function+0x0/0x50 [ 14.587133] [] ? worker_thread+0x0/0xf0 [ 14.587146] [] kthread+0x3c/0x70 [ 14.587157] [] ? kthread+0x0/0x70 [ 14.587170] [] kernel_thread_helper+0x7/0x10 [ 14.587181] call 1/3: op=14 arg=[c0415000] result=0 [ 14.587192] call 2/3: op=14 arg=[e1ca2000] result=0 [ 14.587204] call 3/3: op=26 arg=[c1808860] result=-22 Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 688936044dc..636ef4caa52 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) * For 64-bit, we must skip the Xen hole in the middle of the address * space, just after the big x86-64 virtual hole. */ -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - pgd_t *pgd = mm->pgd; int flush = 0; unsigned hole_low, hole_high; unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; @@ -753,6 +752,14 @@ out: return flush; } +static int xen_pgd_walk(struct mm_struct *mm, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) +{ + return __xen_pgd_walk(mm, mm->pgd, func, limit); +} + /* If we're using split pte locks, then take the page's lock and return a pointer to it. Otherwise return NULL. */ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) @@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { + if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { /* re-enable interrupts for flushing */ xen_mc_issue(0); @@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) PT_PMD); #endif - xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); + __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); xen_mc_issue(0); } -- cgit v1.2.3 From bd2b3ca7686d9470b1b58df631daa03179486182 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 20 Nov 2008 11:47:18 +0200 Subject: KVM: VMX: Fix interrupt loss during race with NMI If an interrupt cannot be injected for some reason (say, page fault when fetching the IDT descriptor), the interrupt is marked for reinjection. However, if an NMI is queued at this time, the NMI will be injected instead and the NMI will be lost. Fix by deferring the NMI injection until the interrupt has been injected successfully. Analyzed by Jan Kiszka. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d06b4dc0e2e..a4018b01e1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { -- cgit v1.2.3 From 0c0f40bdbe4ddb48ebecfb5c2b56eeb175a57c45 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 21 Nov 2008 19:13:58 +0100 Subject: KVM: MMU: fix sync of ptes addressed at owner pagetable During page sync, if a pagetable contains a self referencing pte (that points to the pagetable), the corresponding spte may be marked as writable even though all mappings are supposed to be write protected. Fix by clearing page unsync before syncing individual sptes. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1983d9477c..410ddbc1aa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) } rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } kvm_mmu_flush_tlb(vcpu); - kvm_unlink_unsync_page(vcpu->kvm, sp); return 0; } -- cgit v1.2.3 From 5cf02b7bafddb6c3c16ddfb23d3ce187f70528ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 25 Nov 2008 00:42:37 -0500 Subject: x86: use limited register constraint for setnz Impact: build fix with certain compilers GCC can decide to use %dil when "r" is used, which is not valid for setnz. This bug was brought out by Stephen Rothwell's merging of the branch tracer into linux-next. [ Thanks to Uros Bizjak for recommending 'q' over 'Q' ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/boot/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 0be77b39328..7e8e8b25f5f 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -74,7 +74,7 @@ static int kbd_pending(void) { u8 pending; asm volatile("int $0x16; setnz %0" - : "=rm" (pending) + : "=qm" (pending) : "a" (0x0100)); return pending; } -- cgit v1.2.3 From eff79aee91dd07e944df65fa448c8baeee7709d8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 25 Nov 2008 14:13:03 +0100 Subject: arch/x86/kernel/pci-calgary_64.c: change simple_strtol to simple_strtoul Impact: fix theoretical option string parsing overflow Since bridge is unsigned, it would seem better to use simple_strtoul that simple_strtol. A simplified version of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r2@ long e; position p; @@ e = simple_strtol@p(...) @@ position p != r2.p; type T; T e; @@ e = - simple_strtol@p + simple_strtoul (...) // Signed-off-by: Julia Lawall Cc: muli@il.ibm.com Cc: jdmason@kudzu.us Cc: discuss@x86-64.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d78f3..d28bbdc35e4 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) ++p; if (*p == '\0') break; - bridge = simple_strtol(p, &endp, 0); + bridge = simple_strtoul(p, &endp, 0); if (p == endp) break; -- cgit v1.2.3 From 292c669cd7087a090d6420e223eb1072f3e3c50b Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:45:13 +0100 Subject: x86, bts: exclude ds.c from build when disabled Impact: cleanup Move the CONFIG guard from the .c file into the makefile. Reported-by: Andi Kleen Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/ds.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e..b62a7667828 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-y += ds.o +obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d1a121443bd..4c8d57ec966 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -21,8 +21,6 @@ */ -#ifdef CONFIG_X86_DS - #include #include @@ -878,4 +876,3 @@ void ds_free(struct ds_context *context) while (leftovers--) ds_put_context(context); } -#endif /* CONFIG_X86_DS */ -- cgit v1.2.3 From e5e8ca633bbe972eff6f84e064a63c0c08ed6c3d Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:47:19 +0100 Subject: x86, bts: turn macro into static inline function Impact: cleanup Replace a macro with a static inline function. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 72c5a190bf4..a95008457ea 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -23,12 +23,13 @@ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H -#ifdef CONFIG_X86_DS #include #include +#ifdef CONFIG_X86_DS + struct task_struct; /* @@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context); #else /* CONFIG_X86_DS */ -#define ds_init_intel(config) do {} while (0) +struct cpuinfo_x86; +static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ -- cgit v1.2.3 From c4858ffc8f2dc850cb1f609c679b1ac1ad36ef0c Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:49:06 +0100 Subject: x86, pebs: fix PEBS record size configuration Impact: fix DS hw enablement on 64-bit x86 Fix the PEBS record size in the DS configuration. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 4c8d57ec966..04e38ef646a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -816,13 +816,21 @@ static const struct ds_configuration ds_cfg_var = { .sizeof_ds = sizeof(long) * 12, .sizeof_field = sizeof(long), .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = sizeof(long) * 10 +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18 +#endif }; static const struct ds_configuration ds_cfg_64 = { .sizeof_ds = 8 * 12, .sizeof_field = 8, .sizeof_rec[ds_bts] = 8 * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = 8 * 10 +#else + .sizeof_rec[ds_pebs] = 8 * 18 +#endif }; static inline void -- cgit v1.2.3 From de90add30e79261c3b5be68bb0f22d2ef98e8113 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:52:56 +0100 Subject: x86, bts: fix wrmsr and spinlock over kmalloc Impact: fix sleeping-with-spinlock-held bugs/crashes - Turn a wrmsr to write the DS_AREA MSR into a wrmsrl. - Use irqsave variants of spinlocks. - Do not allocate memory while holding spinlocks. Reported-by: Stephane Eranian Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 77 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 04e38ef646a..a2d1176c38e 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -209,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context); static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context *context; + unsigned long irq; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); context = (task ? task->thread.ds_ctx : this_system_context); if (context) context->count++; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); return context; } @@ -224,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) /* * Same as ds_get_context, but allocates the context and it's DS * structure, if necessary; returns NULL; if out of memory. - * - * pre: requires ds_lock to be held */ static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; + unsigned long irq; if (!context) { - spin_unlock(&ds_lock); - context = kzalloc(sizeof(*context), GFP_KERNEL); - - if (!context) { - spin_lock(&ds_lock); + if (!context) return NULL; - } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - spin_lock(&ds_lock); return NULL; } - spin_lock(&ds_lock); - /* - * Check for race - another CPU could have allocated - * it meanwhile: - */ + spin_lock_irqsave(&ds_lock, irq); + if (*p_context) { kfree(context->ds); kfree(context); - return *p_context; - } - - *p_context = context; - context->this = p_context; - context->task = task; + context = *p_context; + } else { + *p_context = context; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + context->this = p_context; + context->task = task; - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - get_tracer(task); + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, + (unsigned long)context->ds); + } + spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@ -286,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) */ static inline void ds_put_context(struct ds_context *context) { + unsigned long irq; + if (!context) return; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); if (--context->count) goto out; @@ -311,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context) kfree(context->ds); kfree(context); out: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); } @@ -382,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, struct ds_context *context; unsigned long buffer, adj; const unsigned long alignment = (1 << 3); + unsigned long irq; int error = 0; if (!ds_cfg.sizeof_ds) @@ -396,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return -EOPNOTSUPP; - spin_lock(&ds_lock); - - error = -ENOMEM; context = ds_alloc_context(task); if (!context) - goto out_unlock; + return -ENOMEM; + + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; if (context->owner[qual] == current) - goto out_unlock; + goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) - goto out_unlock; + goto out_put_tracer; context->owner[qual] = current; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; @@ -463,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size, out_release: context->owner[qual] = NULL; ds_put_context(context); + put_tracer(task); + return error; + + out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); + put_tracer(task); return error; out_unlock: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); return error; } -- cgit v1.2.3 From a266d9f1253a38ec2d5655ebcd6846298b0554f4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 21 Nov 2008 14:49:25 +0100 Subject: [CPUFREQ] powernow-k8: ignore out-of-range PstateStatus value A workaround for AMD CPU family 11h erratum 311 might cause that the P-state Status Register shows a "current P-state" which is larger than the "current P-state limit" in P-state Current Limit Register. For the wrong P-state value there is no ACPI _PSS object defined and powernow-k8/cpufreq can't determine the proper CPU frequency for that state. As a consequence this can cause a panic during boot (potentially with all recent kernel versions -- at least I have reproduced it with various 2.6.27 kernels and with the current .28 series), as an example: powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 \ ) powernow-k8: 0 : pstate 0 (2200 MHz) powernow-k8: 1 : pstate 1 (1100 MHz) powernow-k8: 2 : pstate 2 (600 MHz) BUG: unable to handle kernel paging request at ffff88086e7528b8 IP: [] cpufreq_stats_update+0x4a/0x5f PGD 202063 PUD 0 Oops: 0002 [#1] SMP last sysfs file: CPU 1 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.28-rc3-dirty #16 RIP: 0010:[] [] cpufreq_stats_update+0x4a/0\ f Synaptics claims to have extended capabilities, but I'm not able to read them.<6\ 6 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88006e7528c0 RDX: 00000000ffffffff RSI: ffff88006e54af00 RDI: ffffffff808f056c RBP: 00000000fffee697 R08: 0000000000000003 R09: ffff88006e73f080 R10: 0000000000000001 R11: 00000000002191c0 R12: ffff88006fb83c10 R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006fb50740(0000) knlGS:0000000000000000 Unable to initialize Synaptics hardware. CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: ffff88086e7528b8 CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff88006fb82000, task ffff88006fb816d0) Stack: ffff88006e74da50 0000000000000000 ffff88006e54af00 ffffffff804863c7 ffff88006e74da50 0000000000000000 00000000ffffffff 0000000000000000 ffff88006fb83c10 ffffffff8024b46c ffffffff808f0560 ffff88006fb83c10 Call Trace: [] ? cpufreq_stat_notifier_trans+0x51/0x83 [] ? notifier_call_chain+0x29/0x4c [] ? __srcu_notifier_call_chain+0x46/0x61 [] ? cpufreq_notify_transition+0x93/0xa9 [] ? powernowk8_target+0x1e8/0x5f3 [] ? cpufreq_governor_performance+0x1b/0x20 [] ? __cpufreq_governor+0x71/0xa8 [] ? __cpufreq_set_policy+0x101/0x13e [] ? cpufreq_add_dev+0x3f0/0x4cd [] ? handle_update+0x0/0x8 [] ? sysdev_driver_register+0xb6/0x10d [] ? powernowk8_init+0x0/0x7e [] ? cpufreq_register_driver+0x8f/0x140 [] ? _stext+0x56/0x14f [] ? proc_register+0x122/0x17d [] ? create_proc_entry+0x73/0x8a [] ? register_irq_proc+0x92/0xaa [] ? init_irq_proc+0x57/0x69 [] ? kernel_init+0x116/0x169 [] ? child_rip+0xa/0x11 [] ? kernel_init+0x0/0x169 [] ? child_rip+0x0/0x11 Code: 05 c5 83 36 00 48 c7 c2 48 5d 86 80 48 8b 04 d8 48 8b 40 08 48 8b 34 02 48\ RIP [] cpufreq_stats_update+0x4a/0x5f RSP CR2: ffff88086e7528b8 ---[ end trace 0678bac75e67a2f7 ]--- Kernel panic - not syncing: Attempted to kill init! In short, aftereffect of the wrong P-state is that cpufreq_stats_update() uses "-1" as index for some array in cpufreq_stats_update (unsigned int cpu) { ... if (stat->time_in_state) stat->time_in_state[stat->last_index] = cputime64_add(stat->time_in_state[stat->last_index], cputime_sub(cur_time, stat->last_time)); ... } Fortunately, the wrong P-state value is returned only if the core is in P-state 0. This fix solves the problem by detecting the out-of-range P-state, ignoring it, and using "0" instead. Cc: Mark Langsdorf Signed-off-by: Andreas Herrmann Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18 +++++++++++++++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58b87c..7f05f44b97e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; + if (data->currpstate == HW_PSTATE_INVALID) { + /* read (initial) hw pstate if not yet set */ + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if (i >= data->numps) + data->currpstate = HW_PSTATE_0; + else + data->currpstate = i; + } return 0; } do { @@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; + data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d9..65cfb5d7f77 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -5,6 +5,19 @@ * http://www.gnu.org/licenses/gpl.html */ + +enum pstate { + HW_PSTATE_INVALID = 0xff, + HW_PSTATE_0 = 0, + HW_PSTATE_1 = 1, + HW_PSTATE_2 = 2, + HW_PSTATE_3 = 3, + HW_PSTATE_4 = 4, + HW_PSTATE_5 = 5, + HW_PSTATE_6 = 6, + HW_PSTATE_7 = 7, +}; + struct powernow_k8_data { unsigned int cpu; @@ -23,7 +36,9 @@ struct powernow_k8_data { u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid or pstate */ - u32 currvid, currfid, currpstate; + u32 currvid; + u32 currfid; + enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. -- cgit v1.2.3 From ffd565a8b817d1eb4b25184e8418e8d96c3f56f6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 25 Nov 2008 17:18:03 +0100 Subject: x86: fixup config space size of CPU functions for AMD family 11h Impact: extend allowed configuration space access on 11h CPUs from 256 to 4K Signed-off-by: Andreas Herrmann Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/pci/fixup.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 3c27a809393..2051dc96b8e 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config - * have 4096 bytes. Even if the device is capable, that doesn't mean we can - * access it. Maybe we don't have a way to generate extended config space - * accesses. So check it + * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have + * 4096 bytes configuration space for each function of their processor + * configuration space. */ -static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) { dev->cfg_size = pci_cfg_space_size_ext(dev); } - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); /* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from -- cgit v1.2.3 From 6c475352e87224a8f0b8cc6f6cc96b30563dc5b4 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 25 Nov 2008 15:33:10 +0100 Subject: KVM: MMU: avoid creation of unreachable pages in the shadow It is possible for a shadow page to have a parent link pointing to a freed page. When zapping a high level table, kvm_mmu_page_unlink_children fails to remove the parent_pte link. For that to happen, the child must be unreachable via the shadow tree, which can happen in shadow_walk_entry if the guest pte was modified in between walk() and fetch(). Remove the parent pte reference in such case. Possible cause for oops in bug #2217430. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 613ec9aa674..84eee43bbe7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); kvm_release_pfn_clean(sw->pfn); sw->sptep = NULL; return 1; -- cgit v1.2.3 From b627c8b17ccacba38c975bc0f69a49fc4e5261c9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 20 Nov 2008 20:49:56 +0100 Subject: x86: always define DECLARE_PCI_UNMAP* macros Impact: fix boot crash on AMD IOMMU if CONFIG_GART_IOMMU is off Currently these macros evaluate to a no-op except the kernel is compiled with GART or Calgary support. But we also need these macros when we have SWIOTLB, VT-d or AMD IOMMU in the kernel. Since we always compile at least with SWIOTLB we can define these macros always. This patch is also for stable backport for the same reason the SWIOTLB default selection patch is. Signed-off-by: Joerg Roedel Cc: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pci_64.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index 5b28995d664..d02d936840a 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void); #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -#else -/* No IOMMU */ - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_X86_PCI_64_H */ -- cgit v1.2.3 From 7b1dedca42ac0d0d0be01e39d8461bb53a2389b3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 29 Nov 2008 13:46:27 +0100 Subject: x86: fix dma_mapping_error for 32bit x86 Devices like b44 ethernet can't dma from addresses above 1GB. The driver handles this cases by falling back to GFP_DMA allocation. But for detecting the problem it needs to get an indication from dma_mapping_error. The bug is triggered by using a VMSPLIT option of 2G/2G. Signed-off-by: Thomas Bogendoerfer Acked-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 7f225a4b2a2..097794ff6b7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -#ifdef CONFIG_X86_32 - return 0; -#else +#ifdef CONFIG_X86_64 struct dma_mapping_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); #endif + return (dma_addr == bad_dma_address); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -- cgit v1.2.3 From 2236d252e001ea57d53cec1954f680e503f3b8bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:34 +0000 Subject: enable_IR_x2apic() needs to be __init calls __init, called only from __init Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f960bbc..16f94879b52 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1315,7 +1315,7 @@ void enable_x2apic(void) } } -void enable_IR_x2apic(void) +void __init enable_IR_x2apic(void) { #ifdef CONFIG_INTR_REMAP int ret; -- cgit v1.2.3 From 23a14b9e9db49ed5f7683857557c26c874d4abb6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:44 +0000 Subject: kvm_setup_secondary_clock() is cpuinit Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1c9cc431ea4..e169ae9b6a6 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void __devinit kvm_setup_secondary_clock(void) +static void __cpuinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.3 From 37af46efa5413c6f4c25d9a24b4c43f2cc718eed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:54 +0000 Subject: xen_setup_vcpu_info_placement() is not init on x86 ... so get xen-ops.h in agreement with xen/smp.c Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/xen/xen-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d7422dc2a55..9e1afae8461 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_setup_vcpu_info_placement(void); +void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); -- cgit v1.2.3 From df6b07949b6cab9d119363d02ef63379160f6c82 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:38:04 +0000 Subject: xen_play_dead() is __cpuinit Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/xen/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d77da613b1d..acd9b6705e0 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu) alternatives_smp_switch(0); } -static void xen_play_dead(void) +static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ { play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); -- cgit v1.2.3 From 96b8936a9ed08746e47081458a5eb9e43a751e24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Nov 2008 08:10:03 +0100 Subject: remove __ARCH_WANT_COMPAT_SYS_PTRACE All architectures now use the generic compat_sys_ptrace, as should every new architecture that needs 32bit compat (if we'll ever get another). Remove the now superflous __ARCH_WANT_COMPAT_SYS_PTRACE define, and also kill a comment about __ARCH_SYS_PTRACE that was added after __ARCH_SYS_PTRACE was already gone. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- arch/x86/include/asm/ptrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d1531c8480b..eefb0594b05 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#define __ARCH_WANT_COMPAT_SYS_PTRACE - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 43714539eab42b2fa3653ea7bd667b36c2291b11 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sat, 29 Nov 2008 16:50:12 +0530 Subject: sched: don't export sched_mc_power_savings in laptops Impact: do not expose a control that has no effect Fix to prevent sched_mc_power_saving from being exported through sysfs on single-socket systems. (Say multicore single socket (Laptop)) CPU core map of the boot cpu should be equal to possible number of cpus for single socket system. This fix has been developed at FOSS.in kernel workout. Signed-off-by: Mahesh Salgaonkar Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4850e4b02b6..ff386ff50ed 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -239,7 +239,7 @@ struct pci_bus; void set_pci_bus_resources_arch_default(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) #define smt_capable() (smp_num_siblings > 1) #endif -- cgit v1.2.3 From 70d7d357578245f1993fd2d3ccd26088bcd38941 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:16:03 +0100 Subject: x86: fix broken flushing in GART nofullflush path Impact: remove stale IOTLB entries In the non-default nofullflush case the GART is only flushed when next_bit wraps around. But it can happen that an unmap operation unmaps memory which is behind the current next_bit location. If these addresses are reused it may result in stale GART IO/TLB entries. Fix this by setting the GART next_bit always behind an unmapped location. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a42b02b4df6..ba7ad83e20a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); iommu_area_free(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From eac9fbc6a90ab3440f4d98a8c52b15a724fc6f4f Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 24 Nov 2008 13:53:24 +0000 Subject: AMD IOMMU: struct amd_iommu remove padding on 64 bit Remove 16 bytes of padding from struct amd_iommu on 64bit builds reducing its size to 120 bytes, allowing it to span one fewer cachelines. Signed-off-by: Richard Kennedy Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1a30c0440c6..ac302a2fa33 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -251,13 +251,6 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ @@ -266,6 +259,13 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + /* pci domain of this IOMMU */ u16 pci_seg; @@ -284,19 +284,19 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; /* MSI number for event interrupt */ u16 evt_msi_num; - /* if one, we need to send a completion wait command */ - int need_sync; - /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; + /* if one, we need to send a completion wait command */ + int need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; -- cgit v1.2.3 From f91ba190648be4ff127d6aaf3993ac19d66dc2c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Nov 2008 12:56:12 +0100 Subject: AMD IOMMU: set device table entry for aliased devices In some rare cases a request can arrive an IOMMU with its originial requestor id even it is aliased. Handle this by setting the device table entry to the same protection domain for the original and the aliased requestor id. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4899e0e878..a232e5a85d4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev, print_devid(_bdf, 1); } + if (domain_for_device(_bdf) == NULL) + set_device_domain(*iommu, *domain, _bdf); + return 1; } -- cgit v1.2.3 From 09ee17eb8ea89514c13980c4010bdbbaea8630c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 12:19:27 +0100 Subject: AMD IOMMU: fix possible race while accessing iommu->need_sync The access to the iommu->need_sync member needs to be protected by the iommu->lock. Otherwise this is a possible race condition. Fix it with this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a232e5a85d4..5662e226b0c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); + if (!ret) + iommu->need_sync = 1; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); - iommu->need_sync = 0; - spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->need_sync) + goto out; + + iommu->need_sync = 0; + ret = __iommu_queue_command(iommu, &cmd); if (ret) @@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu, write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); iommu_queue_inv_dev_entry(iommu, devid); - - iommu->need_sync = 1; } /***************************************************************************** @@ -1034,8 +1033,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1063,8 +1061,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1130,8 +1127,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1176,8 +1172,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1228,8 +1223,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1260,8 +1254,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 9ea84ad77d635bdb76c9a08f44f21a9af98359ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Dec 2008 07:21:21 +0100 Subject: oprofile: fix CPU unplug panic in ppro_stop() If oprofile statically compiled in kernel, a cpu unplug triggers a panic in ppro_stop(), because a NULL pointer is dereferenced. Signed-off-by: Eric Dumazet Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 716d26f0e5d..e9f80c744cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); @@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; -- cgit v1.2.3 From 3d337c653c94be50f11a45fb14a2afa8a8a1a618 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Sun, 30 Nov 2008 15:39:10 -0500 Subject: x86/oprofile: fix Intel cpu family 6 detection Alan Jenkins wrote: > This is on an EeePC 701, /proc/cpuinfo as attached. > > Is this expected? Will the next release work? > > Thanks, Alan > > # opcontrol --setup --no-vmlinux > cpu_type 'unset' is not valid > you should upgrade oprofile or force the use of timer mode > > # opcontrol -v > opcontrol: oprofile 0.9.4 compiled on Nov 29 2008 22:44:10 > > # cat /dev/oprofile/cpu_type > i386/p6 > # uname -r > 2.6.28-rc6eeepc Hi Alan, Looking at the kernel driver code for oprofile it can return the "i386/p6" for the cpu_type. However, looking at the user-space oprofile code there isn't the matching entry in libop/op_cpu_type.c or the events/unit_mask files in events/i386 directory. The Intel AP-485 says this is a "Intel Pentium M processor model D". Seems like the oprofile kernel driver should be identifying the processor as "i386/p6_mobile" The driver identification code doesn't look quite right in nmi_init.c http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=blob;f=arch/x86/oprofile/nmi_int.c;h=022cd41ea9b4106e5884277096e80e9088a7c7a9;hb=HEAD has: 409 case 10 ... 13: 410 *cpu_type = "i386/p6"; 411 break; Referring to the Intel AP-485: case 10 and 11 should produce "i386/piii" case 13 should produce "i386/p6_mobile" I didn't see anything for case 12. Something like the attached patch. I don't have a celeron machine to verify that changes in this area of the kernel fix thing. -Will Signed-off-by: William Cohen Tested-by: Alan Jenkins Acked-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 022cd41ea9b..202864ad49a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/pii"; break; case 6 ... 8: + case 10 ... 11: *cpu_type = "i386/piii"; break; case 9: + case 13: *cpu_type = "i386/p6_mobile"; break; - case 10 ... 13: - *cpu_type = "i386/p6"; - break; case 14: *cpu_type = "i386/core"; break; -- cgit v1.2.3 From 9adc13867ec5fe0cd35434f92954d90e42381f0b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Dec 2008 13:33:35 +0100 Subject: x86: fix early panic with boot option "nosmp" Impact: fix boot crash with numcpus=0 on certain systems Fix early exception in __get_smp_config with nosmp. Bail out early when there is no MP table. Reported-by: Wu Fengguang Signed-off-by: Andi Kleen Tested-by: Wu Fengguang Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1dba0..0f4c1fd5a1f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early) printk(KERN_INFO "Using ACPI for processor (LAPIC) " "configuration information\n"); + if (!mpf) + return; + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -- cgit v1.2.3 From bb9d4ff80bc032d7961815c2ff5eaf458ae3adff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 15:59:48 +0100 Subject: AMD IOMMU: fix iommu_map_page function Impact: bugfix in iommu_map_page function Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5662e226b0c..67970a8c2ee 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -344,7 +344,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) -- cgit v1.2.3 From 3cc3d84bffbd93bdb671ac7961b12cd98fbb9266 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 16:44:31 +0100 Subject: AMD IOMMU: fix loop counter in free_pagetable function Impact: bugfix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 67970a8c2ee..0637e65ca0c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -600,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); -- cgit v1.2.3 From 24f811603e8ef4b9173f47fa263230168e237128 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:25:39 +0100 Subject: AMD IOMMU: fix typo in comment Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0637e65ca0c..2fde073b6be 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -922,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, -- cgit v1.2.3 From 8ad909c4c1b91bd35ba6a2af5e7ab3fc8d9fe283 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:37:20 +0100 Subject: AMD IOMMU: fix WARN_ON in dma_ops unmap path Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2fde073b6be..3133a0ea09f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -910,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); -- cgit v1.2.3 From b8d9905d025d80a2357e8ce4704fde2923f6a1bd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:40:26 +0100 Subject: AMD IOMMU: __unmap_single: check for bad_dma_address instead of 0 Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3133a0ea09f..a7b6dec6fc3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -983,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); -- cgit v1.2.3 From 087052b02f42b50316c6e4d7f2d8dfba3de6fc2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 16:09:57 +0200 Subject: x86: fix default_spin_lock_flags() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these warnings: arch/x86/kernel/paravirt-spinlocks.c: In function ‘default_spin_lock_flags’: arch/x86/kernel/paravirt-spinlocks.c:12: warning: passing argument 1 of ‘__raw_spin_lock’ from incompatible pointer type arch/x86/kernel/paravirt-spinlocks.c: At top level: arch/x86/kernel/paravirt-spinlocks.c:11: warning: ‘default_spin_lock_flags’ defined but not used showed that the prototype of default_spin_lock_flags() was confused about what type spinlocks have. the proper type on UP is raw_spinlock_t. Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1d..95777b0faa7 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } -- cgit v1.2.3 From ae8d04e2ecbb233926860e9ce145eac19c7835dc Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 13 Dec 2008 12:36:58 -0800 Subject: x86 Fix VMI crash on boot in 2.6.28-rc8 VMI initialiation can relocate the fixmap, causing early_ioremap to malfunction if it is initialized before the relocation. To fix this, VMI activation is split into two phases; the detection, which must happen before setting up ioremap, and the activation, which must happen after parsing early boot parameters. This fixes a crash on boot when VMI is enabled under VMware. Signed-off-by: Zachary Amsden Signed-off-by: Linus Torvalds --- arch/x86/include/asm/vmi.h | 8 +++++++- arch/x86/kernel/setup.c | 12 +++++------- arch/x86/kernel/smpboot.c | 2 -- arch/x86/kernel/vmi_32.c | 16 +++++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h index b7c0dea119f..61e08c0a290 100644 --- a/arch/x86/include/asm/vmi.h +++ b/arch/x86/include/asm/vmi.h @@ -223,9 +223,15 @@ struct pci_header { } __attribute__((packed)); /* Function prototypes for bootstrapping */ +#ifdef CONFIG_VMI extern void vmi_init(void); +extern void vmi_activate(void); extern void vmi_bringup(void); -extern void vmi_apply_boot_page_allocations(void); +#else +static inline void vmi_init(void) {} +static inline void vmi_activate(void) {} +static inline void vmi_bringup(void) {} +#endif /* State needed to start an application processor in an SMP system. */ struct vmi_ap_state { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9d5674f7b6c..bdec76e5559 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + /* VMI may relocate the fixmap; do this before touching ioremap area */ + vmi_init(); + early_cpu_init(); early_ioremap_init(); @@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be before kernel pagetables are setup - * or fixmap area is touched. - */ - vmi_init(); -#endif + /* Must be before kernel pagetables are setup */ + vmi_activate(); /* after early param, so could get panic from serial */ reserve_early_setup_data(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7b109339731..f71f96fc9e6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -294,9 +294,7 @@ static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ -#ifdef CONFIG_VMI vmi_bringup(); -#endif cpu_init(); preempt_disable(); smp_callin(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 8b6c393ab9f..22fd6577156 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -960,8 +960,6 @@ static inline int __init activate_vmi(void) void __init vmi_init(void) { - unsigned long flags; - if (!vmi_rom) probe_vmi_rom(); else @@ -973,13 +971,21 @@ void __init vmi_init(void) reserve_top_address(-vmi_rom->virtual_top); - local_irq_save(flags); - activate_vmi(); - #ifdef CONFIG_X86_IO_APIC /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif +} + +void vmi_activate(void) +{ + unsigned long flags; + + if (!vmi_rom) + return; + + local_irq_save(flags); + activate_vmi(); local_irq_restore(flags & X86_EFLAGS_IF); } -- cgit v1.2.3 From 83fd5cc6481c6b7fa8b45f8a7e0aa7120213430b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 16 Dec 2008 19:17:11 +0100 Subject: AMD IOMMU: allocate rlookup_table with __GFP_ZERO Impact: fix bug which can lead to panic in prealloc_protection_domains() Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 30ae2701b3d..c90a15eba5c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1074,7 +1074,8 @@ int __init amd_iommu_init(void) goto free; /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); if (amd_iommu_rlookup_table == NULL) goto free; -- cgit v1.2.3 From cf9b303e55da810255638c0b616b1a3f7eda9320 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 15 Dec 2008 23:33:10 +0100 Subject: x86: re-enable MCE on secondary CPUS after suspend/resume Impact: fix disabled MCE after resume Don't prevent multiple initialization of MCEs. Back from early prehistory mcheck_init() has a reentry check. Presumably that was needed in very old kernels to prevent it entering twice. But as Andreas points out this prevents CPU hotplug (and therefore resume) to correctly reinitialize MCEs when a AP boots again after being offlined. Just drop the check. Reported-by: Andreas Herrmann Signed-off-by: Andi Kleen Tested-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b031a4ac85..1c838032fd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) */ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { - static cpumask_t mce_cpus = CPU_MASK_NONE; - mce_cpu_quirks(c); if (mce_dont_init || - cpu_test_and_set(smp_processor_id(), mce_cpus) || !mce_available(c)) return; -- cgit v1.2.3 From cf558d25e5c9f70fa0279c9b7b8b4aed7cae9bd4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Dec 2008 15:06:01 +0100 Subject: AMD IOMMU: set cmd buffer pointers to zero manually Impact: set cmd buffer head and tail pointers to zero in case nobody else did Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c90a15eba5c..c6cc22815d3 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -427,6 +427,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, &entry, sizeof(entry)); + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); return cmd_buf; -- cgit v1.2.3 From 84df81759590ad16b0024cf46b3423cca76b2e07 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Dec 2008 16:36:44 +0100 Subject: AMD IOMMU: panic if completion wait loop fails Impact: prevents data corruption after a failed completion wait loop Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a7b6dec6fc3..0a60d60ed03 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -235,8 +235,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) - printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); + if (unlikely(i == EXIT_LOOP_COUNT)) + panic("AMD IOMMU: Completion wait loop failed\n"); + out: spin_unlock_irqrestore(&iommu->lock, flags); -- cgit v1.2.3 From 55aab5f49e384a361668d112eefdb33e90779af9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 17 Dec 2008 12:52:34 -0700 Subject: x86 gart: don't complain if no AMD GART found Impact: remove annoying bootup printk It's perfectly normal for no AMD GART to be present, e.g., if you have Intel CPUs. None of the other iommu_init() functions makes noise when it finds nothing. Signed-off-by: Bjorn Helgaas Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ba7ad83e20a..a35eaa379ff 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -745,10 +745,8 @@ void __init gart_iommu_init(void) unsigned long scratch; long i; - if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { - printk(KERN_INFO "PCI-GART: No AMD GART found.\n"); + if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) return; - } #ifndef CONFIG_AGP_AMD64 no_agp = 1; -- cgit v1.2.3 From 280a9ca5d0663b185ddc4443052076c29652a328 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Sat, 20 Dec 2008 00:15:24 +0100 Subject: x86: fix resume (S2R) broken by Intel microcode module, on A110L Impact: fix deadlock This is in response to the following bug report: Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=12100 Subject : resume (S2R) broken by Intel microcode module, on A110L Submitter : Andreas Mohr Date : 2008-11-25 08:48 (19 days old) Handled-By : Dmitry Adamushko [ The deadlock scenario has been discovered by Andreas Mohr ] I think I might have a logical explanation why the system: (http://bugzilla.kernel.org/show_bug.cgi?id=12100) might hang upon resuming, OTOH it should have likely hanged each and every time. (1) possible deadlock in microcode_resume_cpu() if either 'if' section is taken; (2) now, I don't see it in spec. and can't experimentally verify it (newer ucodes don't seem to be available for my Core2duo)... but logically-wise, I'd think that when read upon resuming, the 'microcode revision' (MSR 0x8B) should be back to its original one (we need to reload ucode anyway so it doesn't seem logical if a cpu doesn't drop the version)... if so, the comparison with memcmp() for the full 'struct cpu_signature' is wrong... and that's how one of the aforementioned 'if' sections might have been triggered - leading to a deadlock. Obviously, in my tests I simulated loading/resuming with the ucode of the same version (just to see that the file is loaded/re-loaded upon resuming) so this issue has never popped up. I'd appreciate if someone with an appropriate system might give a try to the 2nd patch (titled "fix a comparison && deadlock..."). In any case, the deadlock situation is a must-have fix. Reported-by: Andreas Mohr Signed-off-by: Dmitry Adamushko Tested-by: Andreas Mohr Signed-off-by: Ingo Molnar Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 19 ++++++++++++++----- arch/x86/kernel/microcode_intel.c | 6 ++++++ 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 82fb2809ce3..c4b5b24e021 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = { .name = "microcode", }; -static void microcode_fini_cpu(int cpu) +static void __microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); microcode_ops->microcode_fini_cpu(cpu); uci->valid = 0; +} + +static void microcode_fini_cpu(int cpu) +{ + mutex_lock(µcode_mutex); + __microcode_fini_cpu(cpu); mutex_unlock(µcode_mutex); } @@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu) * to this cpu (a bit of paranoia): */ if (microcode_ops->collect_cpu_info(cpu, &nsig)) { - microcode_fini_cpu(cpu); + __microcode_fini_cpu(cpu); + printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n", + cpu); return -1; } - if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { - microcode_fini_cpu(cpu); + if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) { + __microcode_fini_cpu(cpu); + printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n", + cpu); /* Should we look for a new ucode here? */ return 1; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 622dc4a2178..a8e62792d17 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); + unsigned long flags; unsigned int val[2]; memset(csig, 0, sizeof(*csig)); @@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) csig->pf = 1 << ((val[1] >> 18) & 7); } + /* serialize access to the physical write to MSR 0x79 */ + spin_lock_irqsave(µcode_update_lock, flags); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); + spin_unlock_irqrestore(µcode_update_lock, flags); + pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", csig->sig, csig->pf, csig->rev); -- cgit v1.2.3 From 40f15ad8aadff5ebb621b17a6f303ad2cd3f847d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 24 Dec 2008 10:49:51 +0100 Subject: x86: disable X86_PTRACE_BTS there's a new ptrace arch level feature in .28: config X86_PTRACE_BTS bool "Branch Trace Store" it has broken fork() handling: the old DS area gets copied over into a new task without clearing it. Fixes exist but they came too late: c5dee61: x86, bts: memory accounting bf53de9: x86, bts: add fork and exit handling and are queued up for v2.6.29. This shows that the facility is still not tested well enough to release into a stable kernel - disable it for now and reactivate in .29. In .29 the hardware-branch-tracer will use the DS/BTS facilities too - hopefully resulting in better code. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b815664fe37..8e99073b9e0 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -520,6 +520,7 @@ config X86_PTRACE_BTS bool "Branch Trace Store" default y depends on X86_DEBUGCTLMSR + depends on BROKEN help This adds a ptrace interface to the hardware's branch trace store. -- cgit v1.2.3