From 54c28d294c658abb6d6430a49fda230fdfd601c8 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 3 Apr 2009 15:39:42 -0500 Subject: x86, uv: add Kconfig dependency on NUMA for UV systems Impact: build fix Add Kconfig dependency on NUMA for enabling UV. Although it might be possible to configure non-NUMA UV systems, they are unsupported and not interesting. Much of the infrastructure for UV requires NUMA support. Signed-off-by: Jack Steiner LKML-Reference: <20090403203942.GA20137@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 748e50a1a15..2817ab5a120 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -354,6 +354,7 @@ config X86_UV bool "SGI Ultraviolet" depends on X86_64 depends on X86_EXTENDED_PLATFORM + depends on NUMA select X86_X2APIC ---help--- This option is needed in order to support SGI Ultraviolet systems. -- cgit v1.2.3 From ca713c2ab0eea3458962983e4a7e13430ea479b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 15 Apr 2009 18:39:13 -0700 Subject: x86/irq: mark NUMA_MIGRATE_IRQ_DESC broken It causes crash on system with lots of cards with MSI-X when irq_balancer enabled... The patches fixing it were both complex and fragile, according to Eric they were also doing quite dangerous things to the hardware. Instead we now have patches that solve this problem via static NUMA node mappings - not dynamic allocation and balancing. The patches are much simpler than this method but are still too large outside of the merge window, so we mark the dynamic balancer as broken for now, and queue up the new approach for v2.6.31. [ Impact: deactivate broken kernel feature ] Reported-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49E68C41.4020801@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc25b9f5e4c..b1d2be7d91a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -277,6 +277,7 @@ config SPARSE_IRQ config NUMA_MIGRATE_IRQ_DESC bool "Move irq desc when changing irq smp_affinity" depends on SPARSE_IRQ && NUMA + depends on BROKEN default n ---help--- This enables moving irq_desc to cpu/node that irq will use handled. -- cgit v1.2.3 From 2a3313f494c2f3f74a27d66f0f14b38558b7dba2 Mon Sep 17 00:00:00 2001 From: "Michael K. Johnson" Date: Tue, 21 Apr 2009 21:44:48 -0400 Subject: x86: more than 8 32-bit CPUs requires X86_BIGSMP $ cat x86-more-than-8-cpus-requires-bigsmp.patch Enforce NR_CPUS <= 8 limitation if X86_BIGSMP not set Configuring more than 8 logical CPUs on 32-bit x86 requires X86_BIGSMP to be set in order to boot successfully, if more than 8 logical CPUs are actually found at boot time. The X86_BIGSMP help text describes that it is required to be set if more than 8 CPUs are configured, but this was previously not enforced. This configuration error has affected multiple distributions: https://bugzilla.redhat.com/show_bug.cgi?id=480844 https://issues.rpath.com/browse/RPL-3022 Signed-off-by: Michael K Johnson LKML-Reference: <20090422014448.GB32541@logo.rdu.rpath.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9086e6307a..b5cda6c03d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -664,6 +664,7 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP + range 2 8 if SMP && X86_32 && !X86_BIGSMP range 2 512 if SMP && !MAXSMP default "1" if !SMP default "4096" if MAXSMP -- cgit v1.2.3 From b4ecc126991b30fe5f9a59dfacda046aeac124b2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 May 2009 17:16:55 -0700 Subject: x86: Fix performance regression caused by paravirt_ops on native kernels Xiaohui Xin and some other folks at Intel have been looking into what's behind the performance hit of paravirt_ops when running native. It appears that the hit is entirely due to the paravirtualized spinlocks introduced by: | commit 8efcbab674de2bee45a2e4cdf97de16b8e609ac8 | Date: Mon Jul 7 12:07:51 2008 -0700 | | paravirt: introduce a "lock-byte" spinlock implementation The extra call/return in the spinlock path is somehow causing an increase in the cycles/instruction of somewhere around 2-7% (seems to vary quite a lot from test to test). The working theory is that the CPU's pipeline is getting upset about the call->call->locked-op->return->return, and seems to be failing to speculate (though I haven't seen anything definitive about the precise reasons). This doesn't entirely make sense, because the performance hit is also visible on unlock and other operations which don't involve locked instructions. But spinlock operations clearly swamp all the other pvops operations, even though I can't imagine that they're nearly as common (there's only a .05% increase in instructions executed). If I disable just the pv-spinlock calls, my tests show that pvops is identical to non-pvops performance on native (my measurements show that it is actually about .1% faster, but Xiaohui shows a .05% slowdown). Summary of results, averaging 10 runs of the "mmperf" test, using a no-pvops build as baseline: nopv Pv-nospin Pv-spin CPU cycles 100.00% 99.89% 102.18% instructions 100.00% 100.10% 100.15% CPI 100.00% 99.79% 102.03% cache ref 100.00% 100.84% 100.28% cache miss 100.00% 90.47% 88.56% cache miss rate 100.00% 89.72% 88.31% branches 100.00% 99.93% 100.04% branch miss 100.00% 103.66% 107.72% branch miss rt 100.00% 103.73% 107.67% wallclock 100.00% 99.90% 102.20% The clear effect here is that the 2% increase in CPI is directly reflected in the final wallclock time. (The other interesting effect is that the more ops are out of line calls via pvops, the lower the cache access and miss rates. Not too surprising, but it suggests that the non-pvops kernel is over-inlined. On the flipside, the branch misses go up correspondingly...) So, what's the fix? Paravirt patching turns all the pvops calls into direct calls, so _spin_lock etc do end up having direct calls. For example, the compiler generated code for paravirtualized _spin_lock is: <_spin_lock+0>: mov %gs:0xb4c8,%rax <_spin_lock+9>: incl 0xffffffffffffe044(%rax) <_spin_lock+15>: callq *0xffffffff805a5b30 <_spin_lock+22>: retq The indirect call will get patched to: <_spin_lock+0>: mov %gs:0xb4c8,%rax <_spin_lock+9>: incl 0xffffffffffffe044(%rax) <_spin_lock+15>: callq <__ticket_spin_lock> <_spin_lock+20>: nop; nop /* or whatever 2-byte nop */ <_spin_lock+22>: retq One possibility is to inline _spin_lock, etc, when building an optimised kernel (ie, when there's no spinlock/preempt instrumentation/debugging enabled). That will remove the outer call/return pair, returning the instruction stream to a single call/return, which will presumably execute the same as the non-pvops case. The downsides arel 1) it will replicate the preempt_disable/enable code at eack lock/unlock callsite; this code is fairly small, but not nothing; and 2) the spinlock definitions are already a very heavily tangled mass of #ifdefs and other preprocessor magic, and making any changes will be non-trivial. The other obvious answer is to disable pv-spinlocks. Making them a separate config option is fairly easy, and it would be trivial to enable them only when Xen is enabled (as the only non-default user). But it doesn't really address the common case of a distro build which is going to have Xen support enabled, and leaves the open question of whether the native performance cost of pv-spinlocks is worth the performance improvement on a loaded Xen system (10% saving of overall system CPU when guests block rather than spin). Still it is a reasonable short-term workaround. [ Impact: fix pvops performance regression when running native ] Analysed-by: "Xin Xiaohui" Analysed-by: "Li Xin" Analysed-by: "Nakajima Jun" Signed-off-by: Jeremy Fitzhardinge Acked-by: H. Peter Anvin Cc: Nick Piggin Cc: Xen-devel LKML-Reference: <4A0B62F7.5030802@goop.org> [ fixed the help text ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee1..a6efe0a2e9a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -498,6 +498,19 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP && EXPERIMENTAL + ---help--- + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + Unfortunately the downside is an up to 5% performance hit on + native kernels, with various workloads. + + If you are unsure how to answer this question, answer N. + config PARAVIRT_CLOCK bool default n -- cgit v1.2.3