From 73557af5bf32c3db973050de1fb73423e8fc873e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 31 Oct 2008 13:59:49 -0400 Subject: x86, voyager: fix smp_intr_init() compile breakage Impact: fix x86/Voyager build Looks like this became static on the rest of x86. Fix it up by adding an external definition to mach-voyager/setup.c Signed-off-by: Ingo Molnar --- arch/x86/include/asm/voyager.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index 9c811d2e6f9..b3e64730762 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -520,6 +520,7 @@ extern void voyager_restart(void); extern void voyager_cat_power_off(void); extern void voyager_cat_do_common_interrupt(void); extern void voyager_handle_nmi(void); +extern void voyager_smp_intr_init(void); /* Commands for the following are */ #define VOYAGER_PSI_READ 0 #define VOYAGER_PSI_WRITE 1 -- cgit v1.2.3 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469..4850e4b02b6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } -- cgit v1.2.3 From 1b4897688011cd05e07f00dcfe6af3331eb36a3c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Nov 2008 14:10:13 -0800 Subject: x86: size NR_IRQS on 32-bit systems the same way as 64-bit Impact: make NR_IRQS big enough for system with lots of apic/pins If lots of IO_APIC's are there (or can be there), size the same way as 64-bit, depending on MAX_IO_APICS and NR_CPUS. This fixes the boot problem reported by Ben Hutchings on a 32-bit server with 5 IO-APICs and 240 IO-APIC pins. Signed-off-by: Yinghai Tested-by: Ben Hutchings Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d843ed0e9b2..503aadc4ad3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,30 +101,22 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif !defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) -# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) - -# define NR_IRQS 224 - -# else /* IO_APIC || PARAVIRT */ - -# define NR_IRQS 16 - -# endif +# define NR_IRQS 224 -#else /* !VISWS && !VOYAGER */ +#else /* IO_APIC || PARAVIRT */ -# define NR_IRQS 224 +# define NR_IRQS 16 -#endif /* VISWS */ +#endif /* Voyager specific defines */ /* These define the CPIs we use in linux */ -- cgit v1.2.3 From 7db282fa67b58daff8a57f9e1c93d4474b5908ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 5 Nov 2008 23:36:48 -0800 Subject: x86: remove VISWS and PARAVIRT around NR_IRQS puzzle Impact: fix warning message when PARAVIRT is set in config Remove stale #ifdef components from our IRQ sizing logic. x86/Voyager is the only holdout. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 503aadc4ad3..0005adb0f94 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,18 +101,18 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_X86_VOYAGER) # define NR_IRQS 224 -#else /* IO_APIC || PARAVIRT */ +#else /* IO_APIC || VOYAGER */ # define NR_IRQS 16 -- cgit v1.2.3 From 0d12cdd5f883f508d33b85c1bae98fa28987c8c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 16:19:55 +0100 Subject: sched: improve sched_clock() performance in scheduler-intense workloads native_read_tsc() overhead accounts for 20% of the system overhead: 659567 system_call 41222.9375 686796 schedule 435.7843 718382 __switch_to 665.1685 823875 switch_mm 4526.7857 1883122 native_read_tsc 55385.9412 9761990 total 2.8468 this is large part due to the rdtsc_barrier() that is done before and after reading the TSC. But sched_clock() is not a precise clock in the GTOD sense, using such barriers is completely pointless. So remove the barriers and only use them in vget_cycles(). This improves lat_ctx performance by about 5%. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 2 -- arch/x86/include/asm/tsc.h | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 46be2fa7ac2..c2a812ebde8 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); - rdtsc_barrier(); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - rdtsc_barrier(); return EAX_EDX_VAL(val, low, high); } diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163cc91..9cd83a8e40d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t vget_cycles(void) { + cycles_t cycles; + /* * We only do VDSOs on TSC capable CPUs, so this shouldnt * access boot_cpu_data (which is not VDSO-safe): @@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - return (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + cycles = (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + + return cycles; } extern void tsc_init(void); -- cgit v1.2.3