From 4c7f8900f1d8a0e464e7092f132a7e93f7c20f2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Feb 2008 07:06:55 +0100 Subject: x86: stackprotector & PARAVIRT fix on paravirt enabled 64-bit kernels the paravirt ops do function calls themselves - which is bad with the stackprotector - for example pda_init() loads 0 into %gs and then does MSR_GS_BASE write (which modifies gs.base) - but that MSR write is a function call on paravirt, which with stackprotector tries to read the stack canary from the PDA ... crashing the bootup. the solution was suggested by Arjan van de Ven: to exclude paravirt.c from stackprotector, too many lowlevel functionality is in it. It's not like we'll have paravirt functions with character arrays on their stack anyway... Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b472..8161e5a91ca 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,7 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc_64.o := $(nostackp) +CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o -- cgit v1.2.3 From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() whith the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task has a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..d8640388039 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -640,7 +640,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); /* * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement -- cgit v1.2.3 From ce22bd92cba0958e052cb1ce0f89f1d3a02b60a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 12 May 2008 15:44:31 +0200 Subject: x86: setup stack canary for the idle threads The idle threads for non-boot CPUs are a bit special in how they are created; the result is that these don't have the stack canary set up properly in their PDA. Easiest fix is to just always set the PDA up correctly when entering the idle thread; this is a NOP for the boot cpu. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d8640388039..9e69e223023 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,6 +146,15 @@ static inline void play_dead(void) void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us. This is as good a place as any for + * doing that. + */ + write_pda(stack_canary, current->stack_canary); +#endif /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); -- cgit v1.2.3 From 7e09b2a02dae4616a5a26000169964b32f86cd35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:22:34 +0100 Subject: x86: fix canary of the boot CPU's idle task the boot CPU's idle task has a zero stackprotector canary value. this is a special task that is never forked, so the fork code does not randomize its canary. Do it when we hit cpu_idle(). Academic sidenote: this means that the early init code runs with a zero canary and hence the canary becomes predictable for this short, boot-only amount of time. Although attack vectors against early init code are very rare, it might make sense to move this initialization to an earlier point. (to one of the early init functions that never return - such as start_kernel()) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9e69e223023..d4c7ac7aa43 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,13 @@ void cpu_idle(void) #ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us. This is as good a place as any for - * doing that. + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): */ + current->stack_canary = get_random_int(); write_pda(stack_canary, current->stack_canary); #endif /* endless idle loop with no priority at all */ -- cgit v1.2.3 From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d4c7ac7aa43..5107cb214c7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -147,7 +147,6 @@ void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; -#ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have @@ -156,9 +155,8 @@ void cpu_idle(void) * invalid canaries already on the stack wont ever * trigger): */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); -#endif + boot_init_stack_canary(); + /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); -- cgit v1.2.3 From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5107cb214c7..cce47f7fbf2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -16,6 +16,7 @@ #include +#include #include #include #include -- cgit v1.2.3 From 6e5385d44b2df05e50a8d07ba0e14d3e32685237 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:11:35 +0530 Subject: x86: smp.h move prefill_possible_map declartion to cpu.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf6..f41c4486c27 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,7 +89,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 07576bee03e..f8c885bed18 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From f472cdba849cc3d838f3788469316e8572463a8c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:34:25 +0530 Subject: x86: smp.h move stack_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f95a40f718..f7619a2eaff 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include -- cgit v1.2.3 From 96b89dc6598a50e3aac8e2c6d826ae3795b7d030 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:35:48 +0530 Subject: x86: smp.h move safe_smp_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/reboot.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35a..11b93cabdf7 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643..f8536fee5c1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include -- cgit v1.2.3 From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:38:59 +0530 Subject: x86: smp.h move boot_cpu_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..109c91db202 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 41401db698cbb5d1869776bf336881db267e7d19 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:42:46 +0530 Subject: x86: rename intel_mp_floating to mpf_intel Impact: cleanup, solve 80 columns wrap problems intel_mp_floating should be renamed to mpf_intel. The reason: the 'f' in MPF already means 'floating' which means MP Floating pointer structure - no need to repeat that in the type name. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a..6cea941c4db 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -569,14 +569,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct intel_mp_floating *mpf_found; +static struct mpf_intel *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { - struct intel_mp_floating *mpf = mpf_found; + struct mpf_intel *mpf = mpf_found; if (!mpf) return; @@ -687,14 +687,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { - mpf = (struct intel_mp_floating *)bp; + mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && (mpf->mpf_length == 1) && !mpf_checksum((unsigned char *)bp, 16) && @@ -1000,7 +1000,7 @@ static int __init update_mp_table(void) { char str[16]; char oem[10]; - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; struct mpc_table *mpc, *mpc_new; if (!enable_update_mptable) @@ -1052,7 +1052,7 @@ static int __init update_mp_table(void) mpc = mpc_new; /* check if we can modify that */ if (mpc_new_phys - mpf->mpf_physptr) { - struct intel_mp_floating *mpf_new; + struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); -- cgit v1.2.3 From 1eb1b3b65dc3e3ffcc6a60e115c085c0c11c1077 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:43:26 +0530 Subject: x86: rename all fields of mpf_intel mpf_X to X Impact: cleanup, solve 80 columns wrap problems It would be cleaner to rename all the mpf->mpf_X fields to mpf->X - that alone would give 4 characters per usage site. (we already know that it's an 'mpf' entity - no need to duplicate that in the field too) Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 50 +++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6cea941c4db..8385d4e7e15 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -597,9 +597,9 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->mpf_feature2 & (1 << 7)) { + if (mpf->feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -610,7 +610,7 @@ static void __init __get_smp_config(unsigned int early) /* * Now see if we need to read further. */ - if (mpf->mpf_feature1 != 0) { + if (mpf->feature1 != 0) { if (early) { /* * local APIC has default address @@ -620,16 +620,16 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); + mpf->feature1); + construct_default_ISA_mptable(mpf->feature1); - } else if (mpf->mpf_physptr) { + } else if (mpf->physptr) { /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -696,10 +696,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, while (length > 0) { mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && + (mpf->length == 1) && !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { + ((mpf->specification == 1) + || (mpf->specification == 4))) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif @@ -712,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 1; reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { + if (mpf->physptr) { unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* @@ -721,14 +721,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * the bottom is mapped now. * PC-9800's MPC table places on the very last * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; + if (mpf->physptr + size > end) + size = end - mpf->physptr; #endif - reserve_bootmem_generic(mpf->mpf_physptr, size, + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); } @@ -1013,19 +1013,19 @@ static int __init update_mp_table(void) /* * Now see if we need to go further. */ - if (mpf->mpf_feature1 != 0) + if (mpf->feature1 != 0) return 0; - if (!mpf->mpf_physptr) + if (!mpf->physptr) return 0; - mpc = phys_to_virt(mpf->mpf_physptr); + mpc = phys_to_virt(mpf->physptr); if (!smp_check_mpc(mpc, oem, str)) return 0; printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); - printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; @@ -1046,23 +1046,23 @@ static int __init update_mp_table(void) } printk(KERN_INFO "use in-positon replacing\n"); } else { - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); memcpy(mpc_new, mpc, mpc->length); mpc = mpc_new; /* check if we can modify that */ - if (mpc_new_phys - mpf->mpf_physptr) { + if (mpc_new_phys - mpf->physptr) { struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; } - mpf->mpf_checksum = 0; - mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + mpf->checksum = 0; + mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "physptr new: %x\n", mpf->physptr); } /* -- cgit v1.2.3 From 068790334cececc3d2d945617ccc585477da2e38 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:17:37 +0530 Subject: x86: smp.h move cpu_callin_mask and cpu_callin_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/setup_percpu.c | 1 + arch/x86/kernel/smpboot.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 14e543b6fd4..f0025846244 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba..bf63de72b64 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6c2b8444b83..84ac1cf46d8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From fb8fd077fbf0de6662acfd240e8e6b25cf3202ca Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:20:24 +0530 Subject: x86: smp.h move cpu_callout_mask and cpu_callout_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84ac1cf46d8..6c2b8444b83 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 7106a5ab89c50c6b5aadea0850b40323804a922d Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 10 Jan 2009 23:00:22 -0500 Subject: x86-64: remove locked instruction from switch_to() Impact: micro-optimization The patch below removes an unnecessary locked instruction from switch_to(). TIF_FORK is only ever set in copy_thread() on initial process creation, and gets cleared during the first scheduling of the process. As such, it is safe to use an unlocked test for the flag within switch_to(). Signed-off-by: Benjamin LaHaise Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..38dd37458e4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -408,6 +408,8 @@ END(save_paranoid) ENTRY(ret_from_fork) DEFAULT_FRAME + LOCK ; btr $TIF_FORK,TI_flags(%r8) + push kernel_eflags(%rip) CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags -- cgit v1.2.3 From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- arch/x86/kernel/io_apic.c | 20 ++++++++++---------- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..1337eab60ec 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -356,7 +356,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpumask_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -579,9 +579,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(&desc->affinity, cfg->domain, mask); + cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); + return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -2383,7 +2383,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (cfg->move_in_progress) send_cleanup_vector(cfg); - cpumask_copy(&desc->affinity, mask); + cpumask_copy(desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2405,11 +2405,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2434,7 +2434,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2448,7 +2448,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } @@ -2516,7 +2516,7 @@ static void irq_complete_move(struct irq_desc **descp) /* domain has not changed, but affinity did */ me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { + if (cpumask_test_cpu(me, desc->affinity)) { *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; @@ -4039,7 +4039,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e..e0f29be8ab0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -248,7 +248,7 @@ void fixup_irqs(void) if (irq == 2) continue; - affinity = &desc->affinity; + affinity = desc->affinity; if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); affinity = cpu_all_mask; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec02..0b21cb1ea11 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -100,7 +100,7 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; + affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); -- cgit v1.2.3 From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: x86: change flush_tlb_others to take a const struct cpumask Impact: reduce stack usage, use new cpumask API. This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask. I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail. To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves. Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/kernel/tlb_32.c | 67 +++++++++++++++++++++--------------------------- arch/x86/kernel/tlb_64.c | 61 +++++++++++++++++++++++-------------------- arch/x86/kernel/tlb_uv.c | 16 ++++++------ 3 files changed, 70 insertions(+), 74 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ce505464224..ec53818f4e3 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) * Optimizations Manfred Spraul */ -static cpumask_t flush_cpumask; +static cpumask_var_t flush_cpumask; static struct mm_struct *flush_mm; static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); @@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) cpu = get_cpu(); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } ack_APIC_irq(); smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); + cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); out: put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - cpumask_t cpumask = *cpumaskp; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask * - mask must exist :) */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, */ spin_lock(&tlbstate_lock); + cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); +#ifdef CONFIG_HOTPLUG_CPU + /* If a CPU which we ran on has gone down, OK. */ + cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + if (unlikely(cpumask_empty(flush_cpumask))) { + spin_unlock(&tlbstate_lock); + return; + } +#endif flush_mm = mm; flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); /* * Make the above memory operations globally visible before @@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask)) + while (!cpumask_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ cpu_relax(); @@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page); @@ -254,3 +239,9 @@ void reset_lazy_tlbstate(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } +static int init_flush_cpumask(void) +{ + alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); + return 0; +} +early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..38836aef51b 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -43,10 +43,10 @@ union smp_flush_state { struct { - cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, f->flush_cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the @@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) - return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; @@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_mm = mm; f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); /* * Make the above memory operations globally visible before @@ -191,9 +188,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask)) + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; @@ -201,6 +198,24 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + cpumask_var_t after_uv_flush; + + if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { + cpumask_andnot(after_uv_flush, + cpumask, cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + free_cpumask_var(after_uv_flush); + return; + } + } + flush_tlb_others_ipi(cpumask, mm, va); +} + static int __cpuinit init_smp_flush(void) { int i; @@ -215,25 +230,18 @@ core_initcall(init_smp_flush); void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -241,8 +249,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -250,11 +258,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -263,8 +268,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e..690dcf1a27d 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * The cpumaskp mask contains the cpus the broadcast was sent to. * * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask is left - * unchanged. + * Returns 0 if some remote flushing remains to be done. The mask will have + * some bits still set. */ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - cpumask_t *cpumaskp) + struct cpumask *cpumaskp) { int completion_status = 0; int right_shift; @@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpu_clear(bit, *cpumaskp); + cpumask_clear_cpu(bit, cpumaskp); } - if (!cpus_empty(*cpumaskp)) + if (!cpumask_empty(cpumaskp)) return 0; return 1; } @@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Returns 1 if all remote flushing was done. * Returns 0 if some remote flushing remains to be done. */ -int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, +int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, unsigned long va) { int i; @@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { -- cgit v1.2.3 From 0e21990ae7ee11af94f44f240b06e520cf1505d4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: SGI UV cpumask: use static temp cpumask in flush_tlb Impact: Improve tlb flush performance for UV Calling alloc_cpumask_var a zillion times a second does affect performance. Replace with static cpumask. Note: when CONFIG_X86_UV is defined, this extra PER_CPU memory will be optimized out for non-UV configs as is_uv_system() will then return a constant 0. Signed-off-by: Mike Travis --- arch/x86/kernel/tlb_64.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 38836aef51b..7a3f9891302 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -202,16 +202,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - cpumask_var_t after_uv_flush; - - if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { - cpumask_andnot(after_uv_flush, - cpumask, cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - free_cpumask_var(after_uv_flush); - return; - } + /* FIXME: could be an percpu_alloc'd thing */ + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); + + cpumask_andnot(after_uv_flush, cpumask, + cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + + put_cpu_var(flush_tlb_uv_cpumask); + return; } flush_tlb_others_ipi(cpumask, mm, va); } -- cgit v1.2.3 From a1c33bbeb7061f3ed39103c385844474eaa8f921 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: x86: cleanup remaining cpumask_t code in mce_amd_64.c Impact: Reduce memory usage, use new cpumask API. Use cpumask_var_t for 'cpus' cpumask in struct threshold_bank and update remaining old cpumask_t functions to new cpumask API. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094..4772e91e824 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_t cpus; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(per_cpu(cpu_core_map, cpu)); + i = cpumask_first(&per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } + if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + kfree(b); + err = -ENOMEM; + goto out; + } b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); if (!b->kobj) goto out_free; #ifndef CONFIG_SMP - b->cpus = CPU_MASK_ALL; + cpumask_setall(b->cpus); #else - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; @@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; + free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) free_out: kobject_del(b->kobj); kobject_put(b->kobj); + free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } -- cgit v1.2.3 From f9b90566cd46e19f670a1e60a717ff243f060a8a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: x86: reduce stack usage in init_intel_cacheinfo Impact: reduce stack usage. init_intel_cacheinfo() does not use the cpumask so define a subset of struct _cpuid4_info (_cpuid4_info_regs) that can be used instead. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/intel_cacheinfo.c | 63 ++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be7..58527a9fc40 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -132,7 +132,16 @@ struct _cpuid4_info { union _cpuid4_leaf_ecx ecx; unsigned long size; unsigned long can_disable; - cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ + DECLARE_BITMAP(shared_cpu_map, NR_CPUS); +}; + +/* subset of above _cpuid4_info w/o shared_cpu_map */ +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; }; #ifdef CONFIG_PCI @@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; @@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) } static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +__cpuinit cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) return 0; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - + struct _cpuid4_info_regs this_leaf; int retval; - retval = cpuid4_cache_lookup(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { switch(this_leaf.eax.split.level) { case 1: @@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); else { index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); + cpumask_set_cpu(i, + to_cpumask(this_leaf->shared_cpu_map)); if (i != cpu && per_cpu(cpuid4_info, i)) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); + sibling_leaf = + CPUID4_INFO_IDX(i, index); + cpumask_set_cpu(cpu, to_cpumask( + sibling_leaf->shared_cpu_map)); } } } @@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { + for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); + cpumask_clear_cpu(cpu, + to_cpumask(sibling_leaf->shared_cpu_map)); } } #else @@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int n = 0; if (len > 1) { - cpumask_t *mask = &this_leaf->shared_cpu_map; + const struct cpumask *mask; + mask = to_cpumask(this_leaf->shared_cpu_map); n = type? cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); @@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; ssize_t ret = 0; int i; @@ -718,7 +742,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; unsigned int ret, index, val; @@ -863,7 +888,7 @@ err_out: return -ENOMEM; } -static cpumask_t cache_dev_map = CPU_MASK_NONE; +static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) @@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - cpu_set(cpu, cache_dev_map); + cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return 0; @@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (per_cpu(cpuid4_info, cpu) == NULL) return; - if (!cpu_isset(cpu, cache_dev_map)) + if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; - cpu_clear(cpu, cache_dev_map); + cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); -- cgit v1.2.3 From 9594949b060efe86ecaa1a66839232a3b9800bc9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: irq: change references from NR_IRQS to nr_irqs Impact: preparation, cleanup, add KERN_INFO printk Modify references from NR_IRQS to nr_irqs as the later will become variable-sized based on nr_cpu_ids when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1337eab60ec..ae80638012d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3183,7 +3183,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; -- cgit v1.2.3 From dd3feda7748b4c2739de47daaaa387fb01926c15 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:44:29 +0530 Subject: x86: microcode_intel.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of ERROR: trailing whitespace ERROR: "(foo*)" should be "(foo *)" total: 3 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e61..5e9f4fc5138 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -87,9 +87,9 @@ #include #include #include +#include #include -#include #include #include @@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; } -static inline int +static inline int update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 0 : 1; @@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } - ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, - &get_ucode_fw); + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); release_firmware(firmware); @@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) -- cgit v1.2.3 From 448dd2fa3ec915ad4325868ef8bb9b9490d9f6a9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:45:14 +0530 Subject: x86: msr.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of total: 0 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2..3cf3413ec62 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -35,10 +35,10 @@ #include #include #include +#include #include #include -#include #include static struct class *msr_class; -- cgit v1.2.3 From e17029ad69c7b1518413c00f793d89bb77b8527b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:03 +0530 Subject: x86: module_32.c fix style problems Impact: cleanup Fix: ERROR: code indent should use tabs where possible ERROR: trailing whitespace ERROR: spaces required around that '=' (ctx:VxW) total: 3 errors, 0 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb..0edd819050e 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } /* We don't need anything special. */ @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".text", secstrings + s->sh_name)) text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } -- cgit v1.2.3 From 3b9dc9f2f123286aaade54c45fef6723d587c664 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:48 +0530 Subject: x86: module_64.c fix style problems Impact: cleanup Fix: ERROR: trailing whitespace ERROR: code indent should use tabs where possible WARNING: %Ld/%Lu are not-standard C, use %lld/%llu WARNING: printk() should include KERN_ facility level ERROR: spaces required around that '=' (ctx:VxW) total: 13 errors, 2 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_64.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b..c23880b90b5 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -30,14 +30,14 @@ #include #include -#define DEBUGP(fmt...) +#define DEBUGP(fmt...) #ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } void *module_alloc(unsigned long size) @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; - u64 val; + u64 val; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); - val = sym->st_value + rel[i].r_addend; + val = sym->st_value + rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if ((s64)val != *(s32 *)loc) goto overflow; break; - case R_X86_64_PC32: + case R_X86_64_PC32: val -= (u64)loc; *(u32 *)loc = val; #if 0 if ((s64)val != *(s32 *)loc) - goto overflow; + goto overflow; #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", + printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + printk(KERN_ERR "overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", me->name); @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk("non add relocation not supported\n"); + printk(KERN_ERR "non add relocation not supported\n"); return -ENOSYS; -} +} int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL; @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } -- cgit v1.2.3 From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- arch/x86/kernel/io_apic.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ae80638012d..157986916cd 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3850,6 +3850,22 @@ void __init probe_nr_irqs_gsi(void) nr_irqs_gsi = nr; } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -- cgit v1.2.3 From a4a0acf8e17e3d08e28b721ceceb898fbc959ceb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 13 Jan 2009 18:43:03 -0800 Subject: x86: fix broken flush_tlb_others_ipi() This commit broke flush_tlb_others_ipi() causing boot hangs on a 16 logical cpu system: > commit 4595f9620cda8a1e973588e743cf5f8436dd20c6 > Author: Rusty Russell > Date: Sat Jan 10 21:58:09 2009 -0800 > > x86: change flush_tlb_others to take a const struct cpumask This change resulted in sending the invalidate tlb vector to the sender itself causing the hang. flush_tlb_others_ipi() should exclude the sender itself from the destination list. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7a3f9891302..54ee2ecb5e2 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); -- cgit v1.2.3 From b5ba7e6d1e7e2ac808afd21be1e56dc34caf20e6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:46:17 +0530 Subject: x86: replacing mp_config_ioapic with mpc_ioapic Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 28 ++++++++++----------- arch/x86/kernel/io_apic.c | 60 ++++++++++++++++++++++----------------------- arch/x86/kernel/mpparse.c | 12 ++++----- 3 files changed, 49 insertions(+), 51 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..2b27019e64f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mp_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mp_apicid, used); + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); } if (!test_bit(id, used)) return id; @@ -945,29 +945,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mp_type = MP_IOAPIC; - mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mp_apicaddr = address; + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); + mp_ioapics[idx].apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mp_apicver = 0; + mp_ioapics[idx].apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, - mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -1026,7 +1026,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) mp_irq.mp_irqflag = (trigger << 2) | polarity; mp_irq.mp_srcbus = MP_ISA_BUS; mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ + mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ mp_irq.mp_dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); @@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void) ioapic = mp_find_ioapic(0); if (ioapic < 0) return; - dstapic = mp_ioapics[ioapic].mp_apicid; + dstapic = mp_ioapics[ioapic].apicid; /* * Use the default configuration for the IRQs 0-15. Unless diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 109c91db202..6c51ecdfbf4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -387,7 +387,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -946,7 +946,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || mp_irqs[i].mp_dstapic == MP_APIC_ALL) && mp_irqs[i].mp_dstirq == pin) return i; @@ -988,7 +988,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -1016,7 +1016,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); + mp_ioapics[apic].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1605,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void) notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); continue; } if (notcon) { @@ -1700,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1720,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -2122,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mp_apicid; + old_id = mp_ioapics[apic].apicid; - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + mp_ioapics[apic].apicid = reg_00.bits.ID; } /* @@ -2138,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { + mp_ioapics[apic].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2149,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; + mp_ioapics[apic].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2164,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mp_apicid) + if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_dstapic == old_id) mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; + = mp_ioapics[apic].apicid; /* * Read the right value from the MPC table and @@ -2176,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + reg_00.bits.ID = mp_ioapics[apic].apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2189,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -3118,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -4101,7 +4099,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; + ioapic_phys = mp_ioapics[i].apicaddr; #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 8385d4e7e15..a86a6553743 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -143,11 +143,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) if (bad_ioapic(m->apicaddr)) return; - mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].mp_apicid = m->apicid; - mp_ioapics[nr_ioapics].mp_type = m->type; - mp_ioapics[nr_ioapics].mp_apicver = m->apicver; - mp_ioapics[nr_ioapics].mp_flags = m->flags; + mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; + mp_ioapics[nr_ioapics].apicid = m->apicid; + mp_ioapics[nr_ioapics].type = m->type; + mp_ioapics[nr_ioapics].apicver = m->apicver; + mp_ioapics[nr_ioapics].flags = m->flags; nr_ioapics++; } @@ -416,7 +416,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.type = MP_INTSRC; intsrc.irqflag = 0; /* conforming */ intsrc.srcbus = 0; - intsrc.dstapic = mp_ioapics[0].mp_apicid; + intsrc.dstapic = mp_ioapics[0].apicid; intsrc.irqtype = mp_INT; -- cgit v1.2.3 From c2c21745ecba23c74690a124bcd371f83bd71e45 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:47:22 +0530 Subject: x86: replacing mp_config_intsrc with mpc_intsrc Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 70 ++++++++++++++++++++++----------------------- arch/x86/kernel/io_apic.c | 64 ++++++++++++++++++++--------------------- arch/x86/kernel/mpparse.c | 68 +++++++++++++++++++++---------------------- 3 files changed, 100 insertions(+), 102 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2b27019e64f..4cb5964f149 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,19 +973,19 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } -static void assign_to_mp_irq(struct mp_config_intsrc *m, - struct mp_config_intsrc *mp_irq) +static void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) { - memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); } -static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, - struct mp_config_intsrc *m) +static int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) { - return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); } -static void save_mp_irq(struct mp_config_intsrc *m) +static void save_mp_irq(struct mpc_intsrc *m) { int i; @@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { int ioapic; int pin; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (trigger << 2) | polarity; - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ - mp_irq.mp_dstirq = pin; /* INTIN# */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); } @@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) int i; int ioapic; unsigned int dstapic; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* @@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mp_config_intsrc *irq = mp_irqs + idx; + struct mpc_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mp_srcbus == MP_ISA_BUS - && irq->mp_srcbusirq == i) + if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if (irq->mp_dstapic == dstapic && - irq->mp_dstirq == i) + if (irq->dstapic == dstapic && irq->dstirq == i) break; } @@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqflag = 0; /* Conforming */ - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_dstapic = dstapic; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_srcbusirq = i; /* Identity mapped */ - mp_irq.mp_dstirq = i; + mp_irq.type = MP_INTSRC; + mp_irq.irqflag = 0; /* Conforming */ + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.dstapic = dstapic; + mp_irq.irqtype = mp_INT; + mp_irq.srcbusirq = i; /* Identity mapped */ + mp_irq.dstirq = i; save_mp_irq(&mp_irq); } @@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity) { #ifdef CONFIG_X86_MPPARSE - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; int ioapic; if (!acpi_ioapic) return 0; /* print the entry should happen on mptable identically */ - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.mp_srcbus = number; - mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); - mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; save_mp_irq(&mp_irq); #endif diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6c51ecdfbf4..79b8c0c72d3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -87,7 +87,7 @@ struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -945,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) + if (mp_irqs[i].irqtype == type && + (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + mp_irqs[i].dstapic == MP_APIC_ALL) && + mp_irqs[i].dstirq == pin) return i; return -1; @@ -962,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].mp_dstirq; + return mp_irqs[i].dstirq; } return -1; } @@ -978,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) return apic; } } @@ -1013,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && + !mp_irqs[i].irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + if (pin == (mp_irqs[i].srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -1072,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -1089,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) + switch (mp_irqs[idx].irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -1131,13 +1131,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + switch ((mp_irqs[idx].irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -1215,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mp_dstirq != pin) + if (mp_irqs[idx].dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; + irq = mp_irqs[idx].srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -2164,8 +2164,8 @@ static void __init setup_ioapic_ids_from_mpc(void) */ if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic + if (mp_irqs[i].dstapic == old_id) + mp_irqs[i].dstapic = mp_ioapics[apic].apicid; /* @@ -3983,8 +3983,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) + if (mp_irqs[i].irqtype == mp_INT && + mp_irqs[i].srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a86a6553743..ad36377dc93 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -159,55 +159,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) m->srcbusirq, m->dstapic, m->dstirq); } -static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, - (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, - mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + mp_irq->irqtype, mp_irq->irqflag & 3, + (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, + mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mp_config_intsrc *mp_irq) + struct mpc_intsrc *mp_irq) { - mp_irq->mp_dstapic = m->dstapic; - mp_irq->mp_type = m->type; - mp_irq->mp_irqtype = m->irqtype; - mp_irq->mp_irqflag = m->irqflag; - mp_irq->mp_srcbus = m->srcbus; - mp_irq->mp_srcbusirq = m->srcbusirq; - mp_irq->mp_dstirq = m->dstirq; + mp_irq->dstapic = m->dstapic; + mp_irq->type = m->type; + mp_irq->irqtype = m->irqtype; + mp_irq->irqflag = m->irqflag; + mp_irq->srcbus = m->srcbus; + mp_irq->srcbusirq = m->srcbusirq; + mp_irq->dstirq = m->dstirq; } -static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - m->dstapic = mp_irq->mp_dstapic; - m->type = mp_irq->mp_type; - m->irqtype = mp_irq->mp_irqtype; - m->irqflag = mp_irq->mp_irqflag; - m->srcbus = mp_irq->mp_srcbus; - m->srcbusirq = mp_irq->mp_srcbusirq; - m->dstirq = mp_irq->mp_dstirq; + m->dstapic = mp_irq->dstapic; + m->type = mp_irq->type; + m->irqtype = mp_irq->irqtype; + m->irqflag = mp_irq->irqflag; + m->srcbus = mp_irq->srcbus; + m->srcbusirq = mp_irq->srcbusirq; + m->dstirq = mp_irq->dstirq; } -static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - if (mp_irq->mp_dstapic != m->dstapic) + if (mp_irq->dstapic != m->dstapic) return 1; - if (mp_irq->mp_type != m->type) + if (mp_irq->type != m->type) return 2; - if (mp_irq->mp_irqtype != m->irqtype) + if (mp_irq->irqtype != m->irqtype) return 3; - if (mp_irq->mp_irqflag != m->irqflag) + if (mp_irq->irqflag != m->irqflag) return 4; - if (mp_irq->mp_srcbus != m->srcbus) + if (mp_irq->srcbus != m->srcbus) return 5; - if (mp_irq->mp_srcbusirq != m->srcbusirq) + if (mp_irq->srcbusirq != m->srcbusirq) return 6; - if (mp_irq->mp_dstirq != m->dstirq) + if (mp_irq->dstirq != m->dstirq) return 7; return 0; @@ -808,15 +808,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) /* not legacy */ for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; - if (mp_irqs[i].mp_srcbus != m->srcbus) + if (mp_irqs[i].srcbus != m->srcbus) continue; - if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + if (mp_irqs[i].srcbusirq != m->srcbusirq) continue; if (irq_used[i]) { /* already claimed */ @@ -921,10 +921,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (irq_used[i]) continue; - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; if (nr_m_spare > 0) { -- cgit v1.2.3 From 09b3ec7315a18d885127544204f1e389d41058d0 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Mon, 12 Jan 2009 22:35:42 +0100 Subject: x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default). [ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..8cfea5d1451 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } -- cgit v1.2.3 From f11826385b63566d98c02d35f592232ee77cd791 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:27:35 +0000 Subject: x86: fully honor "nolapic" Impact: widen the effect of the 'nolapic' boot parameter "nolapic" should not only suppress SMP and use of the LAPIC, but it also ought to have the effect of disabling all IO-APIC related activity as well as PCI MSI and HT-IRQs. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 7 ++++++- arch/x86/kernel/io_apic.c | 6 ++++++ arch/x86/kernel/smpboot.c | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..c3dd64fabcf 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1126,6 +1126,11 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + if (disable_apic) { + disable_ioapic_setup(); + return; + } + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && esr_disable) { @@ -1566,11 +1571,11 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { -#ifdef CONFIG_X86_64 if (disable_apic) { pr_info("Apic disabled\n"); return -1; } +#ifdef CONFIG_X86_64 if (!cpu_has_apic) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..40747e58f30 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3258,6 +3258,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms int err; unsigned dest; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) @@ -3726,6 +3729,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct irq_cfg *cfg; int err; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87..31f99ec2e0f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1125,6 +1125,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); + disable_ioapic_setup(); return -1; } -- cgit v1.2.3 From a08c4743ed5b861c4fa3d75be00da7106c926296 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:28:51 +0000 Subject: x86: avoid early crash in disable_local_APIC() E.g. when called due to an early panic. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c3dd64fabcf..38d6aab2358 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -895,6 +895,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* -- cgit v1.2.3 From 54da5b3d44238eeb7417bacf792fb416d473bf4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 13:04:58 +0100 Subject: x86: fix broken flush_tlb_others_ipi(), fix Impact: cleanup Use the proper type. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 54ee2ecb5e2..7f4141d3b66 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); -- cgit v1.2.3 From 5cd7376200be7b8bab085557ff5876b04bd84191 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 15:46:08 +0100 Subject: fix: crash: IP: __bitmap_intersects+0x48/0x73 -tip testing found this crash: > [ 35.258515] calling acpi_cpufreq_init+0x0/0x127 @ 1 > [ 35.264127] BUG: unable to handle kernel NULL pointer dereference at (null) > [ 35.267554] IP: [] __bitmap_intersects+0x48/0x73 > [ 35.267554] PGD 0 > [ 35.267554] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c is still broken: there's no allocation of the variable mask, so we pass in an uninitialized cmd.mask field to drv_read(), which then passes it to the scheduler which then crashes ... Switch it over to the much simpler constant-cpumask-pointers approach. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c..019276717a7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -235,8 +235,7 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - + cmd.mask = mask; drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); @@ -403,9 +402,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -450,9 +446,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -479,7 +475,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } -- cgit v1.2.3 From f10fcd47120e80f66665567dbe17f5071c7aef52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: x86: make early_per_cpu() a lvalue and use it Make early_per_cpu() a lvalue as per_cpu() is and use it where applicable. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 38d6aab2358..48578795583 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1877,17 +1877,8 @@ void __cpuinit generic_processor_info(int apicid, int version) #endif #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); -- cgit v1.2.3 From c90aa894f0240084f2c6e42e2333b211d6cfe2b2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: x86: cleanup early setup_percpu references [ Based on original patch from Christoph Lameter and Mike Travis. ] * Ruggedize some calls in setup_percpu.c to prevent mishaps in early calls, particularly for non-critical functions. * Cleanup DEBUG_PER_CPU_MAPS usages and some comments. Signed-off-by: Mike Travis Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 56 ++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bf63de72b64..56c63ac62b1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -27,31 +33,39 @@ unsigned int max_physical_apicid; physid_mask_t phys_cpu_present_map; #endif -/* map cpu index to physical APIC ID */ +/* + * Map cpu index to physical APIC ID + */ DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 +#define X86_64_NUMA 1 /* (used later) */ -/* map cpu index to node index */ +/* + * Map cpu index to node index + */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -/* which logical CPUs are on which nodes */ +/* + * Which logical CPUs are on which nodes + */ cpumask_t *node_to_cpumask_map; EXPORT_SYMBOL(node_to_cpumask_map); -/* setup node_to_cpumask_map */ +/* + * Setup node_to_cpumask_map + */ static void __init setup_node_to_cpumask_map(void); #else static inline void setup_node_to_cpumask_map(void) { } #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the @@ -200,6 +214,8 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* Setup percpu data maps */ @@ -221,6 +237,7 @@ void __init setup_per_cpu_areas(void) * Requires node_possible_map to be valid. * * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ static void __init setup_node_to_cpumask_map(void) { @@ -236,6 +253,7 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); pr_debug("Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); @@ -248,17 +266,23 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; - - if (cpu_to_node_map) + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; + return; + } - else if (per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; - else - pr_debug("Setting node for non-present cpu %d\n", cpu); + if (node != NUMA_NO_NODE) + cpu_pda(cpu)->nodenumber = node; } void __cpuinit numa_clear_node(int cpu) @@ -275,7 +299,7 @@ void __cpuinit numa_add_cpu(int cpu) void __cpuinit numa_remove_cpu(int cpu) { - cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } #else /* CONFIG_DEBUG_PER_CPU_MAPS */ @@ -285,7 +309,7 @@ void __cpuinit numa_remove_cpu(int cpu) */ static void __cpuinit numa_set_cpumask(int cpu, int enable) { - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); cpumask_t *mask; char buf[64]; -- cgit v1.2.3 From a698c823e15149941b0f0281527d0c0d1daf2639 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: make vmlinux_32.lds.S use PERCPU() macro Make vmlinux_32.lds.S use the generic PERCPU() macro instead of open coding it. This will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_32.lds.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde..3eba7f7bac0 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -178,14 +178,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* freed after init ends here */ -- cgit v1.2.3 From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: make percpu symbols zerobased on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accomodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/head_64.S | 24 +++++++++++++++++++++++- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 17 ++++++++++++++++- 4 files changed, 42 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b93..bc2900ca82c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) { _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; + cpu_pda(0)->data_offset = + (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d49556..7ee0363871e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) pushq $0 popfq +#ifdef CONFIG_SMP + /* + * early_gdt_base should point to the gdt_page in static percpu init + * data area. Computing this requires two symbols - __per_cpu_load + * and per_cpu__gdt_page. As linker can't do no such relocation, do + * it by hand. As early_gdt_descr is manipulated by C code for + * secondary CPUs, this should be done only once for the boot CPU + * when early_gdt_descr_base contains zero. + */ + movq early_gdt_descr_base(%rip), %rax + testq %rax, %rax + jnz 1f + movq $__per_cpu_load, %rax + addq $per_cpu__gdt_page, %rax + movq %rax, early_gdt_descr_base(%rip) +1: +#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +#ifdef CONFIG_SMP +early_gdt_descr_base: + .quad 0x0000000000000000 +#else + .quad per_cpu__gdt_page +#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b1..44845842e72 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6be..f50280db0df 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,6 +19,9 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +211,26 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * switch it back to data.init. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; -- cgit v1.2.3 From f32ff5388d86518c0375ccdb330d3b459b9c405e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: load pointer to pda into %gs while brining up a CPU [ Based on original patch from Christoph Lameter and Mike Travis. ] CPU startup code in head_64.S loaded address of a zero page into %gs for temporary use till pda is loaded but address to the actual pda is available at the point. Load the real address directly instead. This will help unifying percpu and pda handling later on. This patch is mostly taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo --- arch/x86/kernel/acpi/sleep.c | 1 + arch/x86/kernel/head64.c | 4 ++-- arch/x86/kernel/head_64.S | 15 ++++++++++----- arch/x86/kernel/smpboot.c | 1 + 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95f..9ff67f8dc2c 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + initial_gs = (unsigned long)cpu_pda(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bc2900ca82c..76ffba2aa66 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,8 +26,8 @@ #include #include -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; +/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7ee0363871e..2f0ab008988 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -243,12 +243,15 @@ ENTRY(secondary_startup_64) movl %eax,%fs movl %eax,%gs - /* - * Setup up a dummy PDA. this is just for some early bootup code - * that does in_interrupt() - */ + /* Set up %gs. + * + * %gs should point to the pda. For initial boot, make %gs point + * to the _boot_cpu_pda in data section. For a secondary CPU, + * initial_gs should be set to its pda address before the CPU runs + * this code. + */ movl $MSR_GS_BASE,%ecx - movq $empty_zero_page,%rax + movq initial_gs(%rip),%rax movq %rax,%rdx shrq $32,%rdx wrmsr @@ -274,6 +277,8 @@ ENTRY(secondary_startup_64) .align 8 ENTRY(initial_code) .quad x86_64_start_kernel + ENTRY(initial_gs) + .quad _boot_cpu_pda __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1a712da1dfa..70d846628bb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -854,6 +854,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + initial_gs = (unsigned long)cpu_pda(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; -- cgit v1.2.3 From c8f3329a0ddd751241e96b4100df7eda14b2cbc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: use static _cpu_pda array _cpu_pda array first uses statically allocated storage in data.init and then switches to allocated bootmem to conserve space. However, after folding pda area into percpu area, _cpu_pda array will be removed completely. Drop the reallocation part to simplify the code for soon-to-follow changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head64.c | 12 ------------ arch/x86/kernel/setup_percpu.c | 14 +++----------- 3 files changed, 4 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0025846244..c116c599326 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,7 +879,7 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; +struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; EXPORT_SYMBOL(_cpu_pda); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 76ffba2aa66..462d0beccb6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -29,20 +29,8 @@ /* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ struct x8664_pda _boot_cpu_pda; -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. - */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - void __init x86_64_init_pda(void) { - _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 44845842e72..73ab01b297c 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -114,7 +114,6 @@ static inline void setup_cpu_pda_map(void) { } static void __init setup_cpu_pda_map(void) { char *pda; - struct x8664_pda **new_cpu_pda; unsigned long size; int cpu; @@ -122,28 +121,21 @@ static void __init setup_cpu_pda_map(void) /* allocate cpu_pda array and pointer table */ { - unsigned long tsize = nr_cpu_ids * sizeof(void *); unsigned long asize = size * (nr_cpu_ids - 1); - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; + pda = alloc_bootmem(asize); } /* initialize pointer table to static pda's */ for_each_possible_cpu(cpu) { if (cpu == 0) { /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); continue; } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; + cpu_pda(cpu) = (struct x8664_pda *)pda; + cpu_pda(cpu)->in_bootmem = 1; pda += size; } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; } #endif /* CONFIG_SMP && CONFIG_X86_64 */ -- cgit v1.2.3 From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/kernel/cpu/common.c | 6 +-- arch/x86/kernel/head64.c | 8 ++- arch/x86/kernel/head_64.S | 15 ++++-- arch/x86/kernel/setup_percpu.c | 107 +++++++++++++++++---------------------- arch/x86/kernel/smpboot.c | 60 +--------------------- arch/x86/kernel/vmlinux_64.lds.S | 6 ++- 7 files changed, 72 insertions(+), 131 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edb..f8d1b047ef4 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,7 @@ int main(void) ENTRY(cpunumber); ENTRY(irqstackptr); ENTRY(data_offset); + DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c116c599326..7041acdf557 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu) /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); + + load_pda_offset(cpu); pda->cpunumber = cpu; pda->irqcount = -1; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 462d0beccb6..1a311293f73 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,12 +26,18 @@ #include #include -/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +#ifndef CONFIG_SMP +/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ struct x8664_pda _boot_cpu_pda; +#endif void __init x86_64_init_pda(void) { +#ifdef CONFIG_SMP + cpu_pda(0) = (void *)__per_cpu_load; +#else cpu_pda(0) = &_boot_cpu_pda; +#endif cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f0ab008988..7a995d0e9f7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -245,10 +245,13 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * %gs should point to the pda. For initial boot, make %gs point - * to the _boot_cpu_pda in data section. For a secondary CPU, - * initial_gs should be set to its pda address before the CPU runs - * this code. + * On SMP, %gs should point to the per-cpu area. For initial + * boot, make %gs point to the init data section. For a + * secondary CPU,initial_gs should be set to its pda address + * before the CPU runs this code. + * + * On UP, initial_gs points to _boot_cpu_pda and doesn't + * change. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -278,7 +281,11 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) +#ifdef CONFIG_SMP + .quad __per_cpu_load +#else .quad _boot_cpu_pda +#endif __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 73ab01b297c..63d46280227 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS @@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +#ifdef CONFIG_X86_64 +void __cpuinit load_pda_offset(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); + mb(); +} + +#endif /* CONFIG_SMP && CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 + +/* correctly size the local cpu masks */ +static void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + +#else /* CONFIG_X86_32 */ + +static inline void setup_cpu_local_masks(void) +{ +} + +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the @@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void) */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long asize = size * (nr_cpu_ids - 1); - - pda = alloc_bootmem(asize); - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - continue; - } - cpu_pda(cpu) = (struct x8664_pda *)pda; - cpu_pda(cpu)->in_bootmem = 1; - pda += size; - } -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ +#endif /* * Great future plan: @@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); +#ifdef CONFIG_X86_64 + cpu_pda(cpu) = (void *)ptr; + + /* + * CPU0 modified pda in the init data area, reload pda + * offset for CPU0 and clear the area for others. + */ + if (cpu == 0) + load_pda_offset(0); + else + memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); +#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 70d846628bb..f2f77ca494d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -931,9 +875,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index f50280db0df..962f21f1d4d 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include +#include #include #undef i386 /* in case the preprocessor is a 32bit one */ @@ -215,10 +216,11 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. + * switch it back to data.init. Also, pda should be at the head of + * percpu area. Preallocate it. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) + PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) #else PERCPU(PAGE_SIZE) #endif -- cgit v1.2.3 From 9939ddaff52787b2a7c1adf1b2afc95421aa0884 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: merge 64 and 32 SMP percpu handling Now that pda is allocated as part of percpu, percpu doesn't need to be accessed through pda. Unify x86_64 SMP percpu access with x86_32 SMP one. Other than the segment register, operand size and the base of percpu symbols, they behave identical now. This patch replaces now unnecessary pda->data_offset with a dummy field which is necessary to keep stack_canary at its place. This patch also moves per_cpu_offset initialization out of init_gdt() into setup_per_cpu_areas(). Note that this change also necessitates explicit per_cpu_offset initializations in voyager_smp.c. With this change, x86_OP_percpu()'s are as efficient on x86_64 as on x86_32 and also x86_64 can use assembly PER_CPU macros. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 7 ++++--- arch/x86/kernel/head64.c | 2 -- arch/x86/kernel/setup_percpu.c | 15 ++++++++------- arch/x86/kernel/smpcommon.c | 3 +-- 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f8d1b047ef4..f4cc81bfbf8 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -55,7 +55,6 @@ int main(void) ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); - ENTRY(data_offset); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..4833f3a1965 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -52,6 +52,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -1072,10 +1073,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - movq %gs:pda_data_offset, %rbp - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1a311293f73..e99b661a97f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -38,8 +38,6 @@ void __init x86_64_init_pda(void) #else cpu_pda(0) = &_boot_cpu_pda; #endif - cpu_pda(0)->data_offset = - (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 63d46280227..be1ff34db11 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,14 +125,14 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ +#ifdef CONFIG_X86_64 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { + [0] = (unsigned long)__per_cpu_load, +}; +#else unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); #endif +EXPORT_SYMBOL(__per_cpu_offset); /* * Great future plan: @@ -178,6 +178,7 @@ void __init setup_per_cpu_areas(void) #endif memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); + per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 cpu_pda(cpu) = (void *)ptr; @@ -190,7 +191,7 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 397e309839d..84395fabc41 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -4,10 +4,10 @@ #include #include -#ifdef CONFIG_X86_32 DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); +#ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself * (still using the master per-cpu area), or a CPU doing it for a @@ -24,7 +24,6 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; } #endif -- cgit v1.2.3 From b12d8db8fbfaed1e8222a15333a3645599636854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: make pda a percpu variable [ Based on original patch from Christoph Lameter and Mike Travis. ] As pda is now allocated in percpu area, it can easily be made a proper percpu variable. Make it so by defining per cpu symbol from linker script and declaring it in C code for SMP and simply defining it for UP. This change cleans up code and brings SMP and UP closer a bit. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/head64.c | 10 ---------- arch/x86/kernel/head_64.S | 5 +++-- arch/x86/kernel/setup_percpu.c | 16 ++++++++++++++-- arch/x86/kernel/vmlinux_64.lds.S | 4 +++- 5 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7041acdf557..c49498d4083 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,9 +879,6 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e99b661a97f..71b6f6ec96a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,18 +26,8 @@ #include #include -#ifndef CONFIG_SMP -/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ -struct x8664_pda _boot_cpu_pda; -#endif - void __init x86_64_init_pda(void) { -#ifdef CONFIG_SMP - cpu_pda(0) = (void *)__per_cpu_load; -#else - cpu_pda(0) = &_boot_cpu_pda; -#endif pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7a995d0e9f7..c8ace880661 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -250,7 +251,7 @@ ENTRY(secondary_startup_64) * secondary CPU,initial_gs should be set to its pda address * before the CPU runs this code. * - * On UP, initial_gs points to _boot_cpu_pda and doesn't + * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't * change. */ movl $MSR_GS_BASE,%ecx @@ -284,7 +285,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad _boot_cpu_pda + .quad PER_CPU_VAR(__pda) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be1ff34db11..daeedf82c15 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -66,6 +66,16 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +/* + * Define load_pda_offset() and per-cpu __pda for x86_64. + * load_pda_offset() is responsible for loading the offset of pda into + * %gs. + * + * On SMP, pda offset also duals as percpu base address and thus it + * should be at the start of per-cpu area. To achieve this, it's + * preallocated in vmlinux_64.lds.S directly instead of using + * DEFINE_PER_CPU(). + */ #ifdef CONFIG_X86_64 void __cpuinit load_pda_offset(int cpu) { @@ -74,6 +84,10 @@ void __cpuinit load_pda_offset(int cpu) wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); mb(); } +#ifndef CONFIG_SMP +DEFINE_PER_CPU(struct x8664_pda, __pda); +EXPORT_PER_CPU_SYMBOL(__pda); +#endif #endif /* CONFIG_SMP && CONFIG_X86_64 */ @@ -180,8 +194,6 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 - cpu_pda(cpu) = (void *)ptr; - /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 962f21f1d4d..d2a0baa87d1 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -217,10 +217,12 @@ SECTIONS * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should * switch it back to data.init. Also, pda should be at the head of - * percpu area. Preallocate it. + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) + per_cpu____pda = __per_cpu_start; #else PERCPU(PAGE_SIZE) #endif -- cgit v1.2.3 From 49357d19e4fb31e28796eaff83499e7584c26878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: convert pda ops to wrappers around x86 percpu accessors pda is now a percpu variable and there's no reason it can't use plain x86 percpu accessors. Add x86_test_and_clear_bit_percpu() and replace pda op implementations with wrappers around x86 percpu accessors. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 1 - arch/x86/kernel/x8664_ksyms_64.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index d2a0baa87d1..a09abb8fb97 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -14,7 +14,6 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa35..3909e3ba5ce 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); - -EXPORT_SYMBOL(_proxy_pda); -- cgit v1.2.3 From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: misc clean up after the percpu update Do the following cleanups: * kill x86_64_init_pda() which now is equivalent to pda_init() * use per_cpu_offset() instead of cpu_pda() when initializing initial_gs Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/head64.c | 7 +------ arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 9ff67f8dc2c..4abff454c55 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,7 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); - initial_gs = (unsigned long)cpu_pda(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 71b6f6ec96a..af67d3227ea 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,11 +26,6 @@ #include #include -void __init x86_64_init_pda(void) -{ - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -96,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f2f77ca494d..2f0e0f1090f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,7 +798,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - initial_gs = (unsigned long)cpu_pda(cpu); + initial_gs = per_cpu_offset(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; -- cgit v1.2.3 From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/tlb_32.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b..77d546817d9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e3..e65449d0f7d 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); */ void leave_mm(int cpu) { - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); + BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { + if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } -- cgit v1.2.3 From a338af2c648f5e07c582154745a6c60cd2d8bf12 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 11:19:03 +0900 Subject: x86: fix build bug introduced during merge EXPORT_PER_CPU_SYMBOL() got misplaced during merge leading to build failure. Fix it. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index daeedf82c15..b5c35af2011 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -86,9 +86,8 @@ void __cpuinit load_pda_offset(int cpu) } #ifndef CONFIG_SMP DEFINE_PER_CPU(struct x8664_pda, __pda); -EXPORT_PER_CPU_SYMBOL(__pda); #endif - +EXPORT_PER_CPU_SYMBOL(__pda); #endif /* CONFIG_SMP && CONFIG_X86_64 */ #ifdef CONFIG_X86_64 -- cgit v1.2.3 From cd3adf52309867955d6e2175246b526442235805 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 12:11:43 +0900 Subject: x86_64: initialize this_cpu_off to __per_cpu_load On x86_64, if get_per_cpu_var() is used before per cpu area is setup (if lockdep is turned on, it happens), it needs this_cpu_off to point to __per_cpu_load. Initialize accordingly. Signed-off-by: Tejun Heo --- arch/x86/kernel/smpcommon.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 84395fabc41..7e157810062 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -3,8 +3,13 @@ */ #include #include +#include +#ifdef CONFIG_X86_64 +DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; +#else DEFINE_PER_CPU(unsigned long, this_cpu_off); +#endif EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 6eb714c63ed5bd663627f7dda8c4d5258f3b64ef Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new work_on_cpu function to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Note: This patch basically reverts 50c668d6 which reverted 7503bfba, now that the work_on_cpu() function is more stable. Signed-off-by: Mike Travis Acked-by: Rusty Russell Tested-by: Dieter Ries Tested-by: Maciej Rutecki Cc: Dave Jones Cc: --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 019276717a7..4b1c319d30c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -367,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; -- cgit v1.2.3 From cef30b3a84e1c7cbd49987d83c5863c001445842 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:58:13 -0800 Subject: x86: put trigger in to detect mismatched apic versions. Fire off one message if two apic's discovered with different apic versions. Signed-off-by: Mike Travis --- arch/x86/kernel/apic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..db0998641c5 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* -- cgit v1.2.3 From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/irq.c | 6 +----- arch/x86/kernel/irq_64.c | 3 +++ arch/x86/kernel/nmi.c | 10 +--------- 3 files changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f8..8b30d0c2512 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b21cb1ea11..1db05247b47 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7..23b6d9e6e4f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP -- cgit v1.2.3 From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/tlb_32.c | 12 ++---------- arch/x86/kernel/tlb_64.c | 13 ++++++++----- 3 files changed, 10 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c49498d4083..3d0cc6f1711 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu) pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e65449d0f7d..abf0808d6fc 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include @@ -231,14 +231,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - static int init_flush_cpumask(void) { alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7f4141d3b66..e64a32c4882 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include /* * Smarter SMP flushing macros. @@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } -- cgit v1.2.3 From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 19 +++++++------------ arch/x86/kernel/dumpstack_64.c | 33 +++++++++++++++++---------------- arch/x86/kernel/entry_64.S | 6 +++--- arch/x86/kernel/setup_percpu.c | 4 +++- 5 files changed, 30 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f4cc81bfbf8..5b821fbdaf7 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -54,7 +54,6 @@ int main(void) ENTRY(pcurrent); ENTRY(irqcount); ENTRY(cpunumber); - ENTRY(irqstackptr); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d0cc6f1711..496f0a01919 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,7 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ +#else +DEFINE_PER_CPU(char *, irq_stack_ptr) = + per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; +#endif void __cpuinit pda_init(int cpu) { @@ -901,18 +907,7 @@ void __cpuinit pda_init(int cpu) if (cpu == 0) { /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d070704..28e26a4315d 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irq_stack_end = + (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *) estack_end[-2]; continue; } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); + if (irq_stack_end) { + unsigned long *irq_stack; + irq_stack = irq_stack_end - + (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irqstack && stack < irqstack_end) { + if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; + stack = (unsigned long *) (irq_stack_end[-1]); + irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irq_stack_end = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + unsigned long *irq_stack = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); + if (stack >= irq_stack && stack <= irq_stack_end) { + if (stack == irq_stack_end) { + stack = (unsigned long *) (irq_stack_end[-1]); printk(" "); } } else { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4833f3a1965..d22677a6643 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -345,7 +345,7 @@ ENTRY(save_args) 1: incl %gs:pda_irqcount jne 2f popq_cfi %rax /* move return address... */ - mov %gs:pda_irqstackptr,%rsp + mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 pushq_cfi %rax /* ... to the new stack */ /* @@ -1261,7 +1261,7 @@ ENTRY(call_softirq) mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp incl %gs:pda_irqcount - cmove %gs:pda_irqstackptr,%rsp + cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq @@ -1300,7 +1300,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 11: incl %gs:pda_irqcount movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - cmovzq %gs:pda_irqstackptr,%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b5c35af2011..8b53ef83c61 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -192,7 +192,10 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); #ifdef CONFIG_X86_64 + per_cpu(irq_stack_ptr, cpu) = + (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. @@ -202,7 +205,6 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } -- cgit v1.2.3 From 92d65b2371d86d40807e1dbfdccadc4d501edcde Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Convert exception stacks to per-cpu Move the exception stacks to per-cpu, removing specific allocation code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 496f0a01919..b6d7eec0be7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -913,8 +913,9 @@ void __cpuinit pda_init(int cpu) } } -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -972,15 +973,12 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) pda_init(cpu); - else - estacks = boot_exception_stacks; me = current; @@ -1014,18 +1012,13 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } -- cgit v1.2.3 From ea9279066de44053d0c20ea855bc9f4706652d84 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move cpu number from PDA to per-cpu and consolidate with 32-bit. tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA for voyager. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_32.c | 3 --- arch/x86/kernel/setup_percpu.c | 10 ++++++++++ arch/x86/kernel/smpcommon.c | 2 -- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5b821fbdaf7..cae6697c099 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -53,7 +53,6 @@ int main(void) ENTRY(oldrsp); ENTRY(pcurrent); ENTRY(irqcount); - ENTRY(cpunumber); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b6d7eec0be7..4221e920886 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -899,7 +899,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->cpunumber = cpu; pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77d546817d9..2c00a57ccb9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8b53ef83c61..258497f93f4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,6 +22,15 @@ # define DBG(x...) #endif +/* + * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but + * voyager wants cpu_number too. + */ +#ifdef CONFIG_SMP +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -193,6 +202,7 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); + per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 7e157810062..add36b4e37c 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - - per_cpu(cpu_number, cpu) = cpu; } #endif -- cgit v1.2.3 From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 5 +---- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/process_64.c | 5 ++++- arch/x86/kernel/smpboot.c | 3 +-- 5 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cae6697c099..4f7a210e1e5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -51,7 +51,6 @@ int main(void) #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) ENTRY(kernelstack); ENTRY(oldrsp); - ENTRY(pcurrent); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4221e920886..b50e38d1688 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu) pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - } else { + if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 28e26a4315d..d35db5993fd 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..e00c31a4b3c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -57,6 +57,9 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + percpu_write(current_task, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2f0e0f1090f..5854be0fb80 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; +#ifdef CONFIG_X86_32 init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif -- cgit v1.2.3 From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move kernelstack from PDA to per-cpu. Also clean up PER_CPU_VAR usage in xen-asm_64.S tj: * remove now unused stack_thread_info() * s/kernelstack/kernel_stack/ * added FIXME comment in xen-asm_64.S Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 3 +++ 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4f7a210e1e5..cafff5f4a03 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b50e38d1688..06b6290088f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; #endif +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d22677a6643..0dd45859a7a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -468,7 +468,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -480,7 +480,7 @@ ENTRY(system_call) ENTRY(system_call_after_swapgs) movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e00c31a4b3c..6c5f5760210 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(oldrsp, next->usersp); percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5854be0fb80..869b98840fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,6 +798,9 @@ do_rest: #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; -- cgit v1.2.3 From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move oldrsp from PDA to per-cpu. tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda is still referenced in the file * s/oldrsp/old_rsp/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 10 +++++----- arch/x86/kernel/process_64.c | 8 +++++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cafff5f4a03..afda6deb851 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0dd45859a7a..7c27da407da 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -479,7 +479,7 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp + movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight @@ -523,7 +523,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -833,7 +833,7 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c5f5760210..48012891892 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(unsigned long, old_rsp); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); percpu_write(kernel_stack, -- cgit v1.2.3 From 5689553076c4a67b83426b076082c63085b7567a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move irqcount from PDA to per-cpu. tj: s/irqcount/irq_count/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/entry_64.S | 14 +++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index afda6deb851..64c834a39aa 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06b6290088f..e2323ecce1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,6 +893,8 @@ DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); +DEFINE_PER_CPU(unsigned int, irq_count) = -1; + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -903,8 +905,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->irqcount = -1; - if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7c27da407da..c52b6091916 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -337,12 +337,12 @@ ENTRY(save_args) je 1f SWAPGS /* - * irqcount is used to check if a CPU is already on an interrupt stack + * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl %gs:pda_irqcount +1: incl PER_CPU_VAR(irq_count) jne 2f popq_cfi %rax /* move return address... */ mov PER_CPU_VAR(irq_stack_ptr),%rsp @@ -837,7 +837,7 @@ common_interrupt: ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 @@ -1260,14 +1260,14 @@ ENTRY(call_softirq) CFI_REL_OFFSET rbp,0 mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - incl %gs:pda_irqcount + incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) ret CFI_ENDPROC END(call_softirq) @@ -1297,7 +1297,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME -11: incl %gs:pda_irqcount +11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp @@ -1305,7 +1305,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) call xen_evtchn_do_upcall popq %rsp CFI_DEF_CFA_REGISTER rsp - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC END(do_hypervisor_callback) -- cgit v1.2.3 From e7a22c1ebcc1caa8178df1819d05128bb5b45ab9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: x86-64: Move nodenumber from PDA to per-cpu. tj: * s/nodenumber/node_number/ * removed now unused pda variable from pda_init() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 13 ++++++------- arch/x86/kernel/setup_percpu.c | 4 +++- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2323ecce1d..7976a6a0f65 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,18 +897,11 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1; void __cpuinit pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); - /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); load_pda_offset(cpu); - - if (cpu != 0) { - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } } static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks @@ -978,6 +971,12 @@ void __cpuinit cpu_init(void) if (cpu != 0) pda_init(cpu); +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif + me = current; if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 258497f93f4..efbafbbff58 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -53,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) #define X86_64_NUMA 1 /* (used later) */ +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); /* * Map cpu index to node index @@ -283,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node) per_cpu(x86_cpu_to_node_map, cpu) = node; if (node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; + per_cpu(node_number, cpu) = node; } void __cpuinit numa_clear_node(int cpu) -- cgit v1.2.3 From c2558e0eba66b49993e619da66c95a50a97830a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: x86-64: Move isidle from PDA to per-cpu. tj: s/isidle/is_idle/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/process_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 48012891892..4523ff88a69 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, old_rsp); +static DEFINE_PER_CPU(unsigned char, is_idle); unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; @@ -80,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { - write_pda(isidle, 1); + percpu_write(is_idle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - if (test_and_clear_bit_pda(0, isidle) == 0) + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } -- cgit v1.2.3 From 422e79a8b39d9ac73e410dc3cd099aecea82afd2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Jan 2009 17:06:42 +1100 Subject: x86: Remove never-called arch_setup_msi_irq() Since commit 75c46fa, "x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure", x86 has had an implementation of arch_setup_msi_irqs(). That implementation does not call arch_setup_msi_irq(), instead it calls setup_irq(). No other x86 code calls arch_setup_msi_irq(). That leaves only arch_setup_msi_irqs() in drivers/pci/msi.c, but that routine is overridden by the x86 version of arch_setup_msi_irqs(). So arch_setup_msi_irq() is dead code, remove it. Signed-off-by: Michael Ellerman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_apic.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 79b8c0c72d3..157aafa4558 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3462,40 +3462,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; -- cgit v1.2.3 From 5cdc5e9e69d4dc3a3630ae1fa666401b2a8dcde6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Jan 2009 20:49:37 +0100 Subject: x86: fully honor "nolapic", fix Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 48578795583..9ca12af6c87 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,9 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { +#ifdef CONFIG_X86_IO_APIC disable_ioapic_setup(); +#endif return; } -- cgit v1.2.3 From c6e50f93db5bd0895ec7c7d1b6f3886c6e1f11b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: x86: cleanup stack protector Impact: cleanup Make the following cleanups. * remove duplicate comment from boot_init_stack_canary() which fits better in the other place - cpu_idle(). * move stack_canary offset check from __switch_to() to boot_init_stack_canary(). Signed-off-by: Tejun Heo --- arch/x86/kernel/process_64.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa89eabf09e..088bc9a0f82 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -638,13 +638,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -#ifdef CONFIG_CC_STACKPROTECTOR - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); -#endif /* * Now maybe reload the debug registers and handle I/O bitmaps -- cgit v1.2.3 From 8ce031972b40da58c268caba8c5ea3c0856d7131 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: x86: remove pda_init() Impact: cleanup Copy the code to cpu_init() to satisfy the requirement that the cpu be reinitialized. Remove all other calls, since the segments are already initialized in head_64.S. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 15 +++------------ arch/x86/kernel/head64.c | 2 -- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7976a6a0f65..f83a4d6160f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; -void __cpuinit pda_init(int cpu) -{ - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - - load_pda_offset(cpu); -} - static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); @@ -967,9 +958,9 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); + loadsegment(fs, 0); + loadsegment(gs, 0); + load_pda_offset(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index af67d3227ea..f5b27224769 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - pda_init(0); - x86_64_start_reservations(real_mode_data); } -- cgit v1.2.3 From 8c7e58e690ae60ab4215b025f433ed4af261e103 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: x86: rework __per_cpu_load adjustments Impact: cleanup Use cpu_number to determine if the adjustment is necessary. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/head_64.S | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c8ace880661..98ea26a2fca 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -207,19 +207,15 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP /* - * early_gdt_base should point to the gdt_page in static percpu init - * data area. Computing this requires two symbols - __per_cpu_load - * and per_cpu__gdt_page. As linker can't do no such relocation, do - * it by hand. As early_gdt_descr is manipulated by C code for - * secondary CPUs, this should be done only once for the boot CPU - * when early_gdt_descr_base contains zero. + * Fix up static pointers that need __per_cpu_load added. The assembler + * is unable to do this directly. This is only needed for the boot cpu. + * These values are set up with the correct base addresses by C code for + * secondary cpus. */ - movq early_gdt_descr_base(%rip), %rax - testq %rax, %rax - jnz 1f - movq $__per_cpu_load, %rax - addq $per_cpu__gdt_page, %rax - movq %rax, early_gdt_descr_base(%rip) + movq initial_gs(%rip), %rax + cmpl $0, per_cpu__cpu_number(%rax) + jne 1f + addq %rax, early_gdt_descr_base(%rip) 1: #endif /* @@ -431,12 +427,8 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 -#ifdef CONFIG_SMP early_gdt_descr_base: - .quad 0x0000000000000000 -#else .quad per_cpu__gdt_page -#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ -- cgit v1.2.3 From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: x86: move stack_canary into irq_stack Impact: x86_64 percpu area layout change, irq_stack now at the beginning Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack. tj: * updated subject * dropped asm relocation of irq_stack_ptr * updated comments a bit * rebased on top of stack canary changes Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/asm-offsets_64.c | 4 ---- arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/head_64.S | 13 +++++-------- arch/x86/kernel/setup_percpu.c | 34 ++++------------------------------ arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++-- 5 files changed, 19 insertions(+), 47 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 64c834a39aa..94f9c8b39d2 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -48,10 +48,6 @@ int main(void) #endif BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - DEFINE(pda_size, sizeof(struct x8664_pda)); - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f83a4d6160f..098934e72a1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); #ifdef CONFIG_SMP DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ #else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; #endif DEFINE_PER_CPU(unsigned long, kernel_stack) = @@ -960,7 +961,7 @@ void __cpuinit cpu_init(void) loadsegment(fs, 0); loadsegment(gs, 0); - load_pda_offset(cpu); + load_gs_base(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 98ea26a2fca..a0a2b5ca9b7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -242,13 +242,10 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * On SMP, %gs should point to the per-cpu area. For initial - * boot, make %gs point to the init data section. For a - * secondary CPU,initial_gs should be set to its pda address - * before the CPU runs this code. - * - * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't - * change. + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -281,7 +278,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad PER_CPU_VAR(__pda) + .quad PER_CPU_VAR(irq_stack_union) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efbafbbff58..90b8e154bb5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif -/* - * Define load_pda_offset() and per-cpu __pda for x86_64. - * load_pda_offset() is responsible for loading the offset of pda into - * %gs. - * - * On SMP, pda offset also duals as percpu base address and thus it - * should be at the start of per-cpu area. To achieve this, it's - * preallocated in vmlinux_64.lds.S directly instead of using - * DEFINE_PER_CPU(). - */ -#ifdef CONFIG_X86_64 -void __cpuinit load_pda_offset(int cpu) -{ - /* Memory clobbers used to order pda/percpu accesses */ - mb(); - wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); - mb(); -} -#ifndef CONFIG_SMP -DEFINE_PER_CPU(struct x8664_pda, __pda); -#endif -EXPORT_PER_CPU_SYMBOL(__pda); -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* - * CPU0 modified pda in the init data area, reload pda - * offset for CPU0 and clear the area for others. + * Up to this point, CPU0 has been using .data.init + * area. Reload %gs offset for CPU0. */ if (cpu == 0) - load_pda_offset(0); - else - memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); + load_gs_base(cpu); #endif DBG("PERCPU: cpu %4d %p\n", cpu, ptr); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index a09abb8fb97..c9740996430 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -220,8 +220,7 @@ SECTIONS * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) - per_cpu____pda = __per_cpu_start; + PERCPU_VADDR(0, :percpu) #else PERCPU(PAGE_SIZE) #endif @@ -262,3 +261,8 @@ SECTIONS */ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif -- cgit v1.2.3 From 0d974d4592708f85044751817da4b7016e1b0602 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 18 Jan 2009 19:52:25 -0500 Subject: x86: remove pda.h Impact: cleanup Signed-off-by: Brian Gerst --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_64.c | 1 - arch/x86/kernel/traps.c | 1 - 4 files changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 94f9c8b39d2..8793ab33e2c 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 098934e72a1..3887fcf6e51 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -30,7 +30,6 @@ #include #endif -#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 088bc9a0f82..c422eebb0c5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284..ed5aee5f3fc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,7 +59,6 @@ #ifdef CONFIG_X86_64 #include #include -#include #else #include #include -- cgit v1.2.3 From 5766b842b23c6b40935a5f3bd435b2bcdaff2143 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jan 2009 09:13:15 +0100 Subject: x86, cpumask: fix tlb flush race Impact: fix bootup crash The cpumask is now passed in as a reference to mm->cpu_vm_mask, not on the stack - hence it is not constant anymore during the TLB flush. That way it could race and some static sanity checks would trigger: [ 238.154287] ------------[ cut here ]------------ [ 238.156039] kernel BUG at arch/x86/kernel/tlb_32.c:130! [ 238.156039] invalid opcode: 0000 [#1] SMP [ 238.156039] last sysfs file: /sys/class/net/eth2/address [ 238.156039] Modules linked in: [ 238.156039] [ 238.156039] Pid: 6493, comm: ifup-eth Not tainted (2.6.29-rc2-tip #1) P4DC6 [ 238.156039] EIP: 0060:[] EFLAGS: 00010202 CPU: 2 [ 238.156039] EIP is at native_flush_tlb_others+0x35/0x158 [ 238.156039] EAX: c0ef972c EBX: f6143301 ECX: 00000000 EDX: 00000000 [ 238.156039] ESI: f61433a8 EDI: f6143200 EBP: f34f3e00 ESP: f34f3df0 [ 238.156039] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 238.156039] Process ifup-eth (pid: 6493, ti=f34f2000 task=f399ab00 task.ti=f34f2000) [ 238.156039] Stack: [ 238.156039] ffffffff f61433a8 ffffffff f6143200 f34f3e18 c0118e9c 00000000 f6143200 [ 238.156039] f61433a8 f5bec738 f34f3e28 c0119435 c2b5b830 f6143200 f34f3e34 c01c2dc3 [ 238.156039] bffd9000 f34f3e60 c01c3051 00000000 ffffffff f34f3e4c 00000000 00000071 [ 238.156039] Call Trace: [ 238.156039] [] ? flush_tlb_others+0x52/0x5b [ 238.156039] [] ? flush_tlb_mm+0x7f/0x8b [ 238.156039] [] ? tlb_finish_mmu+0x2d/0x55 [ 238.156039] [] ? exit_mmap+0x124/0x170 [ 238.156039] [] ? mmput+0x40/0xf5 [ 238.156039] [] ? flush_old_exec+0x640/0x94b [ 238.156039] [] ? fsnotify_access+0x37/0x39 [ 238.156039] [] ? kernel_read+0x39/0x4b [ 238.156039] [] ? load_elf_binary+0x4a1/0x11bb [ 238.156039] [] ? might_fault+0x51/0x9c [ 238.156039] [] ? paravirt_read_tsc+0x20/0x4f [ 238.156039] [] ? native_sched_clock+0x5d/0x60 [ 238.156039] [] ? search_binary_handler+0xab/0x2c4 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? _raw_read_unlock+0x21/0x46 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? search_binary_handler+0xb2/0x2c4 [ 238.156039] [] ? do_execve+0x21c/0x2ee [ 238.156039] [] ? sys_execve+0x51/0x8c [ 238.156039] [] ? sysenter_do_call+0x12/0x43 Fix it by not assuming that the cpumask is constant. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e3..d37bbfcb813 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -125,9 +125,8 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { /* - * - mask must exist :) + * mm must exist :) */ - BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); /* @@ -138,14 +137,18 @@ void native_flush_tlb_others(const struct cpumask *cpumask, spin_lock(&tlbstate_lock); cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + + /* + * If a task whose mm mask we are looking at has descheduled and + * has cleared its presence from the mask, or if a CPU which we ran + * on has gone down then there might be no flush work left: + */ if (unlikely(cpumask_empty(flush_cpumask))) { spin_unlock(&tlbstate_lock); return; } -#endif + flush_mm = mm; flush_va = va; -- cgit v1.2.3 From afb33f8c0d7dea8c48ae1c2e3af5b437aa8dd7bb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 12 Jan 2009 12:53:45 +0100 Subject: x86: remove byte locks Impact: cleanup Remove byte locks implementation, which was introduced by Jeremy in 8efcbab6 ("paravirt: introduce a "lock-byte" spinlock implementation"), but turned out to be dead code that is not used by any in-kernel virtualization guest (Xen uses its own variant of spinlocks implementation and KVM is not planning to move to byte locks). Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 95777b0faa7..3a7c5a44082 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { }; EXPORT_SYMBOL(pv_lock_ops); -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} -- cgit v1.2.3 From 06deef892c7327992434917fb6592c233430803d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: x86: clean up gdt_page definition Impact: cleanup && more compact percpu area layout with future changes Move 64-bit GDT to page-aligned section and clean up comment formatting. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3887fcf6e51..a8f0dedcb0c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -63,23 +63,23 @@ cpumask_t cpu_sibling_setup_map; static struct cpu_dev *this_cpu __cpuinitdata; +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * The TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) + */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; #else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -112,8 +112,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; #endif +} }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 0dd76d736eeb3e0ef86c5b103b47ae0e15edebad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: x86: set %fs to __KERNEL_PERCPU unconditionally for x86_32 Impact: cleanup %fs is currently set to __KERNEL_DS at boot, and conditionally switched to __KERNEL_PERCPU for secondary cpus. Instead, initialize GDT_ENTRY_PERCPU to the same attributes as GDT_ENTRY_KERNEL_DS and set %fs to __KERNEL_PERCPU unconditionally. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head_32.S | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8f0dedcb0c..fbebbcec001 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -111,7 +111,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70..24c0e5cd71e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + xorl %eax,%eax # Clear GS and LDT movl %eax,%gs lldt %ax @@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ -- cgit v1.2.3 From d650a5148593b65a3c3f9a344f46b91b7dfe7713 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: x86: merge irq_regs.h Impact: cleanup, better irq_regs code generation for x86_64 Make 64-bit use the same optimizations as 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/irq_64.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1db05247b47..0b254de8408 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -22,6 +22,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + /* * Probabilistic stack overflow check: * -- cgit v1.2.3 From bdbcdd48883940bbd8d17eb01172d58a261a413a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: x86: uv cleanup Impact: cleanup Make the following uv related cleanups. * collect visible uv related definitions and interfaces into uv/uv.h and use it. this cleans up the messy situation where on 64bit, uv is defined properly, on 32bit generic it's dummy and on the rest undefined. after this clean up, uv is defined on 64 and dummy on 32. * update uv_flush_tlb_others() such that it takes cpumask of to-be-flushed cpus as argument, instead of that minus self, and returns yet-to-be-flushed cpumask, instead of modifying the passed in parameter. this interface change will ease dummy implementation of uv_flush_tlb_others() and makes uv tlb flush related stuff defined in tlb_uv proper. Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/genx2apic_uv_x.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/tlb_64.c | 18 +++++------ arch/x86/kernel/tlb_uv.c | 68 +++++++++++++++++++++++----------------- 5 files changed, 50 insertions(+), 39 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbebbcec001..99904f288d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6c..bfe36249145 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 869b98840fd..def770b57b5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index e64a32c4882..b8bed841ad6 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; @@ -206,16 +205,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - /* FIXME: could be an percpu_alloc'd thing */ - static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); + unsigned int cpu; - cpumask_andnot(after_uv_flush, cpumask, - cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - - put_cpu_var(flush_tlb_uv_cpumask); + cpu = get_cpu(); + cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); + if (cpumask) + flush_tlb_others_ipi(cpumask, mm, va); + put_cpu(); return; } flush_tlb_others_ipi(cpumask, mm, va); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 690dcf1a27d..aae15dd7260 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * * Send a broadcast and wait for a broadcast message to complete. * - * The cpumaskp mask contains the cpus the broadcast was sent to. + * The flush_mask contains the cpus the broadcast was sent to. * - * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask will have - * some bits still set. + * Returns NULL if all remote flushing was done. The mask is zeroed. + * Returns @flush_mask if some remote flushing remains to be done. The + * mask will have some bits still set. */ -int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - struct cpumask *cpumaskp) +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, + struct bau_desc *bau_desc, + struct cpumask *flush_mask) { int completion_status = 0; int right_shift; @@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpumask_clear_cpu(bit, cpumaskp); + cpumask_clear_cpu(bit, flush_mask); } - if (!cpumask_empty(cpumaskp)) - return 0; - return 1; + if (!cpumask_empty(flush_mask)) + return flush_mask; + return NULL; } /** * uv_flush_tlb_others - globally purge translation cache of a virtual * address or all TLB's - * @cpumaskp: mask of all cpu's in which the address is to be removed + * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu * * This is the entry point for initiating any UV global TLB shootdown. * * Purges the translation caches of all specified processors of the given * virtual address, or purges all TLB's on specified processors. * - * The caller has derived the cpumaskp from the mm_struct and has subtracted - * the local cpu from the mask. This function is called only if there - * are bits set in the mask. (e.g. flush_tlb_page()) + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) * - * The cpumaskp is converted into a nodemask of the nodes containing + * The cpumask is converted into a nodemask of the nodes containing * the cpus. * - * Returns 1 if all remote flushing was done. - * Returns 0 if some remote flushing remains to be done. + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. */ -int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, - unsigned long va) +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, unsigned int cpu) { + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); int i; int bit; int blade; - int cpu; + int uv_cpu; int this_blade; int locals = 0; struct bau_desc *bau_desc; - cpu = uv_blade_processor_id(); + WARN_ON(!in_atomic()); + + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + + uv_cpu = uv_blade_processor_id(); this_blade = uv_numa_blade_id(); bau_desc = __get_cpu_var(bau_control).descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { @@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, * no off_node flushing; return status for local node */ if (locals) - return 0; + return flush_mask; else - return 1; + return NULL; } __get_cpu_var(ptcstats).requestor++; __get_cpu_var(ptcstats).ntargeted += i; bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = smp_processor_id(); + bau_desc->payload.sending_cpu = cpu; - return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); + return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); } /* -- cgit v1.2.3 From 6dd01bedee6c3191643db303a1dc530bad56ec55 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: x86: prepare for tlb merge Impact: clean up, ipi vector number reordering for x86_32 Make the following changes to prepare for tlb merge. * reorder x86_32 ip vectors * adjust tlb_32.c and tlb_64.c such that their logics coincide exactly - on spurious invalidate ipi, tlb_32 acks the irq - tlb_64 now has proper memory barriers around clearing flush_cpumask (no change in generated code) * unexport flush_tlb_page from tlb_32.c, there's no user * use unsigned int for cpu id * drop unnecessary includes from tlb_64.c Signed-off-by: Tejun Heo --- arch/x86/kernel/tlb_32.c | 10 +++++----- arch/x86/kernel/tlb_64.c | 18 +++++++----------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index abf0808d6fc..93fcb05c7d4 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -84,13 +84,15 @@ EXPORT_SYMBOL_GPL(leave_mm); * * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. */ void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + unsigned int cpu; - cpu = get_cpu(); + cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; @@ -112,12 +114,11 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } else leave_mm(cpu); } +out: ack_APIC_irq(); smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } @@ -215,7 +216,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } -EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void *info) { diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index b8bed841ad6..19ac661422f 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -1,20 +1,14 @@ #include #include -#include #include #include -#include -#include #include +#include -#include -#include #include #include -#include -#include -#include +#include #include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) @@ -121,8 +115,8 @@ EXPORT_SYMBOL_GPL(leave_mm); asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) { - int cpu; - int sender; + unsigned int cpu; + unsigned int sender; union smp_flush_state *f; cpu = smp_processor_id(); @@ -155,14 +149,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); + smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); + smp_mb__after_clear_bit(); inc_irq_stat(irq_tlb_count); } static void flush_tlb_others_ipi(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { - int sender; + unsigned int sender; union smp_flush_state *f; /* Caller has disabled preemption */ -- cgit v1.2.3 From 02cf94c370e0dc9bf408fe45eb86fe9ad58eaf7f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: x86: make x86_32 use tlb_64.c Impact: less contention when issuing invalidate IPI, cleanup Make x86_32 use the same tlb code as 64bit. The 64bit code uses multiple IPI vectors for tlb shootdown to reduce contention. This patch makes x86_32 allocate the same 8 IPIs as x86_64 and share the code paths. Note that the usage of asmlinkage is inconsistent for x86_32 and 64 and calls for further cleanup. This has been noted with a FIXME comment in tlb_64.c. Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/entry_32.S | 6 +- arch/x86/kernel/irqinit_32.c | 11 +- arch/x86/kernel/tlb_32.c | 239 ------------------------------------------- arch/x86/kernel/tlb_64.c | 12 ++- 5 files changed, 25 insertions(+), 245 deletions(-) delete mode 100644 arch/x86/kernel/tlb_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index eb074530c7d..a62a15c2222 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46469029e9d..a0b91aac72a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -672,7 +672,7 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -#define BUILD_INTERRUPT(name, nr) \ +#define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ pushl $~(nr); \ @@ -680,11 +680,13 @@ ENTRY(name) \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ - call smp_##name; \ + call fn; \ jmp ret_from_intr; \ CFI_ENDPROC; \ ENDPROC(name) +#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e674..bf629cadec1 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -149,8 +149,15 @@ void __init native_init_IRQ(void) */ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index 93fcb05c7d4..00000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include - -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -/* must come after the send_IPI functions above for inlining */ -#include - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - */ - -static cpumask_var_t flush_cpumask; -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -void leave_mm(int cpu) -{ - BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - - cpu = smp_processor_id(); - - if (!cpumask_test_cpu(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, flush_cpumask); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - /* - * - mask must exist :) - */ - BUG_ON(cpumask_empty(cpumask)); - BUG_ON(!mm); - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * AK: x86-64 has a faster method that could be ported. - */ - spin_lock(&tlbstate_lock); - - cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); - if (unlikely(cpumask_empty(flush_cpumask))) { - spin_unlock(&tlbstate_lock); - return; - } -#endif - flush_mm = mm; - flush_va = va; - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpumask_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - cpu_relax(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); - preempt_enable(); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} - -static int init_flush_cpumask(void) -{ - alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); - return 0; -} -early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 19ac661422f..b3ca1b94065 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -113,7 +113,17 @@ EXPORT_SYMBOL_GPL(leave_mm); * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +/* + * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop + * but still used for documentation purpose but the usage is slightly + * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt + * entry calls in with the first parameter in %eax. Maybe define + * intrlinkage? + */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void smp_invalidate_interrupt(struct pt_regs *regs) { unsigned int cpu; unsigned int sender; -- cgit v1.2.3 From 16c2d3f895a3bc8d8e4c76c2646a6b750c181299 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: x86: rename tlb_64.c to tlb.c Impact: file rename tlb_64.c is now the tlb code for both 32 and 64. Rename it to tlb.c. Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/tlb.c | 296 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tlb_64.c | 296 ----------------------------------------------- 3 files changed, 297 insertions(+), 297 deletions(-) create mode 100644 arch/x86/kernel/tlb.c delete mode 100644 arch/x86/kernel/tlb_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a62a15c2222..0626a88fbb4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/tlb.c b/arch/x86/kernel/tlb.c new file mode 100644 index 00000000000..b3ca1b94065 --- /dev/null +++ b/arch/x86/kernel/tlb.c @@ -0,0 +1,296 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + +#include +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + * + * More scalable flush, from Andi Kleen + * + * To avoid global state use 8 different call vectors. + * Each CPU uses a specific vector to trigger flushes on other + * CPUs. Depending on the received vector the target CPUs look into + * the right per cpu variable for the flush data. + * + * With more than 8 CPUs they are hashed to the 8 available + * vectors. The limited global vector space forces us to this right now. + * In future when interrupts are split into per CPU domains this could be + * fixed, at the cost of triggering multiple IPIs in some cases. + */ + +union smp_flush_state { + struct { + struct mm_struct *flush_mm; + unsigned long flush_va; + spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); + }; + char pad[SMP_CACHE_BYTES]; +} ____cacheline_aligned; + +/* State is put into the per CPU data section, but padded + to a full cache line because other CPUs can access it and we don't + want false sharing in the per cpu data segment. */ +static DEFINE_PER_CPU(union smp_flush_state, flush_state); + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. + */ +void leave_mm(int cpu) +{ + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} +EXPORT_SYMBOL_GPL(leave_mm); + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superfluous + * tlb flush. + * 1a2) set cpu mmu_state to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu mmu_state to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu mmu_state is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. + */ + +/* + * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop + * but still used for documentation purpose but the usage is slightly + * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt + * entry calls in with the first parameter in %eax. Maybe define + * intrlinkage? + */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void smp_invalidate_interrupt(struct pt_regs *regs) +{ + unsigned int cpu; + unsigned int sender; + union smp_flush_state *f; + + cpu = smp_processor_id(); + /* + * orig_rax contains the negated interrupt vector. + * Use that to determine where the sender put the data. + */ + sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); + + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) + goto out; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { + if (f->flush_va == TLB_FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(f->flush_va); + } else + leave_mm(cpu); + } +out: + ack_APIC_irq(); + smp_mb__before_clear_bit(); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); + smp_mb__after_clear_bit(); + inc_irq_stat(irq_tlb_count); +} + +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + unsigned int sender; + union smp_flush_state *f; + + /* Caller has disabled preemption */ + sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; + f = &per_cpu(flush_state, sender); + + /* + * Could avoid this lock when + * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is + * probably not worth checking this for a cache-hot lock. + */ + spin_lock(&f->tlbstate_lock); + + f->flush_mm = mm; + f->flush_va = va; + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); + /* + * We have to send the IPI only to + * CPUs affected. + */ + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); + + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) + cpu_relax(); + + f->flush_mm = NULL; + f->flush_va = 0; + spin_unlock(&f->tlbstate_lock); +} + +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + unsigned int cpu; + + cpu = get_cpu(); + cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); + if (cpumask) + flush_tlb_others_ipi(cpumask, mm, va); + put_cpu(); + return; + } + flush_tlb_others_ipi(cpumask, mm, va); +} + +static int __cpuinit init_smp_flush(void) +{ + int i; + + for_each_possible_cpu(i) + spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + + return 0; +} +core_initcall(init_smp_flush); + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + + preempt_disable(); + + local_flush_tlb(); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + preempt_enable(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + preempt_disable(); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + + if (current->active_mm == mm) { + if (current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); + + preempt_enable(); +} + +static void do_flush_tlb_all(void *info) +{ + unsigned long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) + leave_mm(cpu); +} + +void flush_tlb_all(void) +{ + on_each_cpu(do_flush_tlb_all, NULL, 1); +} diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c deleted file mode 100644 index b3ca1b94065..00000000000 --- a/arch/x86/kernel/tlb_64.c +++ /dev/null @@ -1,296 +0,0 @@ -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -#include -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - * - * More scalable flush, from Andi Kleen - * - * To avoid global state use 8 different call vectors. - * Each CPU uses a specific vector to trigger flushes on other - * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. - * - * With more than 8 CPUs they are hashed to the 8 available - * vectors. The limited global vector space forces us to this right now. - * In future when interrupts are split into per CPU domains this could be - * fixed, at the cost of triggering multiple IPIs in some cases. - */ - -union smp_flush_state { - struct { - struct mm_struct *flush_mm; - unsigned long flush_va; - spinlock_t tlbstate_lock; - DECLARE_BITMAP(flush_cpumask, NR_CPUS); - }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; - -/* State is put into the per CPU data section, but padded - to a full cache line because other CPUs can access it and we don't - want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - */ -void leave_mm(int cpu) -{ - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu mmu_state to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu mmu_state to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu mmu_state is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -/* - * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop - * but still used for documentation purpose but the usage is slightly - * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt - * entry calls in with the first parameter in %eax. Maybe define - * intrlinkage? - */ -#ifdef CONFIG_X86_64 -asmlinkage -#endif -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - unsigned int sender; - union smp_flush_state *f; - - cpu = smp_processor_id(); - /* - * orig_rax contains the negated interrupt vector. - * Use that to determine where the sender put the data. - */ - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); - - if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(f->flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -static void flush_tlb_others_ipi(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - unsigned int sender; - union smp_flush_state *f; - - /* Caller has disabled preemption */ - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); - - /* - * Could avoid this lock when - * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is - * probably not worth checking this for a cache-hot lock. - */ - spin_lock(&f->tlbstate_lock); - - f->flush_mm = mm; - f->flush_va = va; - cpumask_andnot(to_cpumask(f->flush_cpumask), - cpumask, cpumask_of(smp_processor_id())); - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(to_cpumask(f->flush_cpumask), - INVALIDATE_TLB_VECTOR_START + sender); - - while (!cpumask_empty(to_cpumask(f->flush_cpumask))) - cpu_relax(); - - f->flush_mm = NULL; - f->flush_va = 0; - spin_unlock(&f->tlbstate_lock); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - if (is_uv_system()) { - unsigned int cpu; - - cpu = get_cpu(); - cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); - if (cpumask) - flush_tlb_others_ipi(cpumask, mm, va); - put_cpu(); - return; - } - flush_tlb_others_ipi(cpumask, mm, va); -} - -static int __cpuinit init_smp_flush(void) -{ - int i; - - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); - - return 0; -} -core_initcall(init_smp_flush); - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); - - preempt_enable(); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} -- cgit v1.2.3 From 55f4949f5765e7a29863b6d17a774601810732f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:08:53 +0100 Subject: x86, mm: move tlb.c to arch/x86/mm/ Impact: cleanup Now that it's unified, move the (SMP) TLB flushing code from arch/x86/kernel/ to arch/x86/mm/, where it belongs logically. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/tlb.c | 296 ----------------------------------------------- 2 files changed, 1 insertion(+), 297 deletions(-) delete mode 100644 arch/x86/kernel/tlb.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0626a88fbb4..0b3272f58bd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/tlb.c b/arch/x86/kernel/tlb.c deleted file mode 100644 index b3ca1b94065..00000000000 --- a/arch/x86/kernel/tlb.c +++ /dev/null @@ -1,296 +0,0 @@ -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -#include -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - * - * More scalable flush, from Andi Kleen - * - * To avoid global state use 8 different call vectors. - * Each CPU uses a specific vector to trigger flushes on other - * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. - * - * With more than 8 CPUs they are hashed to the 8 available - * vectors. The limited global vector space forces us to this right now. - * In future when interrupts are split into per CPU domains this could be - * fixed, at the cost of triggering multiple IPIs in some cases. - */ - -union smp_flush_state { - struct { - struct mm_struct *flush_mm; - unsigned long flush_va; - spinlock_t tlbstate_lock; - DECLARE_BITMAP(flush_cpumask, NR_CPUS); - }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; - -/* State is put into the per CPU data section, but padded - to a full cache line because other CPUs can access it and we don't - want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - */ -void leave_mm(int cpu) -{ - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu mmu_state to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu mmu_state to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu mmu_state is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -/* - * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop - * but still used for documentation purpose but the usage is slightly - * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt - * entry calls in with the first parameter in %eax. Maybe define - * intrlinkage? - */ -#ifdef CONFIG_X86_64 -asmlinkage -#endif -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - unsigned int sender; - union smp_flush_state *f; - - cpu = smp_processor_id(); - /* - * orig_rax contains the negated interrupt vector. - * Use that to determine where the sender put the data. - */ - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); - - if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(f->flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -static void flush_tlb_others_ipi(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - unsigned int sender; - union smp_flush_state *f; - - /* Caller has disabled preemption */ - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); - - /* - * Could avoid this lock when - * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is - * probably not worth checking this for a cache-hot lock. - */ - spin_lock(&f->tlbstate_lock); - - f->flush_mm = mm; - f->flush_va = va; - cpumask_andnot(to_cpumask(f->flush_cpumask), - cpumask, cpumask_of(smp_processor_id())); - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(to_cpumask(f->flush_cpumask), - INVALIDATE_TLB_VECTOR_START + sender); - - while (!cpumask_empty(to_cpumask(f->flush_cpumask))) - cpu_relax(); - - f->flush_mm = NULL; - f->flush_va = 0; - spin_unlock(&f->tlbstate_lock); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - if (is_uv_system()) { - unsigned int cpu; - - cpu = get_cpu(); - cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); - if (cpumask) - flush_tlb_others_ipi(cpumask, mm, va); - put_cpu(); - return; - } - flush_tlb_others_ipi(cpumask, mm, va); -} - -static int __cpuinit init_smp_flush(void) -{ - int i; - - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); - - return 0; -} -core_initcall(init_smp_flush); - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); - - preempt_enable(); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} -- cgit v1.2.3 From 03b486322e994dde49e67aedb391867b7cf28822 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:36:04 +0100 Subject: x86: make UV support configurable Make X86 SGI Ultraviolet support configurable. Saves about 13K of text size on my modest config. text data bss dec hex filename 6770537 1158680 694356 8623573 8395d5 vmlinux 6757492 1157664 694228 8609384 835e68 vmlinux.nouv Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 5 +++-- arch/x86/kernel/efi.c | 2 ++ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/genapic_64.c | 2 ++ arch/x86/kernel/io_apic.c | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0b3272f58bd..a99437c965c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -115,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o uv_sysfs.o + obj-y += genapic_64.o genapic_flat_64.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o + obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o + obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe1..b205272ad39 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,10 +366,12 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); +#ifdef CONFIG_X86_UV } else if (!efi_guidcmp(config_tables[i].guid, UV_SYSTEM_TABLE_GUID)) { efi.uv_systab = config_tables[i].table; printk(" UVsystab=0x%lx ", config_tables[i].table); +#endif } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c52b6091916..a52703864a1 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif +#ifdef CONFIG_X86_UV apicinterrupt UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt +#endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2bced78b0b8..e656c272115 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_UV &apic_x2apic_uv_x, +#endif &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f7966039072..e4d36bd56b6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. -- cgit v1.2.3 From ab897d2013128f470240a541b31cf5e636984e71 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:16 -0800 Subject: x86/pvops: remove pte_flags pvop pte_flags() was introduced as a new pvop in order to extract just the flags portion of a pte, which is a potentially cheaper operation than extracting the page number as well. It turns out this operation is not needed, because simply using a mask to extract the flags from a pte is sufficient for all current users. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..202514be592 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, - .pte_flags = native_pte_flags, .pgd_val = native_pgd_val, .make_pte = native_make_pte, -- cgit v1.2.3 From 03d2989df9c1c7df5b33c7a87e0790465461836a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:28 +0900 Subject: x86: remove idle_timestamp from 32bit irq_cpustat_t Impact: bogus irq_cpustat field removed idle_timestamp is left over from the removed irqbalance code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/process_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2c00a57ccb9..1a1ae8edc40 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -108,7 +108,6 @@ void cpu_idle(void) play_dead(); local_irq_disable(); - __get_cpu_var(irq_stat).idle_timestamp = jiffies; /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); -- cgit v1.2.3 From 3819cd489ec5d18a4cbd2f05acdc516473caa105 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:29 +0900 Subject: x86: remove include of apic.h from hardirq_64.h Impact: cleanup APIC definitions aren't needed here. Remove the include and fix up the fallout. tj: added include to mce_intel_64.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 + arch/x86/kernel/efi_64.c | 1 + arch/x86/kernel/irq_64.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd3..5e8c79e748a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215..a4ee29127fd 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -36,6 +36,7 @@ #include #include #include +#include static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b254de8408..018963aa6ee 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,7 @@ #include #include #include +#include DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -- cgit v1.2.3 From 98e3d45edad207b4358948d6e2cac4e482c3bb5d Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:50:10 -0800 Subject: x86: signal: use {get|put}_user_try and catch Impact: use new framework Use {get|put}_user_try, catch, and _ex in arch/x86/kernel/signal.c. Note: this patch contains "WARNING: line over 80 characters", because when introducing new block I insert an indent to avoid mistakes by edit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/signal.c | 291 +++++++++++++++++++++++++---------------------- 1 file changed, 154 insertions(+), 137 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89bb7668041..cf34eb37fbe 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -51,24 +51,24 @@ #endif #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp; \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define GET_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ loadsegment(seg, tmp); \ } @@ -83,45 +83,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + get_user_try { + #ifdef CONFIG_X86_32 - GET_SEG(gs); - COPY_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); + GET_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); #endif /* CONFIG_X86_32 */ - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); #ifdef CONFIG_X86_64 - COPY(r8); - COPY(r9); - COPY(r10); - COPY(r11); - COPY(r12); - COPY(r13); - COPY(r14); - COPY(r15); + COPY(r8); + COPY(r9); + COPY(r10); + COPY(r11); + COPY(r12); + COPY(r13); + COPY(r14); + COPY(r15); #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); #else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); #endif /* CONFIG_X86_32 */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ + + get_user_ex(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -131,57 +135,60 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, { int err = 0; + put_user_try { + #ifdef CONFIG_X86_32 - { - unsigned int tmp; + { + unsigned int tmp; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - } - err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); - err |= __put_user(regs->es, (unsigned int __user *)&sc->es); - err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + } + put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); + put_user_ex(regs->es, (unsigned int __user *)&sc->es); + put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); #endif /* CONFIG_X86_32 */ - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); #ifdef CONFIG_X86_64 - err |= __put_user(regs->r8, &sc->r8); - err |= __put_user(regs->r9, &sc->r9); - err |= __put_user(regs->r10, &sc->r10); - err |= __put_user(regs->r11, &sc->r11); - err |= __put_user(regs->r12, &sc->r12); - err |= __put_user(regs->r13, &sc->r13); - err |= __put_user(regs->r14, &sc->r14); - err |= __put_user(regs->r15, &sc->r15); + put_user_ex(regs->r8, &sc->r8); + put_user_ex(regs->r9, &sc->r9); + put_user_ex(regs->r10, &sc->r10); + put_user_ex(regs->r11, &sc->r11); + put_user_ex(regs->r12, &sc->r12); + put_user_ex(regs->r13, &sc->r13); + put_user_ex(regs->r14, &sc->r14); + put_user_ex(regs->r15, &sc->r15); #endif /* CONFIG_X86_64 */ - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); #ifdef CONFIG_X86_32 - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); #else /* !CONFIG_X86_32 */ - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->cs, &sc->cs); - err |= __put_user(0, &sc->gs); - err |= __put_user(0, &sc->fs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->cs, &sc->cs); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ - err |= __put_user(fpstate, &sc->fpstate); + put_user_ex(fpstate, &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -336,43 +343,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - return -EFAULT; - - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(&frame->info, &frame->pinfo); + put_user_ex(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); - /* Set up to return from userspace. */ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + put_user_ex(restorer, &frame->pretcode); - /* - * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + /* + * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + } put_user_catch(err); if (err) return -EFAULT; @@ -436,28 +441,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; } - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - /* could use a vstub here */ - return -EFAULT; - } + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + /* x86-64 should always use SA_RESTORER. */ + if (ka->sa.sa_flags & SA_RESTORER) { + put_user_ex(ka->sa.sa_restorer, &frame->pretcode); + } else { + /* could use a vstub here */ + err |= -EFAULT; + } + } put_user_catch(err); if (err) return -EFAULT; @@ -509,31 +516,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { struct k_sigaction new_ka, old_ka; - int ret; + int ret = 0; if (act) { old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + if (!access_ok(VERIFY_READ, act, sizeof(*act))) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); + get_user_try { + get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); + get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); + get_user_ex(mask, &act->sa_mask); + get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); + } get_user_catch(ret); + + if (ret) + return -EFAULT; siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_try { + put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); + put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); + put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); + } put_user_catch(ret); + + if (ret) + return -EFAULT; } return ret; -- cgit v1.2.3 From 75a048119e76540d73132cfc8e0fa0c0a8bb6c83 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 22 Jan 2009 16:17:05 -0800 Subject: x86: handle PAT more like other CPU features Impact: Cleanup When PAT was originally introduced, it was handled specially for a few reasons: - PAT bugs are hard to track down, so we wanted to maintain a whitelist of CPUs. - The i386 and x86-64 CPUID code was not yet unified. Both of these are now obsolete, so handle PAT like any other features, including ordinary feature blacklisting due to known bugs. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/addon_cpuid_features.c | 34 ------------------------------ arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/cpu/intel.c | 12 +++++++++++ 3 files changed, 12 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 2cf23634b6d..4e581fdc0a5 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -143,37 +143,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) return; #endif } - -#ifdef CONFIG_X86_PAT -void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) -{ - if (!cpu_has_pat) - pat_disable("PAT not supported by CPU."); - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - /* - * There is a known erratum on Pentium III and Core Solo - * and Core Duo CPUs. - * " Page with PAT set to WC while associated MTRR is UC - * may consolidate to UC " - * Because of this erratum, it is better to stick with - * setting WC in MTRR rather than using PAT on these CPUs. - * - * Enable PAT WC only on P4, Core 2 or later CPUs. - */ - if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) - return; - - pat_disable("PAT WC disabled due to known CPU erratum."); - return; - - case X86_VENDOR_AMD: - case X86_VENDOR_CENTAUR: - case X86_VENDOR_TRANSMETA: - return; - } - - pat_disable("PAT disabled. Not yet verified on this CPU type."); -} -#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b..0f8656361e0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -570,8 +570,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); - validate_pat_support(c); - #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8ea6929e974..20ce03acf04 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -50,6 +50,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 == 6 && c->x86_model < 15) + clear_cpu_cap(c, X86_FEATURE_PAT); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From b38b0665905538e76e26f2a4c686179abb1f69f6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:20:50 -0800 Subject: x86: filter CPU features dependent on unavailable CPUID levels Impact: Fixes potential crashes on misconfigured systems. Some CPU features require specific CPUID levels to be available in order to function, as they contain information about the operation of a specific feature. However, some BIOSes and virtualization software provide the ability to mask CPUID levels in order to support legacy operating systems. We try to enable such CPUID levels when we know how to do it, but for the remaining cases, filter out such CPU features when there is no way for us to support them. Do this in one place, in the CPUID code, with a table-driven approach. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f8656361e0..21f086b4c1a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -212,6 +212,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif +/* + * Some CPU features depend on higher CPUID levels, which may not always + * be available due to CPUID level capping or broken virtualization + * software. Add those features to this table to auto-disable them. + */ +struct cpuid_dependent_feature { + u32 feature; + u32 level; +}; +static const struct cpuid_dependent_feature __cpuinitconst +cpuid_dependent_features[] = { + { X86_FEATURE_MWAIT, 0x00000005 }, + { X86_FEATURE_DCA, 0x00000009 }, + { X86_FEATURE_XSAVE, 0x0000000d }, + { 0, 0 } +}; + +static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) +{ + const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { + /* + * Note: cpuid_level is set to -1 if unavailable, but + * extended_extended_level is set to 0 if unavailable + * and the legitimate extended levels are all negative + * when signed; hence the weird messing around with + * signs here... + */ + if (cpu_has(c, df->feature) && + ((s32)df->feature < 0 ? + (u32)df->feature > (u32)c->extended_cpuid_level : + (s32)df->feature > (s32)c->cpuid_level)) { + clear_cpu_cap(c, df->feature); + if (warn) + printk(KERN_WARNING + "CPU: CPU feature %s disabled " + "due to lack of CPUID level 0x%x\n", + x86_cap_flags[df->feature], + df->level); + } + } +} + /* * Naming convention should be: [()] * This table only is used unless init_() below doesn't set it; @@ -573,6 +616,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif + filter_cpuid_features(c, false); } void __init early_cpu_init(void) @@ -706,6 +750,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * we do "generic changes." */ + /* Filter out anything that depends on CPUID levels we don't have */ + filter_cpuid_features(c, true); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { char *p; -- cgit v1.2.3 From 34707bcd0452aba644396767bc9fb61585bdab4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 14:18:05 +0100 Subject: x86, debug: remove early_printk() #ifdefs from head_32.S Impact: cleanup Remove such constructs: #ifdef CONFIG_EARLY_PRINTK call early_printk #else call printk #endif Not only are they ugly, they are also pointless: a call to printk() maps to early_printk during early bootup anyway, if CONFIG_EARLY_PRINTK is enabled. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70..9f141071160 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -548,11 +548,7 @@ early_fault: pushl %eax pushl %edx /* trapno */ pushl $fault_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif #endif call dump_stack hlt_loop: @@ -580,11 +576,7 @@ ignore_int: pushl 32(%esp) pushl 40(%esp) pushl $int_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif addl $(5*4),%esp popl %ds popl %es -- cgit v1.2.3 From d5e397cb49b53381e4c99a064ca733c665646de8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 06:09:00 +0100 Subject: x86: improve early fault/irq printout Impact: add a stack dump to early IRQs/faults Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9f141071160..84d05a4d7fc 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -577,6 +577,9 @@ ignore_int: pushl 40(%esp) pushl $int_msg call printk + + call dump_stack + addl $(5*4),%esp popl %ds popl %es @@ -652,7 +655,7 @@ early_recursion_flag: .long 0 int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + .asciz "Unknown interrupt or fault at: %p %p %p\n" fault_msg: /* fault info: */ -- cgit v1.2.3 From 0d77e7f04d5da160307f4f5c030a171e004f602b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: merge setup_per_cpu_maps() into setup_per_cpu_areas() Impact: minor optimization Eliminates the need for two loops over possible cpus. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 48 +++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 90b8e154bb5..d0b1476490a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -97,33 +97,6 @@ static inline void setup_cpu_local_masks(void) #endif /* CONFIG_X86_32 */ #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -/* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. - */ -static void __init setup_per_cpu_maps(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); - per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif - } - - /* indicate the early static arrays will soon be gone */ - early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; - early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA - early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; -#endif -} #ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { @@ -181,6 +154,19 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + /* + * Copy data used in early init routines from the initial arrays to the + * per cpu data areas. These arrays then become expendable and the + * *_early_ptr's are zeroed indicating that the static arrays are gone. + */ + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; @@ -195,8 +181,12 @@ void __init setup_per_cpu_areas(void) DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } - /* Setup percpu data maps */ - setup_per_cpu_maps(); + /* indicate the early static arrays will soon be gone */ + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; +#endif /* Setup node to cpumask map */ setup_node_to_cpumask_map(); -- cgit v1.2.3 From 6470aff619fbb9dff8dfe8afa5033084cd55ca20 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: move 64-bit NUMA code Impact: Code movement, no functional change. Move the 64-bit NUMA code from setup_percpu.c to numa_64.c Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 237 +---------------------------------------- 1 file changed, 5 insertions(+), 232 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d0b1476490a..cb6d622520b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,32 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 /* (used later) */ -DEFINE_PER_CPU(int, node_number) = 0; -EXPORT_PER_CPU_SYMBOL(node_number); - -/* - * Map cpu index to node index - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - -/* - * Which logical CPUs are on which nodes - */ -cpumask_t *node_to_cpumask_map; -EXPORT_SYMBOL(node_to_cpumask_map); - -/* - * Setup node_to_cpumask_map - */ -static void __init setup_node_to_cpumask_map(void); - -#else -static inline void setup_node_to_cpumask_map(void) { } -#endif - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -163,13 +137,13 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; +#ifdef CONFIG_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif /* * Up to this point, CPU0 has been using .data.init * area. Reload %gs offset for CPU0. @@ -184,7 +158,7 @@ void __init setup_per_cpu_areas(void) /* indicate the early static arrays will soon be gone */ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif @@ -197,204 +171,3 @@ void __init setup_per_cpu_areas(void) #endif -#ifdef X86_64_NUMA - -/* - * Allocate node_to_cpumask_map based on number of available nodes - * Requires node_possible_map to be valid. - * - * Note: node_to_cpumask() is not valid until after this is done. - * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) - */ -static void __init setup_node_to_cpumask_map(void) -{ - unsigned int node, num = 0; - cpumask_t *map; - - /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } - - /* allocate the map */ - map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); - - pr_debug("Node to cpumask map at %p for %d nodes\n", - map, nr_node_ids); - - /* node_to_cpumask() will now work */ - node_to_cpumask_map = map; -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - /* early setting, no percpu area yet */ - if (cpu_to_node_map) { - cpu_to_node_map[cpu] = node; - return; - } - -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { - printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); - dump_stack(); - return; - } -#endif - per_cpu(x86_cpu_to_node_map, cpu) = node; - - if (node != NUMA_NO_NODE) - per_cpu(node_number, cpu) = node; -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -void __cpuinit numa_add_cpu(int cpu) -{ - cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -#else /* CONFIG_DEBUG_PER_CPU_MAPS */ - -/* - * --------- debug versions of the numa functions --------- - */ -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - cpumask_t *mask; - char buf[64]; - - if (node_to_cpumask_map == NULL) { - printk(KERN_ERR "node_to_cpumask_map NULL\n"); - dump_stack(); - return; - } - - mask = &node_to_cpumask_map[node]; - if (enable) - cpu_set(cpu, *mask); - else - cpu_clear(cpu, *mask); - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, 1); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, 0); -} - -int cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. - */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!per_cpu_offset(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - - -/* empty cpumask */ -static const cpumask_t cpu_mask_none; - -/* - * Returns a pointer to the bitmask of CPUs on Node 'node'. - */ -const cpumask_t *cpumask_of_node(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "cpumask_of_node(%d): no node_to_cpumask_map!\n", - node); - dump_stack(); - return (const cpumask_t *)&cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "cpumask_of_node(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return &cpu_mask_none; - } - return &node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(cpumask_of_node); - -/* - * Returns a bitmask of CPUs on Node 'node'. - * - * Side note: this function creates the returned cpumask on the stack - * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. - */ -cpumask_t node_to_cpumask(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); - dump_stack(); - return cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "node_to_cpumask(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return cpu_mask_none; - } - return node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(node_to_cpumask); - -/* - * --------- end of debug versions of the numa functions --------- - */ - -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ - -#endif /* X86_64_NUMA */ - -- cgit v1.2.3 From 2f2f52bad72f5e1ca5d1b9ad00a7b57a8cbd9159 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: move setup_cpu_local_masks() Impact: Code movement, no functional change. Move setup_cpu_local_masks() to kernel/cpu/common.c, where the masks are defined. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 9 +++++++++ arch/x86/kernel/setup_percpu.c | 19 ------------------- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 99904f288d6..67e30c8a282 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* correctly size the local cpu masks */ +void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #else /* CONFIG_X86_32 */ cpumask_t cpu_callin_map; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cb6d622520b..7bebdba8eb8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,25 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 74631a248dc2c2129a96f6b8b706ed54bb5c3d3c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: always page-align per-cpu area start and size Impact: cleanup The way the code is written, align is always PAGE_SIZE. Simplify the code by removing the align variable. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7bebdba8eb8..5d4a4964a8b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -69,15 +69,12 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size; char *ptr; int cpu; - unsigned long align = 1; /* Copy section for each CPU (we discard the original) */ - old_size = PERCPU_ENOUGH_ROOM; - align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size, align); + size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -86,20 +83,17 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { - ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); pr_debug("per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); } -- cgit v1.2.3 From ec70de8b04bf37213982a5e8f303bc38679f3f8e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: move apic variables to apic.c Impact: Code movement Move the variable definitions to apic.c. Ifdef the copying of the two early per-cpu variables, since Voyager doesn't use them. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/apic.c | 18 ++++++++++++++++++ arch/x86/kernel/setup_percpu.c | 22 ++-------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 1df341a528a..c6f15647eba 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -60,6 +60,24 @@ # error SPURIOUS_APIC_VECTOR definition error #endif +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); +unsigned int max_physical_apicid; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + #ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5d4a4964a8b..d367996693e 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -31,26 +31,6 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); #endif -#ifdef CONFIG_X86_LOCAL_APIC -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); -unsigned int max_physical_apicid; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; -#endif - -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 @@ -108,10 +88,12 @@ void __init setup_per_cpu_areas(void) * per cpu data areas. These arrays then become expendable and the * *_early_ptr's are zeroed indicating that the static arrays are gone. */ +#ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; -- cgit v1.2.3 From 996db817e3d1529d711e55b938d72ae4060b39fd Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: x86: only compile setup_percpu.o on SMP Impact: Minor build optimization Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 3 ++- arch/x86/kernel/setup_percpu.c | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a99437c965c..73de055c29c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o +obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -59,6 +59,7 @@ apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d367996693e..f30ff691c34 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,14 +22,8 @@ # define DBG(x...) #endif -/* - * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but - * voyager wants cpu_number too. - */ -#ifdef CONFIG_SMP DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); -#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -- cgit v1.2.3 From 1688401a0fddba8991aa5c0943b8ae9583998d60 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: move this_cpu_offset Impact: Small cleanup Define BOOT_PERCPU_OFFSET and use it for this_cpu_offset and __per_cpu_offset initializers. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 15 ++++++++++----- arch/x86/kernel/smpcommon.c | 7 ------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f30ff691c34..36c2e81dfc3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -25,15 +25,20 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); +#ifdef CONFIG_X86_64 +#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) +#else +#define BOOT_PERCPU_OFFSET 0 +#endif + +DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -#ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = (unsigned long)__per_cpu_load, + [0] = BOOT_PERCPU_OFFSET, }; -#else -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -#endif EXPORT_SYMBOL(__per_cpu_offset); /* diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index add36b4e37c..5ec29a1a846 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -5,13 +5,6 @@ #include #include -#ifdef CONFIG_X86_64 -DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; -#else -DEFINE_PER_CPU(unsigned long, this_cpu_off); -#endif -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - #ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself -- cgit v1.2.3 From 34019be1cd2941128b5de6d7c0fbdb51f967d268 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: don't assume boot cpu is #0 Impact: minor cleanup Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 36c2e81dfc3..be77f1a1231 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -37,7 +38,7 @@ EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = BOOT_PERCPU_OFFSET, + [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -101,10 +102,10 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif /* - * Up to this point, CPU0 has been using .data.init - * area. Reload %gs offset for CPU0. + * Up to this point, the boot CPU has been using .data.init + * area. Reload %gs offset for the boot CPU. */ - if (cpu == 0) + if (cpu == boot_cpu_id) load_gs_base(cpu); #endif -- cgit v1.2.3 From 89c9c4c58ee86e6e8802597271f23679e0c46647 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: make Voyager use x86 per-cpu setup. Impact: standardize all x86 platforms on same setup code With the preceding changes, Voyager can use the same per-cpu setup code as all the other x86 platforms. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be77f1a1231..599dc1cc1da 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -35,8 +35,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA - unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; @@ -125,6 +123,3 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); } - -#endif - -- cgit v1.2.3 From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/setup_percpu.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 4 ---- arch/x86/kernel/smpcommon.c | 25 ------------------------- 4 files changed, 15 insertions(+), 31 deletions(-) delete mode 100644 arch/x86/kernel/smpcommon.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73de055c29c..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,8 +60,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 599dc1cc1da..bcca3a7b374 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; + + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif +} + /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index def770b57b5..f9dbcff4354 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) do_rest: per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else @@ -1186,9 +1185,6 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif switch_to_new_gdt(); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 5ec29a1a846..00000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -#include - -#ifdef CONFIG_X86_32 -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -} -#endif -- cgit v1.2.3 From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: load new GDT after setting up boot cpu per-cpu area Impact: sync 32 and 64-bit code Merge load_gs_base() into switch_to_new_gdt(). Load the GDT and per-cpu state for the boot cpu when its new area is set up. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 15 +++++++++------ arch/x86/kernel/setup_percpu.c | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e30c8a282..0c766b80d91 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -258,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; void switch_to_new_gdt(void) { struct desc_ptr gdt_descr; + int cpu = smp_processor_id(); - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + /* Reload the per-cpu base */ #ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif } @@ -968,10 +973,6 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - loadsegment(fs, 0); - loadsegment(gs, 0); - load_gs_base(cpu); - #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -993,6 +994,8 @@ void __cpuinit cpu_init(void) */ switch_to_new_gdt(); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bcca3a7b374..4caa78d7cb1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -112,14 +112,14 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #endif /* * Up to this point, the boot CPU has been using .data.init - * area. Reload %gs offset for the boot CPU. + * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - load_gs_base(cpu); -#endif + switch_to_new_gdt(); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } -- cgit v1.2.3 From 22f25138c345ec46a13744c93c093ff822cd98d1 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 27 Jan 2009 14:21:37 +0900 Subject: x86: fix build breakage on voyage Impact: build fix x86_cpu_to_apicid and x86_bios_cpu_apicid aren't defined for voyage. Earlier patch forgot to conditionalize early percpu clearing. Fix it. Signed-off-by: James Bottomley Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4caa78d7cb1..c7458ead22d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,8 +125,10 @@ void __init setup_per_cpu_areas(void) } /* indicate the early static arrays will soon be gone */ +#ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#endif #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif -- cgit v1.2.3 From cf3997f507624757f149fcc42b76fb03c151fb65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jan 2009 14:25:05 +0900 Subject: x86: clean up indentation in setup_per_cpu_areas() Impact: cosmetic cleanup Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c7458ead22d..0d1e7ac439f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -96,22 +96,25 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. + * Copy data used in early init routines from the + * initial arrays to the per cpu data areas. These + * arrays then become expendable and the *_early_ptr's + * are zeroed indicating that the static arrays are + * gone. */ #ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); + early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); + early_per_cpu_map(x86_bios_cpu_apicid, cpu); #endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + + IRQ_STACK_SIZE - 64; #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); + early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif #endif /* -- cgit v1.2.3 From 8f6d86dc4178957d9814b1784848012a927a3898 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Jan 2009 21:41:34 +0100 Subject: x86: cpu_init(): remove ugly #ifdef construct around debug register clear Impact: Cleanup While I was looking through the new and improved bootstrap code - great work that, thanks! I found the below a slight improvement. Remove unnecessary ugly #ifdef construct around debug register clear. Signed-off-by: Peter Zijlstra Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0025846244..3f272d42d09 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1071,22 +1071,19 @@ void __cpuinit cpu_init(void) */ if (kgdb_connected && arch_kgdb_ops.correct_hw_break) arch_kgdb_ops.correct_hw_break(); - else { + else #endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ + { + /* + * Clear all 6 debug registers: + */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); } -#endif fpu_init(); -- cgit v1.2.3 From c8d46cf06dc2e3a8f57a350eb9f9b19fd7f2ffe5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 00:14:11 +0100 Subject: x86: rename 'genapic' to 'apic' Rename genapic-> to apic-> references because in a future chagne we'll open-code all the indirect calls (instead of obscuring them via macros), so we want this reference to be as short as possible. Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 6 ++-- arch/x86/kernel/genapic_64.c | 16 ++++----- arch/x86/kernel/io_apic.c | 80 ++++++++++++++++++++++---------------------- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- 5 files changed, 53 insertions(+), 53 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 53699c931ad..20a2a43c2a9 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -188,14 +188,14 @@ static void noop_wait_for_deassert(atomic_t *deassert_not_used) static int __init es7000_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - genapic->wait_for_init_deassert = noop_wait_for_deassert; - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } return 0; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index e656c272115..2b986389a24 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -29,7 +29,7 @@ extern struct genapic apic_x2xpic_uv_x; extern struct genapic apic_x2apic_phys; extern struct genapic apic_x2apic_cluster; -struct genapic __read_mostly *genapic = &apic_flat; +struct genapic __read_mostly *apic = &apic_flat; static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV @@ -46,15 +46,15 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { - if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) - genapic = &apic_flat; + apic = &apic_flat; } - if (genapic == &apic_flat) { + if (apic == &apic_flat) { if (max_physical_apicid >= 8) - genapic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } if (x86_quirks->update_genapic) @@ -74,9 +74,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - genapic = apic_probe[i]; + apic = apic_probe[i]; printk(KERN_INFO "Setting APIC routing to %s.\n", - genapic->name); + apic->name); return 1; } } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bfb7d734062..7283234229f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1486,7 +1486,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic, int irq, +static int setup_ioapic_entry(int apic_id, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, int polarity, int vector) @@ -1498,18 +1498,18 @@ static int setup_ioapic_entry(int apic, int irq, #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; struct IR_IO_APIC_route_entry *ir_entry = (struct IR_IO_APIC_route_entry *) entry; int index; if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); + panic("No mapping iommu for ioapic %d\n", apic_id); index = alloc_irte(iommu, irq, 1); if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); + panic("Failed to allocate IRTE for ioapic %d\n", apic_id); memset(&irte, 0, sizeof(irte)); @@ -1547,7 +1547,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, +static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].apicid, pin, cfg->vector, + apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1583,12 +1583,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1596,19 +1596,19 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); + idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { if (!notcon) { notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); continue; } if (notcon) { @@ -1617,9 +1617,9 @@ static void __init setup_IO_APIC_irqs(void) notcon = 0; } - irq = pin_2_irq(idx, apic, pin); + irq = pin_2_irq(idx, apic_id, pin); #ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) + if (multi_timer_check(apic_id, irq)) continue; #endif desc = irq_to_desc_alloc_cpu(irq, cpu); @@ -1628,9 +1628,9 @@ static void __init setup_IO_APIC_irqs(void) continue; } cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic, pin); + add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); - setup_IO_APIC_irq(apic, pin, irq, desc, + setup_IO_APIC_irq(apic_id, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -1643,7 +1643,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; @@ -1676,7 +1676,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, /* * Add it to the IO-APIC irq-routing table: */ - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } @@ -2089,7 +2089,7 @@ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int apic; + int apic_id; int i; unsigned char old_id; unsigned long flags; @@ -2113,21 +2113,21 @@ static void __init setup_ioapic_ids_from_mpc(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (apic = 0; apic < nr_ioapics; apic++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].apicid; + old_id = mp_ioapics[apic_id].apicid; - if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].apicid = reg_00.bits.ID; + mp_ioapics[apic_id].apicid = reg_00.bits.ID; } /* @@ -2136,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].apicid)) { + mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2147,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].apicid = i; + mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2162,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].apicid) + if (old_id != mp_ioapics[apic_id].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].dstapic == old_id) mp_irqs[i].dstapic - = mp_ioapics[apic].apicid; + = mp_ioapics[apic_id].apicid; /* * Read the right value from the MPC table and @@ -2174,20 +2174,20 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); - reg_00.bits.ID = mp_ioapics[apic].apicid; + reg_00.bits.ID = mp_ioapics[apic_id].apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(apic_id, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].apicid) + if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f2191d4f271..3928280278f 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -236,7 +236,7 @@ static int __init numaq_setup_ioapic_ids(void) static int __init numaq_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; + apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; return 0; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f41c4486c27..a58e9f5e603 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -590,7 +590,7 @@ static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; # endif #endif -- cgit v1.2.3 From f2f05ee8b8d346d4edee766384a5fedafdd4f9f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: x86: clean up genapic_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 73 +++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 34185488e4f..f1bfdd3608a 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -175,25 +175,60 @@ static unsigned int phys_pkg_id(int index_msb) } struct genapic apic_flat = { - .name = "flat", - .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = flat_target_cpus, - .vector_allocation_domain = flat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + .name = "flat", + .probe = NULL, + .acpi_madt_oem_check = flat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = flat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu << 24, + + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + + .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, + .send_IPI_allbutself = flat_send_IPI_allbutself, + .send_IPI_all = flat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; /* -- cgit v1.2.3 From 4c3e51e05a7eefead4033b187394458ff8626497 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: x86: clean up genapic_phys_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 75 +++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f1bfdd3608a..e9233374cef 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -320,23 +320,60 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, } struct genapic apic_physflat = { - .name = "physical flat", - .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = physflat_target_cpus, - .vector_allocation_domain = physflat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_allbutself = physflat_send_IPI_allbutself, - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + + .name = "physical flat", + .probe = NULL, + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = physflat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = physflat_vector_allocation_domain, + /* not needed, but shouldn't hurt: */ + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu<<24, + + .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + + .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, + .send_IPI_allbutself = physflat_send_IPI_allbutself, + .send_IPI_all = physflat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; -- cgit v1.2.3 From c7967329911013a05920ef12832935c541bb8c9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: x86: clean up apic_x2apic_uv_x - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 74 +++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index bfe36249145..94f606f204a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -237,25 +237,61 @@ static void uv_send_IPI_self(int vector) } struct genapic apic_x2apic_uv_x = { - .name = "UV large system", - .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = uv_target_cpus, - .vector_allocation_domain = uv_vector_allocation_domain, - .apic_id_registered = uv_apic_id_registered, - .init_apic_ldr = uv_init_apic_ldr, - .send_IPI_all = uv_send_IPI_all, - .send_IPI_allbutself = uv_send_IPI_allbutself, - .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, - .send_IPI_self = uv_send_IPI_self, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "UV large system", + .probe = NULL, + .acpi_madt_oem_check = uv_acpi_madt_oem_check, + .apic_id_registered = uv_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = uv_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = uv_vector_allocation_domain, + .init_apic_ldr = uv_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + + .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, + .send_IPI_allbutself = uv_send_IPI_allbutself, + .send_IPI_all = uv_send_IPI_all, + .send_IPI_self = uv_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; static __cpuinit void set_x2apic_extra_bits(int pnode) -- cgit v1.2.3 From 05c155c235c757329ec89ad591516538ed8352c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: x86: clean up apic_x2apic_phys - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 74 +++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 21bcc0e098b..c98361fb7ee 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -172,23 +172,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_phys = { - .name = "physical x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "physical x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; -- cgit v1.2.3 From 504a3c3ad45d200a6ac8be5aa019c8fa05e26dc8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: x86: clean up apic_x2apic_cluster - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 74 +++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372..fc855e503ac 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -176,23 +176,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_cluster = { - .name = "cluster x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "cluster x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; -- cgit v1.2.3 From 306db03b0d71bf9c94155c0c4771a79fc70b4b27 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: x86: clean up apic->acpi_madt_oem_check methods Impact: refactor code x86 subarchitectures each defined a "acpi_madt_oem_check()" method, which could be an inline function, or an extern, or a static function, and which was also the name of a genapic field. Untangle this namespace spaghetti by setting ->acpi_madt_oem_check() to NULL on those subarchitectures that have no detection quirks, and rename the other ones (summit, es7000) that do. Also change default_acpi_madt_oem_check() to handle NULL entries, and clean its control flow up as well. Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 3 ++- arch/x86/kernel/genapic_64.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4cb5964f149..314fe0dddef 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,7 +239,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) madt->address); } - acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + default_acpi_madt_oem_check(madt->header.oem_id, + madt->header.oem_table_id); return 0; } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2b986389a24..060945b8eec 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -68,7 +68,7 @@ void apic_send_IPI_self(int vector) __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int i; -- cgit v1.2.3 From 7ed248daa56156f2fd7175f90b62fc6397b0c7b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: x86: clean up apic->apic_id_registered() methods Impact: cleanup x86 subarchitectures each defined a "apic_id_registered()" method, which could be an inline function depending on which subarch we build for, and which was also the name of a genapic field. Untangle this namespace spaghetti by giving each of the instances a separate name. Also remove wrapper macro obfuscation. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c6f15647eba..b6740de18fb 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1171,7 +1171,7 @@ void __cpuinit setup_local_APIC(void) * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!apic_id_registered()) + if (!apic->apic_id_registered()) BUG(); /* -- cgit v1.2.3 From f8987a1093cc7a896137e264c24e04d4048e9f95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:02:31 +0100 Subject: x86, genapic: rename int_delivery_mode, et. al. int_delivery_mode is supposed to mean 'interrupt delivery mode', but it's quite a misnomer as 'int' we usually think of as an integer type ... The standard naming for such attributes is 'irq' - so rename the following fields and macros: int_delivery_mode => irq_delivery_mode INT_DELIVERY_MODE => IRQ_DELIVERY_MODE int_dest_mode => irq_dest_mode INT_DEST_MODE => IRQ_DEST_MODE Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 8 ++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- arch/x86/kernel/io_apic.c | 30 +++++++++++++++--------------- 5 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9233374cef..0a263d6bb5e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -180,8 +180,8 @@ struct genapic apic_flat = { .acpi_madt_oem_check = flat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, .ESR_DISABLE = 0, @@ -326,8 +326,8 @@ struct genapic apic_physflat = { .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index fc855e503ac..e9ff7dc9a0f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -182,8 +182,8 @@ struct genapic apic_x2apic_cluster = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c98361fb7ee..8141b5a88f6 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -178,8 +178,8 @@ struct genapic apic_x2apic_phys = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 94f606f204a..6a73cad0d3e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -243,8 +243,8 @@ struct genapic apic_x2apic_uv_x = { .acpi_madt_oem_check = uv_acpi_madt_oem_check, .apic_id_registered = uv_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7283234229f..5f967b9c9af 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = INT_DEST_MODE; + entry.dest_mode = IRQ_DEST_MODE; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; + entry.delivery_mode = IRQ_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; -- cgit v1.2.3 From 9b5bc8dc12421a4b17047061f473d85c1797d543 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:09:58 +0100 Subject: x86, apic: remove IRQ_DEST_MODE / IRQ_DELIVERY_MODE Remove the wrapper macros IRQ_DEST_MODE and IRQ_DELIVERY_MODE. The typical 32-bit and the 64-bit build all dereference via the genapic, so it's pointless to hide that indirection via these ugly macros. Furthermore, it also obscures subarchitecture details. So replace it with apic->irq_dest_mode / etc. accesses. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 5f967b9c9af..301b6571d70 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = trigger; - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = IRQ_DEST_MODE; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = IRQ_DELIVERY_MODE; + entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; -- cgit v1.2.3 From fe402e1f2b67a63f1e53ab2a316fc20f7ca4ec91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:32:51 +0100 Subject: x86, apic: clean up / remove TARGET_CPUS Impact: cleanup use apic->target_cpus() directly instead of the TARGET_CPUS wrapper. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 301b6571d70..7503285e180 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1559,10 +1559,10 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1661,7 +1661,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2877,7 +2877,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, cfg, TARGET_CPUS); + assign_irq_vector(0, cfg, apic->target_cpus()); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -3195,7 +3195,7 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; } @@ -3261,11 +3261,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3698,12 +3698,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3987,7 +3987,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS + * so mask in all cases should simply be apic->target_cpus() */ #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) @@ -4028,7 +4028,7 @@ void __init setup_ioapic_dest(void) (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; else - mask = TARGET_CPUS; + mask = apic->target_cpus(); #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) -- cgit v1.2.3 From f6f52baf2613dd319e9ba3f3319bf1f1c442e4b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:01:41 +0100 Subject: x86: clean up esr_disable() methods Impact: cleanup Most subarchitectures want to disable the APIC ESR (Error Status Register), because they generally have hardware hacks that wrap standard CPUs into a bigger system and hence the APIC bus is quite non-standard and weirdnesses (lockups) have been seen with ESR reporting. Remove the esr_disable macros and put the desired flag into each subarchitecture's genapic template directly. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b6740de18fb..69d8c30d571 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (esr_disable) { + if (apic->ESR_DISABLE) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && esr_disable) { + if (lapic_is_integrated() && apic->ESR_DISABLE) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); -- cgit v1.2.3 From 08125d3edab90644724652eedec3e219e3e0f2e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:08:44 +0100 Subject: x86: rename ->ESR_DISABLE to ->disable_esr the ->ESR_DISABLE shouting variant was used to enable the esr_disable macro wrappers. Those ugly macros are removed now so we can rename ->ESR_DISABLE to ->disable_esr Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 69d8c30d571..3853ed76c88 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (apic->ESR_DISABLE) { + if (apic->disable_esr) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && apic->ESR_DISABLE) { + if (lapic_is_integrated() && apic->disable_esr) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 0a263d6bb5e..d437a60cc58 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -184,7 +184,7 @@ struct genapic apic_flat = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -330,7 +330,7 @@ struct genapic apic_physflat = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index e9ff7dc9a0f..c1cffae4a4c 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -186,7 +186,7 @@ struct genapic apic_x2apic_cluster = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 8141b5a88f6..c59602be035 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -182,7 +182,7 @@ struct genapic apic_x2apic_phys = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 6a73cad0d3e..525b4e480a7 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -247,7 +247,7 @@ struct genapic apic_x2apic_uv_x = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, -- cgit v1.2.3 From 0b06e734bff7554c31eac4aad2fc9be4adb7c1c1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:13:04 +0100 Subject: x86: clean up the APIC_DEST_LOGICAL logic Impact: cleanup The bigsmp and es7000 subarchitectures un-defined APIC_DEST_LOGICAL in a rather nasty way by re-defining it to zero. That is infinitely fragile and makes it very hard to see what to code really does in a given context. The very same constant has different meanings and values - depending on which subarch is enabled. Untangle this mess by never undefining the constant, but instead propagating the right values into the genapic driver templates. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 12 ++++++------ arch/x86/kernel/genx2apic_cluster.c | 10 +++++----- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d437a60cc58..fd242c6b3ba 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); + __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); } static unsigned int get_apic_id(unsigned long x) @@ -181,11 +181,11 @@ struct genapic apic_flat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -327,7 +327,7 @@ struct genapic apic_physflat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = physflat_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c1cffae4a4c..a76e75ecc20 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -183,11 +183,11 @@ struct genapic apic_x2apic_cluster = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c59602be035..9b6d68deb14 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -179,7 +179,7 @@ struct genapic apic_x2apic_phys = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 525b4e480a7..0a756800c11 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -244,11 +244,11 @@ struct genapic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7503285e180..17526d7a8ab 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 285bbf8831f..400b7bd48f6 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | APIC_DEST_LOGICAL; + unsigned int icr = shortcut | apic->apic_destination_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff4354..f0a173718d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); -- cgit v1.2.3 From bdb1a9b62fc182d4da3143e346f7a0925d243352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:29:25 +0100 Subject: x86, apic: rename genapic::apic_destination_logical to genapic::dest_logical This field name was unreasonably long - shorten it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 10 +++++----- arch/x86/kernel/genx2apic_cluster.c | 8 ++++---- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index fd242c6b3ba..d22cbdaee20 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); + __send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } static unsigned int get_apic_id(unsigned long x) @@ -185,7 +185,7 @@ struct genapic apic_flat = { .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -331,7 +331,7 @@ struct genapic apic_physflat = { .target_cpus = physflat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index a76e75ecc20..b91a48eae52 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -187,7 +187,7 @@ struct genapic apic_x2apic_cluster = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 9b6d68deb14..f070e86af0f 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -183,7 +183,7 @@ struct genapic apic_x2apic_phys = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 0a756800c11..c8a89158679 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -248,7 +248,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 17526d7a8ab..7f8b32b2089 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 400b7bd48f6..e2e4895ca69 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | apic->apic_destination_logical; + unsigned int icr = shortcut | apic->dest_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f0a173718d9..45c096f605f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); -- cgit v1.2.3 From d1d7cae8fd54a301a0de531b48451649933ffdcf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:41:42 +0100 Subject: x86, apic: clean up check_apicid*() callbacks Clean up these methods - to make it clearer which function is used in which case. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7f8b32b2089..733ecf17272 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2135,7 +2135,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -3878,10 +3878,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(apic_id_map, i)) break; } -- cgit v1.2.3 From 2e867b17cc02e1799f18126af0ddd7b63dd8f6f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:57:56 +0100 Subject: x86, apic: remove no_balance_irq and no_ioapic_check flags These flags are completely unused. (the in-kernel IRQ balancer has been removed from the upstream kernel.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 6 ------ arch/x86/kernel/genx2apic_cluster.c | 3 --- arch/x86/kernel/genx2apic_phys.c | 3 --- arch/x86/kernel/genx2apic_uv_x.c | 3 --- 4 files changed, 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d22cbdaee20..9446f372a16 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -189,9 +189,6 @@ struct genapic apic_flat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, @@ -335,9 +332,6 @@ struct genapic apic_physflat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = physflat_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index b91a48eae52..2eeca6e744a 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -191,9 +191,6 @@ struct genapic apic_x2apic_cluster = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f070e86af0f..be0ee3e56ef 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -187,9 +187,6 @@ struct genapic apic_x2apic_phys = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c8a89158679..68b423f3da9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -252,9 +252,6 @@ struct genapic apic_x2apic_uv_x = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = uv_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, -- cgit v1.2.3 From e2d40b1878bd13ca1028ddd299c48e4821ac3535 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->vector_allocation_domain() - separate the namespace - remove macros - move the default vector-allocation-domain to mach-generic - fix whitespace damage Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 733ecf17272..49899e06624 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1316,7 +1316,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask); vector = current_vector; offset = current_offset; -- cgit v1.2.3 From a5c4329622a3437adef4b2a4288d127957743c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->init_apic_ldr() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 3853ed76c88..b7077936ac0 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1179,7 +1179,7 @@ void __cpuinit setup_local_APIC(void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - init_apic_ldr(); + apic->init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this -- cgit v1.2.3 From d190cb87c4503014353f2310c4bfa2268fa7111d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->ioapic_phys_id_map() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 49899e06624..db79ad9a764 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2108,7 +2108,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -3862,7 +3862,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); -- cgit v1.2.3 From 72ce016583916fb7ffcbaa6a3e1f8f731b79a865 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->setup_apic_routing() - separate the namespace - remove macros - remove namespace clash on 64-bit Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 5 ++--- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/mpparse.c | 6 +++--- arch/x86/kernel/smpboot.c | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 314fe0dddef..539163161a4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1360,9 +1360,8 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b7077936ac0..fcbcc03cd4b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1625,7 +1625,7 @@ int __init APIC_init_uniprocessor(void) enable_IR_x2apic(); #endif #ifdef CONFIG_X86_64 - setup_apic_routing(); + default_setup_apic_routing(); #endif verify_local_APIC(); diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 060945b8eec..d57d2138f07 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -44,7 +44,7 @@ static struct genapic *apic_probe[] __initdata = { /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ -void __init setup_apic_routing(void) +void __init default_setup_apic_routing(void) { if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fa6bb263892..c8a534a16d9 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -390,9 +390,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) generic_bigsmp_probe(); #endif -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); + if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 45c096f605f..3791b4ae567 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1128,7 +1128,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) #ifdef CONFIG_X86_64 enable_IR_x2apic(); - setup_apic_routing(); + default_setup_apic_routing(); #endif if (smp_sanity_check(max_cpus) < 0) { -- cgit v1.2.3 From 33a201fac698a93d9d1ffa77030ba2ff38d1a3d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 07:17:26 +0100 Subject: x86, apic: streamline the ->multi_timer_check() quirk only NUMAQ uses this quirk: to prevent the timer IRQ from being added on secondary nodes. All other genapic templates can have a NULL ->multi_timer_check() callback. Also, extend the generic code to treat a NULL pointer accordingly. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index db79ad9a764..282ea112f3c 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1618,10 +1618,15 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic_id, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic_id, irq)) + + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) continue; -#endif + desc = irq_to_desc_alloc_cpu(irq, cpu); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); -- cgit v1.2.3 From 3f57a318c36e1f24070a18df8c4971ca08d33142 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->apicid_to_node() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3791b4ae567..1dd4cecd4bc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -163,7 +163,7 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); - int node = apicid_to_node(apicid); + int node = apic->apicid_to_node(apicid); if (!node_online(node)) node = first_online_node; -- cgit v1.2.3 From 5257c5111ca21c8e857b65a79ab986b313e1c362 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->cpu_to_logical_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/ipi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e2e4895ca69..367c5e684fa 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -140,7 +140,7 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } @@ -155,7 +155,7 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } -- cgit v1.2.3 From a21769a4461801454930a06bc18bd8249cd9e993 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->cpu_present_to_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 9 ++++++++- 5 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9446f372a16..f4a2c1c0a1a 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -197,7 +197,7 @@ struct genapic apic_flat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, @@ -341,7 +341,7 @@ struct genapic apic_physflat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2eeca6e744a..710d612a964 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -199,7 +199,7 @@ struct genapic apic_x2apic_cluster = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index be0ee3e56ef..49a449178c3 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -195,7 +195,7 @@ struct genapic apic_x2apic_phys = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 68b423f3da9..a08a6359186 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -260,7 +260,7 @@ struct genapic apic_x2apic_uv_x = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1dd4cecd4bc..812bf39de35 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -903,9 +903,16 @@ do_rest: return boot_error; } +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} +#endif + int __cpuinit native_cpu_up(unsigned int cpu) { - int apicid = cpu_present_to_apicid(cpu); + int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; int err; -- cgit v1.2.3 From 8058714a41afc4c983acb274b1adf7bd3cfe7f6e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: x86, apic: clean up ->apicid_to_cpu_present() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 282ea112f3c..3d85d3d810b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2155,7 +2155,7 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3899,7 +3899,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apicid_to_cpu_present(apic_id); + tmp = apic->apicid_to_cpu_present(apic_id); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index d801d06af06..2ed5bdf15c9 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -200,7 +200,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apicid_to_cpu_present(m->apicid); + apic_cpus = apic->apicid_to_cpu_present(m->apicid); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version -- cgit v1.2.3 From d83093b50416f4ca59d3a84b2ddc217748214d64 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: x86: refactor ->setup_portio_remap() subarch methods Only NUMAQ has a real ->setup_portio_remap() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Also move the NUMAQ method from the header file into the apic driver .c file. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 812bf39de35..0e7d26c01f9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1170,7 +1170,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); - setup_portio_remap(); + if (apic->setup_portio_remap) + apic->setup_portio_remap(); smpboot_setup_io_apic(); /* -- cgit v1.2.3 From a27a621001f4c3e57caf47feff4b014577fd01c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: x86: refactor ->check_phys_apicid_present() subarch methods - spread out the namespace to per driver methods - extend it to 64-bit as well so that we can use apic->check_phys_apicid_present() unconditionally Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 7 ++++++- 5 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f4a2c1c0a1a..78adf71f7e5 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -200,7 +200,7 @@ struct genapic apic_flat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, @@ -344,7 +344,7 @@ struct genapic apic_physflat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 710d612a964..7062e24b18f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -202,7 +202,7 @@ struct genapic apic_x2apic_cluster = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 49a449178c3..7177a1110f0 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -198,7 +198,7 @@ struct genapic apic_x2apic_phys = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a08a6359186..debd721f0b4 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -263,7 +263,7 @@ struct genapic apic_x2apic_uv_x = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0e7d26c01f9..ab83be2f8e0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -908,6 +908,11 @@ int default_cpu_present_to_apicid(int mps_cpu) { return __default_cpu_present_to_apicid(mps_cpu); } + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} #endif int __cpuinit native_cpu_up(unsigned int cpu) @@ -1058,7 +1063,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", boot_cpu_physical_apicid); -- cgit v1.2.3 From 4904033302c745342e3b3a611881cdee57fbe06a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: x86: refactor ->enable_apic_mode() subarch methods Only ES7000 has a real ->enable_apic_mode() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index fcbcc03cd4b..9d6374da478 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1763,7 +1763,8 @@ void __init connect_bsp_APIC(void) outb(0x01, 0x23); } #endif - enable_apic_mode(); + if (apic->enable_apic_mode) + apic->enable_apic_mode(); } /** -- cgit v1.2.3 From b0b20e5a3a6615ae750804523aeedd32911bb9d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:15:06 +0100 Subject: x86, es7000: clean up es7000_enable_apic_mode() - eliminate the needless es7000_enable_apic_mode() complication which was not apparent prior the namespace cleanups - clean up the control flow in es7000_enable_apic_mode() - other cleanups Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 20a2a43c2a9..e73fe18488a 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -359,20 +359,21 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -void __init -es7000_sw_apic(void) +void __init es7000_enable_apic_mode(void) { - if (es7000_plat) { - int mip_status; - struct mip_reg es7000_mip_reg; - - printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = (MIP_VALID); - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - printk("es7000_sw_apic: command failed, status = %x\n", - mip_status); + struct mip_reg es7000_mip_reg; + int mip_status; + + if (!es7000_plat) return; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = MIP_VALID; + + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { + printk("es7000_enable_apic_mode: command failed, status = %x\n", + mip_status); } } -- cgit v1.2.3 From d4c9a9f3d416cfa1f5ffbe09d864d069467fe693 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:31:22 +0100 Subject: x86, apic: unify phys_pkg_id() - unify the call signature of 64-bit to that of 32-bit - clean up the types all around - clean up namespace contamination Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 10 +--------- arch/x86/kernel/cpu/common.c | 11 +---------- arch/x86/kernel/genapic_flat_64.c | 6 +++--- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 6 files changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4e581fdc0a5..84f8e4a5aef 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,7 +116,6 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; -#ifdef CONFIG_X86_32 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); @@ -124,14 +123,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) * Reinit the apicid, now that we have extended initial_apicid. */ c->apicid = phys_pkg_id(c->initial_apicid, 0); -#else - c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(core_plus_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. - */ - c->apicid = phys_pkg_id(0); -#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 275e2cb43b9..93c491c4fe7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,11 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -454,13 +450,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); -#endif } out: @@ -742,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); + c->apicid = phys_pkg_id(c->initial_apicid, 0); #endif /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78adf71f7e5..cc9e07b9609 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -169,7 +169,7 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return mask1 & mask2; } -static unsigned int phys_pkg_id(int index_msb) +static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { return hard_smp_processor_id() >> index_msb; } @@ -202,7 +202,7 @@ struct genapic apic_flat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, @@ -346,7 +346,7 @@ struct genapic apic_physflat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 7062e24b18f..18b6f14376e 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -157,7 +157,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -204,7 +204,7 @@ struct genapic apic_x2apic_cluster = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 7177a1110f0..2cb6f49e4c5 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -156,7 +156,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -200,7 +200,7 @@ struct genapic apic_x2apic_phys = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index debd721f0b4..67e7658775e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -226,7 +226,7 @@ static unsigned int uv_read_apic_id(void) return get_apic_id(apic_read(APIC_ID)); } -static unsigned int phys_pkg_id(int index_msb) +static int uv_phys_pkg_id(int initial_apicid, int index_msb) { return uv_read_apic_id() >> index_msb; } @@ -265,7 +265,7 @@ struct genapic apic_x2apic_uv_x = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, -- cgit v1.2.3 From cb8cc442dc7e07cb5438b357843ab4095ad73933 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:24:54 +0100 Subject: x86, apic: refactor ->phys_pkg_id() Refactor the ->phys_pkg_id() methods: - namespace separation - macro wrapper removal - open-coded calls to the methods in the generic code Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 6 +++--- arch/x86/kernel/cpu/common.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84f8e4a5aef..e8bb892c09f 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,13 +116,13 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 93c491c4fe7..055b9c3a660 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,7 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -450,7 +450,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); } @@ -686,7 +686,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; # endif @@ -733,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif /* -- cgit v1.2.3 From 1322a2e2db87c938d8381f8501af9a4d0eab8bc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:54:56 +0100 Subject: x86, mpparse: call the generic quirk handlers early Call all the registered MPS quirk handlers early. These methods scan low RAM typically for specific signatures so are safe to be called early. Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c8a534a16d9..f6fb1928439 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,16 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - /* - * need to make sure summit and es7000's mps_oem_check is safe to be - * called early via genericarch 's mps_oem_check - */ - if (early) { -#ifdef CONFIG_X86_NUMAQ - numaq_mps_oem_check(mpc, oem, str); -#endif - } else - mps_oem_check(mpc, oem, str); + mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) -- cgit v1.2.3 From 9c7642470ecf03d8b4946a2addc8fe631b8426dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:44:32 +0100 Subject: x86: consolidate the ->mps_oem_check() code - spread out the mps_oem_check() namespace on a per APIC driver basis Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f6fb1928439..b12fa5ce6f5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,7 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); + generic_mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) -- cgit v1.2.3 From ca6c8ed4646f8ccaa4f7db618bf69b8b8fb49767 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:08:38 +0100 Subject: x86, apic: refactor ->get_apic_id() & GET_APIC_ID() - spread out the namespace on a per driver basis - get rid of macro wrappers - small cleanups Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 9 +++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index cc9e07b9609..ab47091dac2 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -126,11 +126,12 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } -static unsigned int get_apic_id(unsigned long x) +static unsigned int flat_get_apic_id(unsigned long x) { unsigned int id; id = (((x)>>24) & 0xFFu); + return id; } @@ -146,7 +147,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = get_apic_id(apic_read(APIC_ID)); + id = flat_get_apic_id(apic_read(APIC_ID)); return id; } @@ -205,7 +206,7 @@ struct genapic apic_flat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, @@ -349,7 +350,7 @@ struct genapic apic_physflat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu<<24, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 18b6f14376e..c7557e05184 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -141,7 +141,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -207,7 +207,7 @@ struct genapic apic_x2apic_cluster = { .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_cluster_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2cb6f49e4c5..80cba49cfd8 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -140,7 +140,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -203,7 +203,7 @@ struct genapic apic_x2apic_phys = { .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 67e7658775e..50310b96adc 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -201,7 +201,7 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_get_apic_id(unsigned long x) { unsigned int id; @@ -223,7 +223,7 @@ static unsigned long set_apic_id(unsigned int id) static unsigned int uv_read_apic_id(void) { - return get_apic_id(apic_read(APIC_ID)); + return x2apic_get_apic_id(apic_read(APIC_ID)); } static int uv_phys_pkg_id(int initial_apicid, int index_msb) @@ -268,7 +268,7 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, -- cgit v1.2.3 From 5b8127277bc4cdca78eda5ee900a314642822ace Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:59:17 +0100 Subject: x86, apic: refactor ->apic_id_mask & APIC_ID_MASK - spread out the namespace on a per driver basis - get rid of wrapper macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 9d6374da478..5f7f3a9a47a 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1009,11 +1009,11 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) + if (reg1 != (reg0 ^ apic->apic_id_mask)) return 0; /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index ab47091dac2..78baa55cd0e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -352,7 +352,7 @@ struct genapic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu<<24, + .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, -- cgit v1.2.3 From debccb3e77be52cfc26c5a99e123c114c5c72aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:20:18 +0100 Subject: x86, apic: refactor ->cpu_mask_to_apicid*() - spread out the namespace on a per driver basis - clean up the functions - get rid of macros Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 4 +++- arch/x86/kernel/genx2apic_cluster.c | 15 +++++++++------ arch/x86/kernel/genx2apic_phys.c | 15 +++++++++------ arch/x86/kernel/genx2apic_uv_x.c | 14 ++++++++------ arch/x86/kernel/io_apic.c | 21 ++++++++++++--------- 5 files changed, 41 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78baa55cd0e..b941b112caf 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -309,11 +309,13 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c7557e05184..62f9fccf01d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -111,21 +111,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -133,11 +133,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 80cba49cfd8..3da1675b260 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -110,21 +110,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -132,11 +132,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 50310b96adc..f957878c21e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -171,21 +171,21 @@ static void uv_init_apic_ldr(void) static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -193,11 +193,13 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3d85d3d810b..01a2505d727 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -563,8 +563,9 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); /* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. */ static unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) @@ -582,7 +583,8 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -1562,7 +1564,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1666,7 +1668,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2367,7 +2369,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -3270,7 +3272,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3708,7 +3710,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3773,7 +3776,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -- cgit v1.2.3 From dac5f4121df3c39fdb2ea57acd669a0ae19e46f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:42:24 +0100 Subject: x86, apic: untangle the send_IPI_*() jungle Our send_IPI_*() methods and definitions are a twisted mess: the same symbol is defined to different things depending on .config details, in a non-transparent way. - spread out the quirks into separately named per apic driver methods - prefix the standard PC methods with default_ - get rid of wrapper macro obfuscation - clean up various details Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 24 +++++++++++++---------- arch/x86/kernel/genx2apic_cluster.c | 38 ++++++++++++++++++++----------------- arch/x86/kernel/genx2apic_phys.c | 36 +++++++++++++++-------------------- arch/x86/kernel/genx2apic_uv_x.c | 21 +++++++++++--------- arch/x86/kernel/io_apic.c | 10 +++++----- arch/x86/kernel/ipi.c | 28 ++++++++++++++------------- arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 10 +++++----- 11 files changed, 91 insertions(+), 84 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 5f7f3a9a47a..84b8e7c57ab 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -475,7 +475,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); #endif } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index d57d2138f07..820dea5d0eb 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -65,7 +65,7 @@ void __init default_setup_apic_routing(void) void apic_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index b941b112caf..7c648ccea51 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->dest_logical); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -85,14 +85,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) +static void + flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); } @@ -114,16 +115,19 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, + vector, apic->dest_logical); } } static void flat_send_IPI_all(int vector) { - if (vector == NMI_VECTOR) + if (vector == NMI_VECTOR) { flat_send_IPI_mask(cpu_online_mask, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); + } else { + __default_send_IPI_shortcut(APIC_DEST_ALLINC, + vector, apic->dest_logical); + } } static unsigned int flat_get_apic_id(unsigned long x) @@ -265,18 +269,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 62f9fccf01d..2d97726a973 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -36,8 +36,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) +static void + __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) { unsigned long cfg; @@ -57,45 +57,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, */ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } @@ -175,7 +180,6 @@ static void init_x2apic_ldr(void) int cpu = smp_processor_id(); per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); - return; } struct genapic apic_x2apic_cluster = { diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3da1675b260..74777c27669 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -55,8 +55,8 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } local_irq_restore(flags); } @@ -145,18 +146,12 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, static unsigned int x2apic_phys_get_apic_id(unsigned long x) { - unsigned int id; - - id = x; - return id; + return x; } static unsigned long set_apic_id(unsigned int id) { - unsigned long x; - - x = id; - return x; + return id; } static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) @@ -171,7 +166,6 @@ static void x2apic_send_IPI_self(int vector) static void init_x2apic_ldr(void) { - return; } struct genapic apic_x2apic_phys = { diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index f957878c21e..24b9f42db9b 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -118,12 +118,13 @@ static void uv_send_IPI_one(int cpu, int vector) int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); - lapicid = apicid & 0x3f; /* ZZZ macro needed */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); - val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << - UVH_IPI_INT_APIC_ID_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); + + val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | + ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | + ( vector << UVH_IPI_INT_VECTOR_SHFT ); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } @@ -137,22 +138,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector) static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_cpu(cpu, mask) + for_each_cpu(cpu, mask) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_all(int vector) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 01a2505d727..e90970ce21a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -514,11 +514,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg) for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } cfg->move_in_progress = 0; @@ -800,7 +800,7 @@ static void clear_IO_APIC (void) } #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { unsigned int cfg; @@ -2297,7 +2297,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2305,7 +2305,7 @@ static int ioapic_retrigger_irq(unsigned int irq) #else static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + apic->send_IPI_self(irq_cfg(irq)->vector); return 1; } diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 367c5e684fa..e16c41b2e4e 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -48,7 +48,7 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } -void __send_IPI_shortcut(unsigned int shortcut, int vector) +void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -75,16 +75,16 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) apic_write(APIC_ICR, cfg); } -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); } /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned long mask, int vector) +static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) { unsigned long cfg; @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __send_IPI_dest_field(mask, vector); + __default_send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -140,11 +140,11 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); + __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -153,10 +153,12 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), - vector); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector); + } local_irq_restore(flags); } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 10435a120d2..b62a3811e01 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) */ void kgdb_roundup_cpus(unsigned long flags) { - send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(APIC_DM_NMI); } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8536fee5c1..38dace28d62 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -651,7 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + apic->send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e6faa3316bd..c48ba6cc32a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,12 +118,12 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) @@ -131,7 +131,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) cpumask_var_t allbutself; if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); return; } @@ -140,9 +140,9 @@ void native_send_call_func_ipi(const struct cpumask *mask) if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); free_cpumask_var(allbutself); } -- cgit v1.2.3 From abfa584c8df8b691cf18f51c7d4af27e5b32be4a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:15:16 +0100 Subject: x86: set ->trampoline_phys_low/high on 64-bit too 64-bit x86 has zero for ->trampoline_phys_low/high, but the smpboot code can use these values - so it's better to set them up to their correct values. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 8 ++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7c648ccea51..3a28d6a8c49 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -224,8 +224,8 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, @@ -370,8 +370,8 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2d97726a973..abc5ee329f2 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -228,8 +228,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 74777c27669..dc815ef22d8 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -214,8 +214,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 24b9f42db9b..b5908735ca5 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -287,8 +287,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_self = uv_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, -- cgit v1.2.3 From a965936643e28af8152d9e960b966baa1a5588a2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:21:32 +0100 Subject: x86, smp: refactor ->wait_for_init_deassert() - spread out the namespace on a per APIC driver basis - handle a NULL ->wait_for_init_deassert() as a 'dont wait' default method - remove NUMAQ and Summit handlers Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 6 +----- arch/x86/kernel/smpboot.c | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index e73fe18488a..d7f433ee602 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -182,10 +182,6 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static void noop_wait_for_deassert(atomic_t *deassert_not_used) -{ -} - static int __init es7000_update_genapic(void) { apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; @@ -194,7 +190,7 @@ static int __init es7000_update_genapic(void) if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wait_for_init_deassert = NULL; apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ab83be2f8e0..558af378a61 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -196,7 +196,8 @@ static void __cpuinit smp_callin(void) * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. */ - wait_for_init_deassert(&init_deasserted); + if (apic->wait_for_init_deassert) + apic->wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) -- cgit v1.2.3 From 333344d94300500e401cffb4eea10a5ab6e5a41d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: x86, smp: refactor ->smp_callin_clear_local_apic() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to reset the APIC. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 558af378a61..10873a46b29 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -244,7 +244,8 @@ static void __cpuinit smp_callin(void) */ pr_debug("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); + if (apic->smp_callin_clear_local_apic) + apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From 7bd06ec63a1204ca44b9f1dc487b8632016162d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: x86, smp: refactor ->store/restore_NMI_vector() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to store and restore the NMI vector. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 10873a46b29..1492024592f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -826,7 +826,8 @@ do_rest: pr_debug("Setting warm reset code and vector.\n"); - store_NMI_vector(&nmi_high, &nmi_low); + if (apic->store_NMI_vector) + apic->store_NMI_vector(&nmi_high, &nmi_low); smpboot_setup_warm_reset_vector(start_ip); /* -- cgit v1.2.3 From 3d5f597e938c425554cb7668fd3c9d6a536a984a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:43:47 +0100 Subject: x86, smp: remove ->restore_NMI_vector() Nothing actually restores the NMI vector - so remove this logic altogether. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 2 -- arch/x86/kernel/genx2apic_cluster.c | 1 - arch/x86/kernel/genx2apic_phys.c | 1 - arch/x86/kernel/genx2apic_uv_x.c | 1 - 4 files changed, 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 3a28d6a8c49..e9237f59928 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -229,7 +229,6 @@ struct genapic apic_flat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; @@ -375,6 +374,5 @@ struct genapic apic_physflat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index abc5ee329f2..7c87156b641 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -233,6 +233,5 @@ struct genapic apic_x2apic_cluster = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index dc815ef22d8..5cbae8aa040 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -219,6 +219,5 @@ struct genapic apic_x2apic_phys = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b5908735ca5..6adb5e6f4d9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -292,7 +292,6 @@ struct genapic apic_x2apic_uv_x = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; -- cgit v1.2.3 From 25dc004903a38f0b6f6626dbbab058c8709c5398 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: x86, smp: refactor ->inquire_remote_apic() methods Nothing exciting - a few subarches dont want APIC remote reads to be performed - the others are content with the default method. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1492024592f..170adc5b6cb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -876,8 +876,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) - inquire_remote_apic(apicid); + if (apic->inquire_remote_apic) + apic->inquire_remote_apic(apicid); } } -- cgit v1.2.3 From 0939e4fd351c58d08d25650797749f18904461af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:16:25 +0100 Subject: x86, smp: eliminate asm/mach-default/mach_wakecpu.h Spread mach_wakecpu.h's definitions into apic.h and genapic.h and remove mach_wakecpu.h. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 170adc5b6cb..1fdc1a7e7b5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,6 @@ #include #include -#include #include #ifdef CONFIG_X86_32 -- cgit v1.2.3 From fb5b33c9f62ca9222c11841d61ddb7dc1a6552e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:29:27 +0100 Subject: x86: eliminate asm/mach-*/mach_mpparse.h Move the definition to mpparse.h. Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/mpparse.c | 1 - 3 files changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 539163161a4..7b02a1cedca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_LOCAL_APIC #include -#include #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* X86 */ diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index d7f433ee602..8faea13c8fa 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b12fa5ce6f5..c6930162b3b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -32,7 +32,6 @@ #include #ifdef CONFIG_X86_32 #include -#include #endif /* -- cgit v1.2.3 From 1f75ed0c1311a50ed393bcac258de65680d360e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:36:56 +0100 Subject: x86: remove mach_apicdef.h Move its definitions into apic.h. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 1 - arch/x86/kernel/genapic_flat_64.c | 1 - arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/mpparse.c | 3 --- 4 files changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 84b8e7c57ab..e6220809ca1 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,7 +50,6 @@ #include #include -#include #include /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9237f59928..19bffb3f732 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -19,7 +19,6 @@ #include #include #include -#include #ifdef CONFIG_ACPI #include diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e90970ce21a..abae81989c2 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -64,7 +64,6 @@ #include #include -#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c6930162b3b..a1452a53d14 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -30,9 +30,6 @@ #include #include -#ifdef CONFIG_X86_32 -#include -#endif /* * Checksum an MP configuration block. -- cgit v1.2.3 From 328386d7ab600aa0993a1226f5817ac30a735724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:50:18 +0100 Subject: x86, smp: refactor ->wake_cpu - remove macro wrappers Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++--- arch/x86/kernel/smpboot.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a58e9f5e603..6b27f6dc7bf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -589,9 +589,8 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP -# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - apic->wakeup_cpu = wakeup_secondary_cpu_via_init; -# endif + if (!apic->wakeup_cpu) + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif return 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1fdc1a7e7b5..3fed177f345 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -750,7 +750,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. */ { unsigned long boot_error = 0; @@ -841,7 +841,7 @@ do_rest: /* * Starting actual IPI sequence... */ - boot_error = wakeup_secondary_cpu(apicid, start_ip); + boot_error = apic->wakeup_cpu(apicid, start_ip); if (!boot_error) { /* -- cgit v1.2.3 From b11b867f78910192fc54bd0d09148cf768c7aaad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 18:49:31 +0100 Subject: x86, summit: consolidate code Consolidate all the Summit code into a single file: arch/x86/kernel/summit_32.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/summit_32.c | 417 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 416 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 7b987852e87..3b60dd5e57f 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,364 @@ #include #include #include -#include + +/* + * APIC driver for the IBM "Summit" chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline unsigned summit_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); + +static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void summit_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + summit_send_IPI_mask(&mask, vector); +} + +static inline void summit_send_IPI_all(int vector) +{ + summit_send_IPI_mask(&cpu_online_map, vector); +} + +#include + +extern int use_cyclone; + +#ifdef CONFIG_X86_SUMMIT_NUMA +extern void setup_summit(void); +#else +#define setup_summit() {} +#endif + +static inline int +summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERVIGIL", 8) + || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +struct rio_table_hdr { + unsigned char version; /* Version number of this data structure */ + /* Version 3 adds chassis_num & WP_index */ + unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ + unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +} __attribute__((packed)); + +struct scal_detail { + unsigned char node_id; /* Scalability Node ID */ + unsigned long CBAR; /* Address of 1MB register space */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF = None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port2node; /* Node ID port connected to: 0xFF = None */ + unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + unsigned char node_id; /* RIO Node ID */ + unsigned long BBAR; /* Address of 1MB register space */ + unsigned char type; /* Type of device */ + unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ + /* For CYC: Node ID of Twister that owns this CYC */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF=None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ + /* For CYC: 0 */ + unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie:*/ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + /* For CYC: Bits0:7 Reserved */ + unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + /* For CYC: No meaning */ + unsigned char chassis_num; /* 1 based Chassis number */ + /* For LookOut WPEGs this field indicates the */ + /* Expansion Chassis #, enumerated from Boot */ + /* Node WPEG external port, then Boot Node CYC */ + /* external port, then Next Vigil chassis WPEG */ + /* external port, etc. */ + /* Shared Lookouts have only 1 chassis number (the */ + /* first one assigned) */ +} __attribute__((packed)); + + +typedef enum { + CompatTwister = 0, /* Compatibility Twister */ + AltTwister = 1, /* Alternate Twister of internal 8-way */ + CompatCyclone = 2, /* Compatibility Cyclone */ + AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ + CompatWPEG = 4, /* Compatibility WPEG */ + AltWPEG = 5, /* Second Planar WPEG */ + LookOutAWPEG = 6, /* LookOut WPEG */ + LookOutBWPEG = 7, /* LookOut WPEG */ +} node_type; + +static inline int is_WPEG(struct rio_detail *rio){ + return (rio->type == CompatWPEG || rio->type == AltWPEG || + rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +} + + +/* In clustered mode, the high nibble of APIC ID is a cluster number. + * The low nibble is a 4-bit bitmap. */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) + +#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline const cpumask_t *summit_target_cpus(void) +{ + /* CPU_MASK_ALL (0xff) has undefined behaviour with + * dest_LowestPrio mode logical clustered apic interrupt routing + * Just start on cpu 0. IRQ balancing will spread load + */ + return &cpumask_of_cpu(0); +} + +static inline unsigned long +summit_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +static inline unsigned long summit_check_apicid_present(int bit) +{ + return 1; +} + +#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) + +extern u8 cpu_2_logical_apicid[]; + +static inline void summit_init_apic_ldr(void) +{ + unsigned long val, id; + int count = 0; + u8 my_id = (u8)hard_smp_processor_id(); + u8 my_cluster = (u8)apicid_cluster(my_id); +#ifdef CONFIG_SMP + u8 lid; + int i; + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = nr_cpu_ids; --i >= 0; ) { + lid = cpu_2_logical_apicid[i]; + if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + ++count; + } +#endif + /* We only have a 4 wide bitmap in cluster mode. If a deranged + * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ + BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); + id = my_cluster | (1UL << count); + apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write(APIC_LDR, val); +} + +static inline int summit_apic_id_registered(void) +{ + return 1; +} + +static inline void summit_setup_apic_routing(void) +{ + printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); +} + +static inline int summit_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +/* Mapping from cpu number to logical apicid */ +static inline int summit_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline int summit_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t +summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0x0F); +} + +static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) +{ + return physid_mask_of_physid(0); +} + +static inline void summit_setup_portio_remap(void) +{ +} + +static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set >= nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = summit_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = summit_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline unsigned int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = summit_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = summit_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +/* + * cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. + */ +static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +static int probe_summit(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; @@ -186,3 +543,61 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } + + +struct genapic apic_summit = { + + .name = "summit", + .probe = probe_summit, + .acpi_madt_oem_check = summit_acpi_madt_oem_check, + .apic_id_registered = summit_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = summit_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = summit_check_apicid_used, + .check_apicid_present = summit_check_apicid_present, + + .vector_allocation_domain = summit_vector_allocation_domain, + .init_apic_ldr = summit_init_apic_ldr, + + .ioapic_phys_id_map = summit_ioapic_phys_id_map, + .setup_apic_routing = summit_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = summit_apicid_to_node, + .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, + .cpu_present_to_apicid = summit_cpu_present_to_apicid, + .apicid_to_cpu_present = summit_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = summit_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = summit_phys_pkg_id, + .mps_oem_check = summit_mps_oem_check, + + .get_apic_id = summit_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, + + .send_IPI_mask = summit_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = summit_send_IPI_allbutself, + .send_IPI_all = summit_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; -- cgit v1.2.3 From 7c20dcc545d78946e40e8fab99637fe815b1d211 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 11:29:22 +0100 Subject: x86, summit: consolidate code, fix Build fix for !NUMA Summit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/summit_32.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada1..a382ca6f6a1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o -obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o +obj-$(CONFIG_X86_SUMMIT) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 3b60dd5e57f..84ff9ebbcc9 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -389,6 +389,7 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } +#ifdef CONFIG_X86_SUMMIT_NUMA static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; @@ -543,7 +544,7 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } - +#endif struct genapic apic_summit = { -- cgit v1.2.3 From 1dcdd3d15ecea0c22a09d4d001a39d425fceff2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:55:37 +0100 Subject: x86: remove mach_apic.h Spread mach_apic.h definitions into genapic.h. (with some knock-on effects on smp.h and apic.h.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 14 +------------- arch/x86/kernel/apic.c | 22 ++++++++++++++++++++-- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/mpparse.c | 3 +-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tlb_uv.c | 2 +- arch/x86/kernel/visws_quirks.c | 2 +- 15 files changed, 34 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7b02a1cedca..cb8b52785e3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -42,10 +42,6 @@ #include #include -#ifdef CONFIG_X86_LOCAL_APIC -# include -#endif - static int __initdata acpi_force = 0; u32 acpi_rsdt_forced; #ifdef CONFIG_ACPI @@ -56,15 +52,7 @@ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 - -#include - -#else /* X86 */ - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#endif /* CONFIG_X86_LOCAL_APIC */ - +# include #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index e6220809ca1..41a0ba34d6b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,6 @@ #include #include -#include #include /* @@ -1910,11 +1909,30 @@ void __cpuinit generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); } -#ifdef CONFIG_X86_64 int hard_smp_processor_id(void) { return read_apic_id(); } + +void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +#ifdef CONFIG_X86_32 +int default_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} #endif /* diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index e8bb892c09f..4a48bb40974 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,7 +7,7 @@ #include #include -#include +#include struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c878f6aa91..ff4d7b9e32e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,7 @@ # include #endif -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 055b9c3a660..c4bdc7f0020 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,7 +26,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5deefae9064..1cef0aa5e5d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index abae81989c2..e0744ea6d0f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -63,7 +63,7 @@ #include #include -#include +#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e16c41b2e4e..50076d92fbc 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,7 +19,7 @@ #include #ifdef CONFIG_X86_32 -#include +#include #include /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e0f29be8ab0..d802c844991 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -231,7 +231,7 @@ unsigned int do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include +#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a1452a53d14..94fe71029c3 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -29,8 +29,7 @@ #include #include -#include - +#include /* * Checksum an MP configuration block. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6b27f6dc7bf..92e42939fb0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index c48ba6cc32a..892e7c389be 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3fed177f345..489fde9d947 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01..755ede02b13 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -20,7 +20,7 @@ #include #include -#include +#include static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 2ed5bdf15c9..3bd7f47a91b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -34,7 +34,7 @@ #include -#include "mach_apic.h" +#include #include -- cgit v1.2.3 From 2e096df8edefad78155bb406a5a86c182b17786e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: x86, ES7000: Consolidate code Move all ES7000 code into arch/x86/kernel/es7000_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 428 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 428 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 8faea13c8fa..078364ccfa0 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -372,3 +372,431 @@ void __init es7000_enable_apic_mode(void) mip_status); } } + +/* + * APIC driver for the Unisys ES7000 chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int apic_version [MAX_APICS]; +extern u8 cpu_2_logical_apicid[]; +extern unsigned int boot_cpu_physical_apicid; + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) + +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + + +static void es7000_wait_for_init_deassert(atomic_t *deassert) +{ +#ifndef CONFIG_ES7000_CLUSTERED_APIC + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +static unsigned int es7000_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#ifdef CONFIG_ACPI +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static void es7000_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static void es7000_send_IPI_all(int vector) +{ + es7000_send_IPI_mask(cpu_online_mask, vector); +} + +static int es7000_apic_id_registered(void) +{ + return 1; +} + +static const cpumask_t *target_cpus_cluster(void) +{ + return &CPU_MASK_ALL; +} + +static const cpumask_t *es7000_target_cpus(void) +{ + return &cpumask_of_cpu(smp_processor_id()); +} + +static unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static unsigned long es7000_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static unsigned long calculate_ldr(int cpu) +{ + unsigned long id = xapic_phys_to_log_apicid(cpu); + + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void es7000_init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); +} + +static int es7000_apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static int es7000_cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + + mask = physid_mask_of_physid(id); + ++id; + + return mask; +} + +/* Mapping from cpu number to logical apicid */ +static int es7000_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + +static int es7000_check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static unsigned int +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = cpumask_first(cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return es7000_cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return es7000_cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = es7000_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = es7000_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void __init es7000_update_genapic_to_cluster(void) +{ + apic->target_cpus = target_cpus_cluster; + apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; + + apic->init_apic_ldr = es7000_init_apic_ldr_cluster; + + apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; +} + +static int probe_es7000(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static __init int +es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (mpc->oemptr) { + struct mpc_oemtable *oem_table = + (struct mpc_oemtable *)mpc->oemptr; + + if (!strncmp(oem, "UNISYS", 6)) + return parse_unisys_oem((char *)oem_table); + } + return 0; +} + +#ifdef CONFIG_ACPI +/* Hook from generic ACPI tables.c */ +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr = 0; + int check_dsdt; + int ret = 0; + + /* check dsdt at first to avoid clear fix_map for oem_addr */ + check_dsdt = es7000_check_dsdt(); + + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (check_dsdt) + ret = parse_unisys_oem((char *)oem_addr); + else { + setup_unisys(); + ret = 1; + } + /* + * we need to unmap it + */ + unmap_unisys_acpi_oem_table(oem_addr); + } + return ret; +} +#else +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif + + +struct genapic apic_es7000 = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check, + .apic_id_registered = es7000_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPUs: */ + .irq_dest_mode = 0, + + .target_cpus = es7000_target_cpus, + .disable_esr = 1, + .dest_logical = 0, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, + + .vector_allocation_domain = es7000_vector_allocation_domain, + .init_apic_ldr = es7000_init_apic_ldr, + + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, + .setup_apic_routing = es7000_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = es7000_apicid_to_node, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = es7000_check_phys_apicid_present, + .enable_apic_mode = es7000_enable_apic_mode, + .phys_pkg_id = es7000_phys_pkg_id, + .mps_oem_check = es7000_mps_oem_check, + + .get_apic_id = es7000_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, + + .send_IPI_mask = es7000_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = es7000_wait_for_init_deassert, + + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; -- cgit v1.2.3 From 61b90b7ca10cc65d8b850ab542859dc593e5a381 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: x86, NUMAQ: Consolidate code Move all NUMAQ code into arch/x86/kernel/numaq.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 278 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 3928280278f..83bb05524f4 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -291,3 +291,281 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } + +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline unsigned int numaq_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0x0F; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void numaq_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void numaq_send_IPI_all(int vector) +{ + numaq_send_IPI_mask(cpu_online_mask, vector); +} + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: + */ +static inline void numaq_smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); +} + +static inline const cpumask_t *numaq_target_cpus(void) +{ + return &CPU_MASK_ALL; +} + +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long numaq_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int numaq_apic_id_registered(void) +{ + return 1; +} + +static inline void numaq_init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void numaq_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int numaq_multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; + +static inline int numaq_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int numaq_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int numaq_apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +{ + int node = numaq_apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return 0x0F; +} + +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + return 0x0F; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} +static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + +static void numaq_setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + +struct genapic apic_numaq = { + + .name = "NUMAQ", + .probe = probe_numaq, + .acpi_madt_oem_check = NULL, + .apic_id_registered = numaq_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* physical delivery on LOCAL quad: */ + .irq_dest_mode = 0, + + .target_cpus = numaq_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = numaq_check_apicid_used, + .check_apicid_present = numaq_check_apicid_present, + + .vector_allocation_domain = numaq_vector_allocation_domain, + .init_apic_ldr = numaq_init_apic_ldr, + + .ioapic_phys_id_map = numaq_ioapic_phys_id_map, + .setup_apic_routing = numaq_setup_apic_routing, + .multi_timer_check = numaq_multi_timer_check, + .apicid_to_node = numaq_apicid_to_node, + .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, + .cpu_present_to_apicid = numaq_cpu_present_to_apicid, + .apicid_to_cpu_present = numaq_apicid_to_cpu_present, + .setup_portio_remap = numaq_setup_portio_remap, + .check_phys_apicid_present = numaq_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = numaq_phys_pkg_id, + .mps_oem_check = __numaq_mps_oem_check, + + .get_apic_id = numaq_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, + + .send_IPI_mask = numaq_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = numaq_send_IPI_allbutself, + .send_IPI_all = numaq_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, + + /* We don't do anything here because we use NMI's to boot instead */ + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, + .store_NMI_vector = numaq_store_NMI_vector, + .inquire_remote_apic = NULL, +}; -- cgit v1.2.3 From b3daa3a1a56cf09fb91773f3658692fd02d08bb1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:04:37 +0100 Subject: x86, bigsmp: consolidate code Move all code to arch/x86/kernel/bigsmp_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/bigsmp_32.c | 113 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 arch/x86/kernel/bigsmp_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a382ca6f6a1..4239847906a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT) += summit_32.o diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c new file mode 100644 index 00000000000..626f45ca4e7 --- /dev/null +++ b/arch/x86/kernel/bigsmp_32.c @@ -0,0 +1,113 @@ +/* + * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. + * Drives the local APIC in "clustered mode". + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int dmi_bigsmp; /* can be set by dmi scanners */ + +static int hp_ht_bigsmp(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); + dmi_bigsmp = 1; + return 0; +} + + +static const struct dmi_system_id bigsmp_dmi_table[] = { + { hp_ht_bigsmp, "HP ProLiant DL760 G2", + { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P44-"),} + }, + + { hp_ht_bigsmp, "HP ProLiant DL740", + { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P47-"),} + }, + { } +}; + +static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + cpus_clear(*retmask); + cpu_set(cpu, *retmask); +} + +static int probe_bigsmp(void) +{ + if (def_to_bigsmp) + dmi_bigsmp = 1; + else + dmi_check_system(bigsmp_dmi_table); + return dmi_bigsmp; +} + +struct genapic apic_bigsmp = { + + .name = "bigsmp", + .probe = probe_bigsmp, + .acpi_madt_oem_check = NULL, + .apic_id_registered = bigsmp_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPU: */ + .irq_dest_mode = 0, + + .target_cpus = bigsmp_target_cpus, + .disable_esr = 1, + .dest_logical = 0, + .check_apicid_used = bigsmp_check_apicid_used, + .check_apicid_present = bigsmp_check_apicid_present, + + .vector_allocation_domain = bigsmp_vector_allocation_domain, + .init_apic_ldr = bigsmp_init_apic_ldr, + + .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, + .setup_apic_routing = bigsmp_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = bigsmp_apicid_to_node, + .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, + .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, + .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = bigsmp_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = bigsmp_phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = bigsmp_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, + + .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = bigsmp_send_IPI_allbutself, + .send_IPI_all = bigsmp_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; -- cgit v1.2.3 From 9f4187f0a3b93fc215b4472063b6c0b44364e60c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:19:12 +0100 Subject: x86, bigsmp: consolidate header code Move all the asm/bigsmp/*.h definitions into bigsmp_32.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 163 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 626f45ca4e7..b1f91931003 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -12,10 +12,165 @@ #include #include #include -#include #include -#include -#include + + +static inline unsigned bigsmp_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) + +static inline int bigsmp_apic_id_registered(void) +{ + return 1; +} + +static inline const cpumask_t *bigsmp_target_cpus(void) +{ +#ifdef CONFIG_SMP + return &cpu_online_map; +#else + return &cpumask_of_cpu(0); +#endif +} + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline unsigned long +bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +static inline unsigned long bigsmp_check_apicid_present(int bit) +{ + return 1; +} + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + id = xapic_phys_to_log_apicid(cpu); + val |= SET_APIC_LOGICAL_ID(id); + return val; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void bigsmp_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static inline void bigsmp_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Physflat", nr_ioapics); +} + +static inline int bigsmp_apicid_to_node(int logical_apicid) +{ + return apicid_2_node[hard_smp_processor_id()]; +} + +static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + + return BAD_APICID; +} + +static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int bigsmp_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return cpu_physical_id(cpu); +} + +static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xFFL); +} + +static inline void bigsmp_setup_portio_remap(void) +{ +} + +static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* As we are using single CPU as destination, pick only one CPU here */ +static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); +} + +static inline unsigned int +bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + if (cpu < nr_cpu_ids) + return bigsmp_cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + +static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void bigsmp_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void bigsmp_send_IPI_all(int vector) +{ + bigsmp_send_IPI_mask(cpu_online_mask, vector); +} static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -95,7 +250,7 @@ struct genapic apic_bigsmp = { .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, -- cgit v1.2.3 From d53e2f2855f1c7c2725d550c1ae6b26f4d671c50 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:14:52 +0100 Subject: x86, smp: remove mach_ipi.h Move mach_ipi.h definitions into genapic.h. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/ipi.c | 1 - arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 1 - arch/x86/kernel/visws_quirks.c | 2 +- 8 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 41a0ba34d6b..81efe86eca8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,7 @@ #include #include -#include +#include /* * Sanity check diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 11b93cabdf7..ad7f2a696f4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,7 @@ #include #include -#include +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e0744ea6d0f..241a01d6fd4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -62,7 +62,6 @@ #include #include -#include #include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 50076d92fbc..0893fa14458 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,7 +20,6 @@ #ifdef CONFIG_X86_32 #include -#include /* * the following functions deal with sending IPIs between CPUs. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b62a3811e01..5c4f5548384 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,7 +46,7 @@ #include #include -#include +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 38dace28d62..32e8f0af292 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,7 +24,7 @@ # include #endif -#include +#include /* * Power off function, if any diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 892e7c389be..0eb32ae9bf1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,7 +26,6 @@ #include #include #include -#include #include /* * Some notes on x86 processor bugs affecting SMP operation: diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 3bd7f47a91b..4fd646e6dd4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include -- cgit v1.2.3 From 7b38725318f4517af6168ccbff99060d67aba1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:11:44 +0100 Subject: x86: remove subarchitecture support code Remove remaining bits of the subarchitecture code. Now that all the special platforms are runtime probed and runtime handled, we can remove these facilities. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/probe_32.c | 248 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/probe_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4239847906a..d3f8f49aed6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -30,7 +30,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o -obj-$(CONFIG_X86_32) += probe_roms_32.o +obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c new file mode 100644 index 00000000000..a6fba691416 --- /dev/null +++ b/arch/x86/kernel/probe_32.c @@ -0,0 +1,248 @@ +/* + * Default generic APIC driver. This handles up to 8 CPUs. + * + * Copyright 2003 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, v.2 + * + * Generic x86 APIC driver probe layer. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +} + +/* should be called last. */ +static int probe_default(void) +{ + return 1; +} + +struct genapic apic_default = { + + .name = "default", + .probe = probe_default, + .acpi_madt_oem_check = NULL, + .apic_id_registered = default_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = default_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = default_check_apicid_used, + .check_apicid_present = default_check_apicid_present, + + .vector_allocation_domain = default_vector_allocation_domain, + .init_apic_ldr = default_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = default_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = default_apicid_to_node, + .cpu_to_logical_apicid = default_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = default_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = default_phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = default_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; + +extern struct genapic apic_numaq; +extern struct genapic apic_summit; +extern struct genapic apic_bigsmp; +extern struct genapic apic_es7000; +extern struct genapic apic_default; + +struct genapic *apic = &apic_default; + +static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_NUMAQ + &apic_numaq, +#endif +#ifdef CONFIG_X86_SUMMIT + &apic_summit, +#endif +#ifdef CONFIG_X86_BIGSMP + &apic_bigsmp, +#endif +#ifdef CONFIG_X86_ES7000 + &apic_es7000, +#endif + &apic_default, /* must be last */ + NULL, +}; + +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + apic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + + /* Parsed again by __setup for debug/verbose */ + return 0; +} +early_param("apic", parse_apic); + +void __init generic_bigsmp_probe(void) +{ +#ifdef CONFIG_X86_BIGSMP + /* + * This routine is used to switch to bigsmp mode when + * - There is no apic= option specified by the user + * - generic_apic_probe() has chosen apic_default as the sub_arch + * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support + */ + + if (!cmdline_apic && apic == &apic_default) { + if (apic_bigsmp.probe()) { + apic = &apic_bigsmp; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Overriding APIC driver with %s\n", + apic->name); + } + } +#endif +} + +void __init generic_apic_probe(void) +{ + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { + apic = apic_probe[i]; + break; + } + } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); + + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + } + printk(KERN_INFO "Using APIC driver %s\n", apic->name); +} + +/* These functions can switch the APIC even after the initial ->probe() */ + +int __init +generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (!apic_probe[i]->mps_oem_check) + continue; + if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); + } + return 1; + } + return 0; +} + +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (!apic_probe[i]->acpi_madt_oem_check) + continue; + if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); + } + return 1; + } + return 0; +} -- cgit v1.2.3 From 1164dd0099c0d79146a55319670f57ab7ad1d352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:34:09 +0100 Subject: x86: move mach-default/*.h files to asm/ We are getting rid of subarchitecture support - move the hook files to asm/. (These are now stale and should be replaced with more explicit runtime mechanisms - but the transition is simpler this way.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/probe_roms_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/time_32.c | 2 +- arch/x86/kernel/traps.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 98807bb095a..37ba5f85b71 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); */ #define APM_ZERO_SEGS -#include "apm.h" +#include /* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a..65efd42454b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -688,7 +688,7 @@ ENDPROC(name) #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) /* The include is where all of the SMP etc. interrupts come from */ -#include "entry_arch.h" +#include ENTRY(coprocessor_error) RING0_INT_FRAME diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 23b6d9e6e4f..bdfad80c3cf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,7 +34,7 @@ #include -#include +#include int unknown_nmi_panic; int nmi_watchdog_enabled; diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c index 675a48c404a..071e7fea42e 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms_32.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include static struct resource system_rom_resource = { .name = "System ROM", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 92e42939fb0..e645d4793e4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -81,7 +81,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 489fde9d947..e90b3e50b54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 3985cac0ed4..764c74e871f 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -38,7 +38,7 @@ #include #include -#include "do_timer.h" +#include int timer_ack; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ed5aee5f3fc..214bc327a0c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,7 +54,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_64 #include -- cgit v1.2.3 From 6bda2c8b32febeb38ee128047253751e080bad52 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:32:55 +0100 Subject: x86: remove subarchitecture support Remove the 32-bit subarchitecture support code. All subarchitectures but Voyager have been converted. Voyager will be done later or will be removed. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 17 ----- arch/x86/kernel/probe_32.c | 163 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d3f8f49aed6..1bc4cdc797d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -63,7 +63,7 @@ obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 241a01d6fd4..3378ffb2140 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -797,23 +797,6 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void default_send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - #ifdef CONFIG_X86_32 /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index a6fba691416..61339a0d55c 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -32,6 +32,26 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast = DEFAULT_SEND_IPI; + +#ifdef CONFIG_X86_LOCAL_APIC + static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* @@ -246,3 +266,146 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ + +/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +void __init pre_intr_init_hook(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) + return; + } +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On Voyager + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) + return; + } +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +/** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + +/** + * time_init_hook - do any specific initialisations for the system timer. + * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + if (x86_quirks->arch_time_init) { + /* + * A nonzero return code does not mean failure, it means + * that the architecture quirk does not want any + * generic (timer) setup to be performed after this: + */ + if (x86_quirks->arch_time_init()) + return; + } + + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Architecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void mca_nmi_hook(void) +{ + /* + * If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + pr_warning("NMI generated from unknown source!\n"); +} +#endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + return 1; +} +__setup("no_ipi_broadcast=", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); + -- cgit v1.2.3 From 3e5095d15276efd14a45393666b1bb7536bf179f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:07:08 +0100 Subject: x86: replace CONFIG_X86_SMP with CONFIG_SMP The x86/Voyager subarch used to have this distinction between 'x86 SMP support' and 'Voyager SMP support': config X86_SMP bool depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) This is a pointless distinction - Voyager can (and already does) use smp_ops to implement various SMP quirks it has - and it can be extended more to cover all the specialities of Voyager. So remove this complication in the Kconfig space. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1bc4cdc797d..dc05a57a474 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,8 +57,8 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 81efe86eca8..968c817762a 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1900,7 +1900,7 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4a48bb40974..e48640cfac0 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..89537f678b2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -344,7 +344,7 @@ static void c1e_idle(void) void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { printk(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e645d4793e4..eeb180b1d7a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -588,7 +588,7 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (!apic->wakeup_cpu) apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e5816863..83d53ce5d4c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void) if (!cpu_has_tsc || tsc_unstable) return 1; -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (apic_is_clustered_box()) return 1; #endif diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e0940..a4791ef412d 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void) */ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); local_irq_disable(); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP /* * XXX handle_percpu_irq only defined for SMP; we need to switch over * to using it, since this is a local interrupt, which each CPU must -- cgit v1.2.3 From c0b5842a457d44c8788b3fd0c64969be7ef673cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:13:05 +0100 Subject: x86: generalize boot_cpu_id x86/Voyager can boot on non-zero processors. While that can probably be fixed by properly remapping the physical CPU IDs, keep boot_cpu_id for now for easier transition - and expand it to all of x86. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 12 ------------ 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index eeb180b1d7a..609e5af6028 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,20 @@ #define ARCH_SETUP #endif +unsigned int boot_cpu_id __read_mostly; + +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#endif + #ifndef CONFIG_DEBUG_BOOT_PARAMS struct boot_params __initdata boot_params; #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e90b3e50b54..bc7e220ba0b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -905,18 +905,6 @@ do_rest: return boot_error; } -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#endif - int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = apic->cpu_present_to_apicid(cpu); -- cgit v1.2.3 From f095df0a0cb35a52605541f619d038339b90d7cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:17:55 +0100 Subject: x86/Voyager: remove X86_BIOS_REBOOT Kconfig quirk Voyager has this Kconfig quirk: config X86_BIOS_REBOOT bool depends on !X86_VOYAGER default y Voyager should use the existing machine_ops.emergency_restart reboot quirk mechanism instead of a build-time quirk. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index dc05a57a474..24f357e7557 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -50,7 +50,7 @@ obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ -obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o +obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -- cgit v1.2.3 From 550fe4f198558c147c6b8273a709568222a1668a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:28:08 +0100 Subject: x86/Voyager: remove X86_FIND_SMP_CONFIG Kconfig quirk x86/Voyager had this Kconfig quirk: config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Which splits off the find_smp_config() callback into a build-time quirk. Voyager should use the existing x86_quirks.mach_find_smp_config() callback to introduce SMP-config quirks. NUMAQ-32 and VISWS already use this. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 609e5af6028..6abce6703c5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -905,12 +905,11 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_X86_FIND_SMP_CONFIG /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); -#endif + reserve_crashkernel(); #ifdef CONFIG_X86_64 -- cgit v1.2.3 From e0c7ae376a13fd79a4dad8becab51040d13dfa90 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:43:09 +0100 Subject: x86: rename X86_GENERICARCH to X86_32_NON_STANDARD X86_GENERICARCH is a misnomer - it contains non-PC 32-bit architectures that are not included in the default build. Rename it to X86_32_NON_STANDARD. Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cb8b52785e3..7352c60f29d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 94fe71029c3..89aaced51bd 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,7 +372,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6abce6703c5..f64e1a487c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bc7e220ba0b..fc80bc18943 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { -- cgit v1.2.3 From 0a1e8869f453ceda121a1ff8d6c4380832377be7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 28 Jan 2009 23:21:25 +0300 Subject: x86: trampoline_64.S - use predefined constants with simplification Impact: cleanup We may use macros from processor-flags.h instead of hardcoding bits. Actually it's not direct mapping of old instructions with new ones -- BTS does change CF flag while MOV does not. But i didn't find any dependency on CF in this code. Signed-off-by: Cyrill Gorcunov Acked-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline_64.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 894293c598d..95a012a4664 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -29,6 +29,7 @@ #include #include #include +#include .section .rodata, "a", @progbits @@ -37,7 +38,7 @@ ENTRY(trampoline_data) r_base = . cli # We should be safe anyway - wbinvd + wbinvd mov %cs, %ax # Code and data in the same place mov %ax, %ds mov %ax, %es @@ -73,9 +74,8 @@ r_base = . lidtl tidt - r_base # load idt with 0, 0 lgdtl tgdt - r_base # load gdt with whatever is appropriate - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode + mov $X86_CR0_PE, %ax # protected mode (PE) bit + lmsw %ax # into protected mode # flush prefetch and jump to startup_32 ljmpl *(startup_32_vector - r_base) @@ -86,9 +86,8 @@ startup_32: movl $__KERNEL_DS, %eax # Initialize the %ds segment register movl %eax, %ds - xorl %eax, %eax - btsl $5, %eax # Enable PAE mode - movl %eax, %cr4 + movl $X86_CR4_PAE, %eax + movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables leal (trampoline_level4_pgt - r_base)(%esi), %eax @@ -99,9 +98,9 @@ startup_32: xorl %edx, %edx wrmsr - xorl %eax, %eax - btsl $31, %eax # Enable paging and in turn activate Long Mode - btsl $0, %eax # Enable protected mode + # Enable paging and in turn activate Long Mode + # Enable protected mode + movl $(X86_CR0_PG | X86_CR0_PE), %eax movl %eax, %cr0 /* -- cgit v1.2.3 From fbeb2ca0224182033f196cf8f63989c3e6b90aba Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 29 Jan 2009 12:11:08 -0800 Subject: x86: unify genapic code, unify subarchitectures, remove old subarchitecture code, xapic fix xapic fix for 32bit platform with less than 8 cpu's. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/probe_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 61339a0d55c..b5db26f8e06 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -113,7 +113,7 @@ struct genapic apic_default = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, -- cgit v1.2.3 From 4272ebfbefd0db40073f3ee5990bceaf2894f08b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 15:14:46 -0800 Subject: x86: allow more than 8 cpus to be used on 32-bit X86_PC is the only remaining 'sub' architecture, so we dont need it anymore. This also cleans up a few spurious references to X86_PC in the driver space - those certainly should be X86. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc80bc18943..2912fa3a8ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) +#ifndef CONFIG_X86_BIGSMP if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; -- cgit v1.2.3 From 754ef0cd65faac4840ada4362bda322d9a811fbf Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:51:09 +0900 Subject: x86: fix debug message of CPU clock speed Impact: Fixes incorrect printk LOCAL APIC is corrected by PM-Timer, when SMI occurred while LOCAL APIC is calibrated. In this case, LOCAL APIC debug message(Boot with apic=debug) is displayed correctly, however, CPU clock speed debug message is displayed wrongly . When SMI occured on my machine, which has 1.6GHz CPU, CPU clock speed is displayed 3622.0205 MHz as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773130 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773130) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 3622.0205 MHz. =====> here ..... host bus clock speed is 265.0987 MHz. This patch fixes to displaying CPU clock speed correctly as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df2469fe..7bcd746d704 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -535,7 +535,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } -static int __init calibrate_by_pmtimer(long deltapm, long *delta) +static int __init +calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) { const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; const long pm_thresh = pm_100ms / 100; @@ -557,18 +558,29 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64)deltapm) * mult) >> 22; - do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64)(*delta)) * pm_100ms); + return 0; + } + + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + pr_warning("APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n",(long)res); + + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + pr_info("APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + + /* Correct the tsc counter value */ + if (cpu_has_tsc) { + res = (((u64)(*deltatsc)) * pm_100ms); do_div(res, deltapm); - pr_info("APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long)res, *delta); - *delta = (long)res; + apic_printk(APIC_VERBOSE, "TSC delta adjusted to " + "PM-Timer: %lu (%ld) \n", + (unsigned long)res, *deltatsc); + *deltatsc = (long)res; } return 0; @@ -579,7 +591,7 @@ static int __init calibrate_APIC_clock(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta; + long delta, deltatsc; int pm_referenced = 0; local_irq_disable(); @@ -609,9 +621,11 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + /* we trust the PM based calibration if possible */ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, - &delta); + &delta, &deltatsc); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -629,11 +643,10 @@ static int __init calibrate_APIC_clock(void) calibration_result); if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); } apic_printk(APIC_VERBOSE, "..... host bus clock speed is " -- cgit v1.2.3 From 39ba5d43fc9133696240fc8b6b13e7a41fea87cd Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:52:24 +0900 Subject: x86: unify PM-Timer messages Impact: Cleans up printk formatting When LOCAL APIC was calibrated, the debug message is displayed as follows. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. There are three type of PM-Timer (PM-Timer, PM Timer, and PM timer), in this message. This patch unifies those messages to PM-Timer. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7bcd746d704..0d935d218d0 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -547,7 +547,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) return -1; #endif - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); /* Check, if the PM timer is available */ if (!deltapm) @@ -557,14 +557,14 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); return 0; } res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n",(long)res); + "with PM-Timer: %ldms instead of 100ms\n",(long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); -- cgit v1.2.3 From 1ff2f20de354a621ef4b56b9cfe6f9139a7e493b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:30:04 -0800 Subject: x86: fix compiling with 64bit with def_to_bigsmp only need to do cut off with 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2912fa3a8ef..4c3cff57494 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#ifndef CONFIG_X86_BIGSMP +#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; -- cgit v1.2.3 From 43f39890db2959b10891cf7bbf3f53fffc8ce3bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:31:49 -0800 Subject: x86: seperate default_send_IPI_mask_sequence/allbutself from logical Impact: 32-bit should use logical version there are two version: for default_send_IPI_mask_sequence/allbutself one in ipi.h and one in ipi.c for 32bit it seems .h version overwrote ipi.c for a while. restore it so 32 bit could use its old logical version. also remove dupicated functions in .c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 8 +-- arch/x86/kernel/es7000_32.c | 4 +- arch/x86/kernel/genapic_flat_64.c | 6 +- arch/x86/kernel/ipi.c | 139 +------------------------------------- arch/x86/kernel/numaq_32.c | 8 +-- arch/x86/kernel/probe_32.c | 4 +- arch/x86/kernel/summit_32.c | 6 +- 7 files changed, 17 insertions(+), 158 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index b1f91931003..ab645c93a6e 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -154,17 +155,14 @@ static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static inline void bigsmp_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static inline void bigsmp_send_IPI_all(int vector) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 078364ccfa0..b5b50e8b94a 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -451,12 +451,12 @@ static int es7000_check_dsdt(void) static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static void es7000_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void es7000_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 19bffb3f732..249d2d3c034 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -267,18 +267,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence_phys(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself_phys(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 0893fa14458..339f4f3feee 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -17,148 +17,13 @@ #include #include #include +#include #ifdef CONFIG_X86_32 -#include - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR(unsigned int shortcut, int vector) -{ - unsigned int icr = shortcut | apic->dest_logical; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} void default_send_IPI_self(int vector) { - __default_send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} - -/* - * This is only used on smaller machines. - */ -void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector); - } - local_irq_restore(flags); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); } /* must come after the send_IPI functions above for inlining */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 83bb05524f4..c143b329216 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -302,6 +302,7 @@ int __init get_memcfg_numaq(void) #include #include #include +#include #include #include #include @@ -319,17 +320,14 @@ static inline unsigned int numaq_get_apic_id(unsigned long x) return (x >> 24) & 0x0F; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void numaq_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); } static inline void numaq_send_IPI_all(int vector) diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index b5db26f8e06..3f12a401182 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -112,8 +112,8 @@ struct genapic apic_default = { .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, + .send_IPI_mask = default_send_IPI_mask_logical, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 84ff9ebbcc9..ecb41b9d7aa 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -54,12 +55,9 @@ static inline unsigned summit_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); - static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void summit_send_IPI_allbutself(int vector) -- cgit v1.2.3 From 26f7ef14a76b0e590a3797fd7b2f3cee868d9664 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 14:19:22 -0800 Subject: x86: don't treat bigsmp as non-standard just like 64 bit switch from flat logical APIC messages to flat physical mode automatically. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7352c60f29d..3efa996b036 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_32_NON_STANDARD +#ifdef CONFIG_X86_BIGSMP generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 89aaced51bd..b46ca7d31fe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,8 +372,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_32_NON_STANDARD - generic_bigsmp_probe(); +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); #endif if (apic->setup_apic_routing) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f64e1a487c9..df64afff580 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32_NON_STANDARD +#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4c3cff57494..1268a862abb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { -- cgit v1.2.3 From 6b64ee02da20d6c0d97115e0b1ab47f9fa2f0d8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 30 Jan 2009 23:42:18 +0100 Subject: x86, apic, 32-bit: add self-IPI methods Impact: fix rare crash on 32-bit The 32-bit APIC drivers had their send_IPI_self vectors set to NULL, but ioapic_retrigger_irq() depends on it being always set. Fix it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/es7000_32.c | 2 +- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/probe_32.c | 2 +- arch/x86/kernel/summit_32.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index ab645c93a6e..47a62f46afd 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -252,7 +252,7 @@ struct genapic apic_bigsmp = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index b5b50e8b94a..d6184c12a18 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -786,7 +786,7 @@ struct genapic apic_es7000 = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = es7000_send_IPI_allbutself, .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index c143b329216..947748e1721 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -554,7 +554,7 @@ struct genapic apic_numaq = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = numaq_send_IPI_allbutself, .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 3f12a401182..22337b75de6 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -116,7 +116,7 @@ struct genapic apic_default = { .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ecb41b9d7aa..1e733eff9b3 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -588,7 +588,7 @@ struct genapic apic_summit = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = summit_send_IPI_allbutself, .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, -- cgit v1.2.3 From 41edafdb78feac1d1f8823846209975fde990633 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:02 -0800 Subject: x86/pvops: add a paravirt_ident functions to allow special patching Impact: Optimization Several paravirt ops implementations simply return their arguments, the most obvious being the make_pte/pte_val class of operations on native. On 32-bit, the identity function is literally a no-op, as the calling convention uses the same registers for the first argument and return. On 64-bit, it can be implemented with a single "mov". This patch adds special identity functions for 32 and 64 bit argument, and machinery to recognize them and replace them with either nops or a mov as appropriate. At the moment, the only users for the identity functions are the pagetable entry conversion functions. The result is a measureable improvement on pagetable-heavy benchmarks (2-3%, reducing the pvops overhead from 5 to 2%). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/paravirt.c | 75 ++++++++++++++++++++++++++++++++----- arch/x86/kernel/paravirt_patch_32.c | 12 ++++++ arch/x86/kernel/paravirt_patch_64.c | 15 ++++++++ 3 files changed, 93 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 202514be592..dd25e2b1593 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -44,6 +44,17 @@ void _paravirt_nop(void) { } +/* identity function, which can be inlined */ +u32 _paravirt_ident_32(u32 x) +{ + return x; +} + +u64 _paravirt_ident_64(u64 x) +{ + return x; +} + static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == paravirt_nop) + else if (opfunc == _paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); + + /* identity functions just return their single argument */ + else if (opfunc == _paravirt_ident_32) + ret = paravirt_patch_ident_32(insnbuf, len); + else if (opfunc == _paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || @@ -373,6 +391,45 @@ struct pv_apic_ops pv_apic_ops = { #endif }; +typedef pte_t make_pte_t(pteval_t); +typedef pmd_t make_pmd_t(pmdval_t); +typedef pud_t make_pud_t(pudval_t); +typedef pgd_t make_pgd_t(pgdval_t); + +typedef pteval_t pte_val_t(pte_t); +typedef pmdval_t pmd_val_t(pmd_t); +typedef pudval_t pud_val_t(pud_t); +typedef pgdval_t pgd_val_t(pgd_t); + + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +/* 32-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#else +/* 64-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -424,21 +481,21 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, + .pmd_val = paravirt_native_pmd_val, + .make_pmd = paravirt_native_make_pmd, #if PAGETABLE_LEVELS == 4 - .pud_val = native_pud_val, - .make_pud = native_make_pud, + .pud_val = paravirt_native_pud_val, + .make_pud = paravirt_native_make_pud, .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = native_pte_val, - .pgd_val = native_pgd_val, + .pte_val = paravirt_native_pte_val, + .pgd_val = paravirt_native_pgd_val, - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, + .make_pte = paravirt_native_make_pte, + .make_pgd = paravirt_native_make_pgd, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861..d9f32e6d6ab 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + /* arg in %eax, return in %eax */ + return 0; +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + /* arg in %edx:%eax, return in %edx:%eax */ + return 0; +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae..3f08f34f93e 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); +DEF_NATIVE(, mov32, "mov %edi, %eax"); +DEF_NATIVE(, mov64, "mov %rdi, %rax"); + +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov32, end__mov32); +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov64, end__mov64); +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { -- cgit v1.2.3 From b8aa287f77be943e37a84fa4657e27df95269bfb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:03 -0800 Subject: x86: fix paravirt clobber in entry_64.S Impact: Fix latent bug The clobber is trying to say that anything except RDI is available for clobbering, but actually clobbers everything. This hasn't mattered because the clobbers were basically ignored, but subsequent patches will rely on them. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a52703864a1..e4c9710cae5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1140,7 +1140,7 @@ ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 - DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs -- cgit v1.2.3 From ecb93d1ccd0aac63f03be2db3cac3fa974716f4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:05 -0800 Subject: x86/paravirt: add register-saving thunks to reduce caller register pressure Impact: Optimization One of the problems with inserting a pile of C calls where previously there were none is that the register pressure is greatly increased. The C calling convention says that the caller must expect a certain set of registers may be trashed by the callee, and that the callee can use those registers without restriction. This includes the function argument registers, and several others. This patch seeks to alleviate this pressure by introducing wrapper thunks that will do the register saving/restoring, so that the callsite doesn't need to worry about it, but the callee function can be conventional compiler-generated code. In many cases (particularly performance-sensitive cases) the callee will be in assembler anyway, and need not use the compiler's calling convention. Standard calling convention is: arguments return scratch x86-32 eax edx ecx eax ? x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11 The thunk preserves all argument and scratch registers. The return register is not preserved, and is available as a scratch register for unwrapped callee code (and of course the return value). Wrapped function pointers are themselves wrapped in a struct paravirt_callee_save structure, in order to get some warning from the compiler when functions with mismatched calling conventions are used. The most common paravirt ops, both statically and dynamically, are interrupt enable/disable/save/restore, so handle them first. This is particularly easy since their calls are handled specially anyway. XXX Deal with VMI. What's their calling convention? Signed-off-by: H. Peter Anvin --- arch/x86/kernel/paravirt.c | 8 ++++---- arch/x86/kernel/vsmp_64.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dd25e2b1593..8adb6b5aa42 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -310,10 +310,10 @@ struct pv_time_ops pv_time_ops = { struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, + .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec..c609205df59 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) flags &= ~X86_EFLAGS_IF; return flags; } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); static void vsmp_restore_fl(unsigned long flags) { @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) flags |= X86_EFLAGS_AC; native_restore_fl(flags); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); static void vsmp_irq_disable(void) { @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); static void vsmp_irq_enable(void) { @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) cap, ctl); if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; ctl &= ~(1 << 4); -- cgit v1.2.3 From da5de7c22eb705be709a57e486e7475a6969b994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:07 -0800 Subject: x86/paravirt: use callee-saved convention for pte_val/make_pte/etc Impact: Optimization In the native case, pte_val, make_pte, etc are all just identity functions, so there's no need to clobber a lot of registers over them. (This changes the 32-bit callee-save calling convention to return both EAX and EDX so functions can return 64-bit values.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/paravirt.c | 53 +++++++++++----------------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8adb6b5aa42..cea11c8e304 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -391,43 +391,12 @@ struct pv_apic_ops pv_apic_ops = { #endif }; -typedef pte_t make_pte_t(pteval_t); -typedef pmd_t make_pmd_t(pmdval_t); -typedef pud_t make_pud_t(pudval_t); -typedef pgd_t make_pgd_t(pgdval_t); - -typedef pteval_t pte_val_t(pte_t); -typedef pmdval_t pmd_val_t(pmd_t); -typedef pudval_t pud_val_t(pud_t); -typedef pgdval_t pgd_val_t(pgd_t); - - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) #else /* 64-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif struct pv_mmu_ops pv_mmu_ops = { @@ -481,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = paravirt_native_pmd_val, - .make_pmd = paravirt_native_make_pmd, + + .pmd_val = PTE_IDENT, + .make_pmd = PTE_IDENT, #if PAGETABLE_LEVELS == 4 - .pud_val = paravirt_native_pud_val, - .make_pud = paravirt_native_make_pud, + .pud_val = PTE_IDENT, + .make_pud = PTE_IDENT, + .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = paravirt_native_pte_val, - .pgd_val = paravirt_native_pgd_val, + .pte_val = PTE_IDENT, + .pgd_val = PTE_IDENT, - .make_pte = paravirt_native_make_pte, - .make_pgd = paravirt_native_make_pgd, + .make_pte = PTE_IDENT, + .make_pgd = PTE_IDENT, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, -- cgit v1.2.3 From d1de36f5b5a30b8f9dae7142516fb122ce1e0661 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 01:59:14 +0100 Subject: x86, apic: clean up header section Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 968c817762a..29e7186b0e8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -14,43 +14,41 @@ * Mikael Pettersson : PM converted to driver model. */ -#include - -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include #include -#include -#include +#include +#include +#include #include -#include +#include +#include -#include -#include -#include -#include #include -#include #include +#include +#include +#include #include -#include +#include #include #include -#include +#include +#include +#include +#include #include -#include - /* * Sanity check */ -- cgit v1.2.3 From 8f47e16348e8e25eedf639092a8a2f10a66aba34 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:03:42 +0100 Subject: x86: update copyrights Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/stacktrace.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 29e7186b0e8..d6da6dd2f60 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1,7 +1,7 @@ /* * Local APIC handling, local APIC timers * - * (c) 1999, 2000 Ingo Molnar + * (c) 1999, 2000, 2009 Ingo Molnar * * Fixes * Maciej W. Rozycki : Bits for genuine 82489DX APICs; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3378ffb2140..57d60c741e3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b46ca7d31fe..66ebb823f39 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -3,7 +3,7 @@ * compliant MP-table parsing routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * (c) 2008 Alexey Starikovskiy */ diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 0eb32ae9bf1..eaaffae31cc 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -2,7 +2,7 @@ * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar + * (c) 1998-99, 2000, 2009 Ingo Molnar * (c) 2002,2003 Andi Kleen, SuSE Labs. * * i386 and x86_64 integration by Glauber Costa diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1268a862abb..f40f86fec2f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2,7 +2,7 @@ * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * Copyright 2001 Andi Kleen, SuSE Labs. * * Much of the core SMP work is based on previous work by Thomas Radke, to diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 10786af9554..f7bddc2e37d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -1,7 +1,7 @@ /* * Stack trace management functions * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include #include -- cgit v1.2.3 From 647ad94fc0479e33958cb4d0e20e241c0bcf599c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:06:50 +0100 Subject: x86, apic: clean up spurious vector sanity check Move the spurious vector sanity check to the place where it's defined - out of a .c file. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d6da6dd2f60..85d8b50d1af 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,13 +49,6 @@ #include #include -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; /* Processor that is doing the boot up */ -- cgit v1.2.3 From 2749ebe320ff9f77548d10fcc0a3464ac21c8e58 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 29 Jan 2009 15:35:26 -0600 Subject: x86: UV fix uv_flush_send_and_wait() Impact: fix possible tlb mis-flushing on UV uv_flush_send_and_wait() should return a pointer if the broadcast remote tlb shootdown requests fail. That causes the conventional IPI method of shootdown to be used. Signed-off-by: Cliff Wickman Signed-off-by: Tejun Heo --- arch/x86/kernel/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01..f4b2f27d19b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -259,7 +259,7 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, * the cpu's, all of which are still in the mask. */ __get_cpu_var(ptcstats).ptc_i++; - return 0; + return flush_mask; } /* -- cgit v1.2.3 From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 30 Jan 2009 17:47:53 +0900 Subject: x86: pass in cpu number to switch_to_new_gdt() Impact: cleanup, prepare for xen boot fix. Xen needs to call this function very early to setup the GDT and per-cpu segments. Remove the call to smp_processor_id() and just pass in the cpu number. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 7 +++---- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 652fdc9a757..6eacd64b602 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -255,10 +255,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - int cpu = smp_processor_id(); gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; @@ -993,7 +992,7 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); loadsegment(fs, 0); load_idt((const struct desc_ptr *)&idt_descr); @@ -1098,7 +1097,7 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0d1e7ac439f..ef91747bbed 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - switch_to_new_gdt(); + switch_to_new_gdt(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff4354..612d3c74f6a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1185,7 +1185,7 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(); + switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); per_cpu(cpu_state, me) = CPU_ONLINE; -- cgit v1.2.3 From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: x86: split loading percpu segments from loading gdt Impact: split out a function, no functional change Xen needs to be able to access percpu data from very early on. For various reasons, it cannot also load the gdt at that time. It does, however, have a pefectly functional gdt at that point, so there's no pressing need to reload the gdt. Split the function to load the segment registers off, so Xen can call it directly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6eacd64b602..0f73ea42308 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -253,6 +253,16 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ void switch_to_new_gdt(int cpu) @@ -263,12 +273,8 @@ void switch_to_new_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - loadsegment(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -#endif + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -- cgit v1.2.3 From 664c7954721adfc9bd61de6ec78f89f482f1a802 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 23:18:41 -0800 Subject: x86/vmi: fix interrupt enable/disable/save/restore calling convention. Zach says: > Enable/Disable have no clobbers at all. > Save clobbers only return value, %eax > Restore also clobbers nothing. This is precisely compatible with the calling convention, so we can just call them directly without wrapping. (Compile tested only.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmi_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2dd..eb9e7347928 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -670,10 +670,11 @@ static inline int __init activate_vmi(void) para_fill(pv_mmu_ops.write_cr2, SetCR2); para_fill(pv_mmu_ops.write_cr3, SetCR3); para_fill(pv_cpu_ops.write_cr4, SetCR4); - para_fill(pv_irq_ops.save_fl, GetInterruptMask); - para_fill(pv_irq_ops.restore_fl, SetInterruptMask); - para_fill(pv_irq_ops.irq_disable, DisableInterrupts); - para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); para_fill(pv_cpu_ops.wbinvd, WBINVD); para_fill(pv_cpu_ops.read_tsc, RDTSC); -- cgit v1.2.3 From ef3892bd63420380d115f755d351d2071f1f805f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 2 Feb 2009 18:16:19 -0800 Subject: x86, percpu: fix kexec with vmlinux Impact: fix regression with kexec with vmlinux Split data.init into data.init, percpu, data.init2 sections instead of let data.init wrap percpu secion. Thus kexec loading will be happy, because sections will not overlap. Before the patch we have: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 6 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000a2d938 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs .bss 04 .data.percpu 05 .notes After patch we've got: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 7 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000073086 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 LOAD 0x000000000163d000 0xffffffff8123d000 0x000000000123d000 0x0000000000000000 0x00000000007e6938 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs 04 .data.percpu 05 .bss 06 .notes Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index c9740996430..07f62d287ff 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -22,6 +22,7 @@ PHDRS { #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(7); /* RWE */ #endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -215,7 +216,7 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. Also, pda should be at the head of + * start another section data.init2. Also, pda should be at the head of * percpu area. Preallocate it and define the percpu offset symbol * so that it can be accessed as a percpu variable. */ @@ -232,7 +233,7 @@ SECTIONS __nosave_begin = .; .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) - } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ + } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; -- cgit v1.2.3 From f5deb79679af6eb41b61112fadcda28b2a4cfb0d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 3 Feb 2009 14:22:48 +0800 Subject: x86: kexec: Use one page table in x86_64 machine_kexec Impact: reduce kernel BSS size by 7 pages, improve code readability Two page tables are used in current x86_64 kexec implementation. One is used to jump from kernel virtual address to identity map address, the other is used to map all physical memory. In fact, on x86_64, there is no conflict between kernel virtual address space and physical memory space, so just one page table is sufficient. The page table pages used to map control page are dynamically allocated to save memory if kexec image is not loaded. ASM code used to map control page is replaced by C code too. Signed-off-by: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/kernel/machine_kexec_64.c | 82 +++++++++++++++-------- arch/x86/kernel/relocate_kernel_64.S | 125 +---------------------------------- 2 files changed, 58 insertions(+), 149 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c43caa3a91f..6993d51b7fd 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,15 +18,6 @@ #include #include -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u64 kexec_pgd[512] PAGE_ALIGNED; -static u64 kexec_pud0[512] PAGE_ALIGNED; -static u64 kexec_pmd0[512] PAGE_ALIGNED; -static u64 kexec_pte0[512] PAGE_ALIGNED; -static u64 kexec_pud1[512] PAGE_ALIGNED; -static u64 kexec_pmd1[512] PAGE_ALIGNED; -static u64 kexec_pte1[512] PAGE_ALIGNED; - static void init_level2_page(pmd_t *level2p, unsigned long addr) { unsigned long end_addr; @@ -107,12 +98,65 @@ out: return result; } +static void free_transition_pgtable(struct kimage *image) +{ + free_page((unsigned long)image->arch.pud); + free_page((unsigned long)image->arch.pmd); + free_page((unsigned long)image->arch.pte); +} + +static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr, paddr; + int result = -ENOMEM; + + vaddr = (unsigned long)relocate_kernel; + paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL); + if (!pud) + goto err; + image->arch.pud = pud; + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(pgd, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); + if (!pmd) + goto err; + image->arch.pmd = pmd; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pte) + goto err; + image->arch.pte = pte; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); + return 0; +err: + free_transition_pgtable(image); + return result; +} + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { pgd_t *level4p; + int result; level4p = (pgd_t *)__va(start_pgtable); - return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + if (result) + return result; + return init_transition_pgtable(image, level4p); } static void set_idt(void *newidt, u16 limit) @@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - return; + free_transition_pgtable(image); } /* @@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image) memcpy(control_page, relocate_kernel, PAGE_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = virt_to_phys(&kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; - page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); - page_list[VA_PUD_0] = (unsigned long)kexec_pud0; - page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); - page_list[VA_PUD_1] = (unsigned long)kexec_pud1; - page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; - page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index f5afe665a82..b0bbdd4829c 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -29,122 +29,6 @@ relocate_kernel: * %rdx start address */ - /* map the control page at its virtual address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - /* identity map the control page at its physical address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - -relocate_new_kernel: - /* %rdi indirection_page - * %rsi page_list - * %rdx start address - */ - /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -156,9 +40,8 @@ relocate_new_kernel: /* get physical address of page table now too */ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx - /* switch to new set of page tables */ - movq PTR(PA_PGD)(%rsi), %r9 - movq %r9, %cr3 + /* Switch to the identity mapped page tables */ + movq %rcx, %cr3 /* setup a new stack at the end of the physical control page */ lea PAGE_SIZE(%r8), %rsp @@ -194,9 +77,7 @@ identity_mapped: jmp 1f 1: - /* Switch to the identity mapped page tables, - * and flush the TLB. - */ + /* Flush the TLB (needed?) */ movq %rcx, %cr3 /* Do the copies */ -- cgit v1.2.3 From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 15:54:45 -0500 Subject: x86, 64-bit: print DMI info in the oops trace This patch echoes what we already do on 32-bit since 90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI product name in show_regs, so that system specific problems can be easily identified. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..85b4cb5c198 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; + const char *board; printk("\n"); print_modules(); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, -- cgit v1.2.3 From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: fix hpet timer reinit for x86_64 There's a small problem with hpet_rtc_reinit function - it checks for the: hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0 to continue increasing both the HPET_T1_CMP (register) and the hpet_t1_cmp (variable). But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp is 64-bit this condition will always be FALSE once the latter hits the 32-bit boundary, and we can have a situation, when we don't increase the HPET_T1_CMP register high enough. The result - timer stops ticking, since HPET_T1_CMP becomes less, than the COUNTER and never increased again. The solution is (based on Linus's suggestion) to not compare 64-bits (on 64-bit x86), but to do the comparison on 32-bit signed integers. Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8ad..c761f914430 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.2.3 From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Feb 2009 16:44:01 -0700 Subject: x86: fix grammar in user-visible BIOS warning Fix user-visible grammo. Signed-off-by: Alex Chiang Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf6..c461f6d6907 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS may corrupt low RAM, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working around it.\n", d->ident); e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); -- cgit v1.2.3 From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 3 Feb 2009 17:46:43 +0100 Subject: [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table At this time, the PowerNow! driver for K8 uses an experimentally derived formula to calculate transition latency. The value it provides is orders of magnitude too large on modern systems. This patch replaces the formula with ACPI _PSS latency values for more accuracy and better performance. I've tested it on two 2nd generation Opteron systems, a 3rd generation Operton system, and a Turion X2 without seeing any stability problems. Signed-off-by: Mark Langsdorf Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 5c28b37dea1..fb039cd345d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) free_cpumask_var(data->acpi_data.shared_cpu_map); } +static int get_transition_latency(struct powernow_k8_data *data) +{ + int max_latency = 0; + int i; + for (i = 0; i < data->acpi_data.state_count; i++) { + int cur_latency = data->acpi_data.states[i].transition_latency + + data->acpi_data.states[i].bus_master_latency; + if (cur_latency > max_latency) + max_latency = cur_latency; + } + /* value in usecs, needs to be in nanoseconds */ + return 1000 * max_latency; +} + #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc) { goto err_out; } - } + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = ( + ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + ((1 << data->irt) * 30)) * 1000; + } else /* ACPI _PSS objects available */ + pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); data->available_cores = pol->cpus; - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else -- cgit v1.2.3 From 65a4e574d2382d83f71b30ea92f86d2e40a6ef8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:36:17 +0100 Subject: smp, generic: introduce arch_disable_smp_support() instead of disable_ioapic_setup() Impact: cleanup disable_ioapic_setup() in init/main.c is ugly as the function is x86-specific. The #ifdef inline prototype there is ugly too. Replace it with a generic arch_disable_smp_support() function - which has a weak alias for non-x86 architectures and for non-ioapic x86 builds. Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 +--- arch/x86/kernel/io_apic.c | 11 ++++++++++- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 85d8b50d1af..a04a73a51d2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1138,9 +1138,7 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { -#ifdef CONFIG_X86_IO_APIC - disable_ioapic_setup(); -#endif + arch_disable_smp_support(); return; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 57d60c741e3..84bccac4619 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -98,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; +void arch_disable_smp_support(void) +{ +#ifdef CONFIG_PCI + noioapicquirk = 1; + noioapicreroute = -1; +#endif + skip_ioapic_setup = 1; +} + static int __init parse_noapic(char *str) { /* disable IO-APIC */ - disable_ioapic_setup(); + arch_disable_smp_support(); return 0; } early_param("noapic", parse_noapic); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f40f86fec2f..96f7d304f5c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1071,7 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); - disable_ioapic_setup(); + arch_disable_smp_support(); return -1; } -- cgit v1.2.3 From fdbecd9fd14198853bec4cbae8bc7af93f2e3de3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:57:12 +0100 Subject: x86, apic: explain the purpose of max_physical_apicid Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index a04a73a51d2..5475e1c3180 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,13 +50,26 @@ #include unsigned int num_processors; + unsigned disabled_cpus __cpuinitdata; + /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); + +/* + * The highest APIC ID seen during enumeration. + * + * This determines the messaging protocol we can use: if all APIC IDs + * are in the 0 ... 7 range, then we can use logical addressing which + * has some performance advantages (better broadcasting). + * + * If there's an APIC ID above 8, we use physical addressing. + */ unsigned int max_physical_apicid; -/* Bitmask of physically existing CPUs */ +/* + * Bitmask of physically existing CPUs: + */ physid_mask_t phys_cpu_present_map; /* -- cgit v1.2.3 From c5e954820335ef5aed1662b70aaf5deb9de16735 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 30 Jan 2009 17:29:27 -0800 Subject: x86: move default_ipi_xx back to ipi.c Impact: cleanup only leave _default_ipi_xx etc in .h Beyond the cleanup factor, this saves a bit of code size as well: text data bss dec hex filename 7281931 1630144 1463304 10375379 9e50d3 vmlinux.before 7281753 1630144 1463304 10375201 9e5021 vmlinux.after Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/ipi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 339f4f3feee..dbf5445727a 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,8 +19,116 @@ #include #include +void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) +{ + unsigned long query_cpu; + unsigned long flags; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int query_cpu; + unsigned long flags; + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } + local_irq_restore(flags); +} + #ifdef CONFIG_X86_32 +/* + * This is only used on smaller machines. + */ +void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __default_local_send_IPI_allbutself(vector); +} + +void default_send_IPI_all(int vector) +{ + __default_local_send_IPI_all(vector); +} + void default_send_IPI_self(int vector) { __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); -- cgit v1.2.3 From 4f179d121885142fb907b64956228b369d495958 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Feb 2009 11:25:57 +0100 Subject: x86, numaq: cleanups Also move xquad_portio over to where it's allocated. Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 947748e1721..0cc41a1d255 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, IBM Corp. * - * All rights reserved. + * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,17 +23,18 @@ * Send feedback to */ -#include +#include #include #include #include -#include -#include -#include +#include + #include +#include #include -#include +#include #include +#include #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void) } int found_numaq; + /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; }; /* x86_quirks member */ @@ -444,7 +446,8 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) return physid_mask_of_physid(cpu + 4*node); } -extern void *xquad_portio; +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) { @@ -502,7 +505,7 @@ static void numaq_setup_portio_remap(void) int num_quads = num_online_nodes(); if (num_quads <= 1) - return; + return; printk("Remapping cross-quad port I/O for %d quads\n", num_quads); xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); -- cgit v1.2.3 From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: clean up hpet timer reinit Implement Linus's suggestion: introduce the hpet_cnt_ahead() helper function to compare hpet time values - like other wrapping counter comparisons are abstracted away elsewhere. (jiffies, ktime_t, etc.) Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c761f914430..388254f69a2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -897,13 +897,21 @@ static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; +static u32 hpet_t1_cmp; static unsigned long hpet_default_delta; static unsigned long hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; +/* + * Check that the hpet counter c1 is ahead of the c2 + */ +static inline int hpet_cnt_ahead(u32 c1, u32 c2) +{ + return (s32)(c2 - c1) < 0; +} + /* * Registers a IRQ handler. */ @@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.2.3 From fb08b20fe7c8491a35a4369cce60fcb886d7609d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:05:28 -0800 Subject: x86: Fix compile error in arch/x86/kernel/early_printk.c Fix compile problem: CC arch/x86/kernel/early_printk.o In file included from /home/jeremy/hg/xen/paravirt/linux/arch/x86/kernel/early_printk.c:17: /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pmd_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function '__pfn_to_section' /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function '__section_mem_map_addr' /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: warning: return makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pud_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:586: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:586: warning: return makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pgd_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:625: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:625: warning: return makes pointer from integer without a cast This is a cycling dependency between asm/pgtable.h and linux/mmzone.h when using CONFIG_SPARSEMEM. Rather than hacking up the headers some more, remove asm/pgtable.h, since early_printk.c doesn't actually need it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/early_printk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 504ad198e4a..6a36dd228b6 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From f1ee5548a6507d2b4293635c61ddbf12e2c00e94 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86/irq: optimize nr_irqs Impact: make nr_irqs depend more on cards used in a system depend on nr_irq_gsi more, and have a ratio for MSI. v2: make nr_irqs less than NR_VECTORS * nr_cpu_ids aka if only one cpu, we only can support nr_irqs = NR_VECTORS Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9578d33f20a..43f95d7b13a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3479,9 +3479,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want); - irq_want++; if (irq == 0) return -1; + irq_want = irq + 1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3825,11 +3825,17 @@ int __init arch_probe_nr_irqs(void) { int nr; - nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? - (NR_VECTORS + (8 * nr_cpu_ids)) : - (NR_VECTORS + (32 * nr_ioapics))); + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; - if (nr < nr_irqs && nr > nr_irqs_gsi) + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + nr += nr_irqs_gsi * 16; +#endif + if (nr < nr_irqs) nr_irqs = nr; return 0; -- cgit v1.2.3 From abcaa2b8319a7673e76c2391cb5de3045ab1b401 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: use NR_IRQS_LEGACY to replace 16 Impact: cleanup also could kill platform_legacy_irq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 43f95d7b13a..e5be9f35ea5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3165,6 +3165,7 @@ static int __init ioapic_init_sysfs(void) device_initcall(ioapic_init_sysfs); +static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ @@ -3179,11 +3180,11 @@ unsigned int create_irq_nr(unsigned int irq_want) struct irq_desc *desc_new = NULL; irq = 0; + if (irq_want < nr_irqs_gsi) + irq_want = nr_irqs_gsi; + spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - if (platform_legacy_irq(new)) - continue; - desc_new = irq_to_desc_alloc_cpu(new, cpu); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); @@ -3208,7 +3209,6 @@ unsigned int create_irq_nr(unsigned int irq_want) return irq; } -static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { unsigned int irq_want; -- cgit v1.2.3 From f72dccace737df74d04a96461785a3ad61724b9f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: check_timer cleanup Impact: make check-timer more robust potentially solve boot fragility For edge trigger io-apic routing, we already unmasked the pin via setup_IO_APIC_irq(), so don't unmask it again. Also call local_irq_disable() between timer_irq_works(), because it calls local_irq_enable() inside. Also remove not needed apic version reading for 64-bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e5be9f35ea5..855209a1b17 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1657,7 +1657,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * to the first CPU. */ entry.dest_mode = apic->irq_dest_mode; - entry.mask = 1; /* mask IRQ now */ + entry.mask = 0; /* don't mask IRQ for edge */ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; @@ -2863,14 +2863,10 @@ static inline void __init check_timer(void) int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; - unsigned int ver; int no_pin1 = 0; local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - /* * get/set the timer IRQ vector: */ @@ -2889,7 +2885,13 @@ static inline void __init check_timer(void) apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); #ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + { + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + } #endif pin1 = find_isa_irq_pin(0, mp_INT); @@ -2928,8 +2930,17 @@ static inline void __init check_timer(void) if (no_pin1) { add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } else { + /* for edge trigger, setup_IO_APIC_irq already + * leave it unmasked. + * so only need to unmask if it is level-trigger + * do we really have level trigger timer? + */ + int idx; + idx = find_irq_entry(apic1, pin1, mp_INT); + if (idx != -1 && irq_trigger(idx)) + unmask_IO_APIC_irq_desc(desc); } - unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2943,6 +2954,7 @@ static inline void __init check_timer(void) if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); #endif + local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2957,7 +2969,6 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2972,6 +2983,7 @@ static inline void __init check_timer(void) /* * Cleanup, just in case ... */ + local_irq_disable(); disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); @@ -2997,6 +3009,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); @@ -3014,6 +3027,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option.\n"); -- cgit v1.2.3 From cc6c50066ec1ac98bef97117e2f078bb89bbccc7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3efa996b036..c334fe75dcd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -961,6 +961,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mpc_intsrc *m, struct mpc_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 855209a1b17..56e51eb551a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3824,14 +3824,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } #ifdef CONFIG_SPARSE_IRQ -- cgit v1.2.3 From 2c344e9d6e1938fdf15e93c56d6fe42f8410e9d3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 7 Feb 2009 12:23:37 -0800 Subject: x86: don't pretend that non-framepointer stack traces are reliable Without frame pointers enabled, the x86 stack traces should not pretend to be reliable; instead they should just be what they are: unreliable. The effect of this is that they have a '?' printed in the stacktrace, to warn the reader that these entries are guesses rather than known based on more reliable information. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f866..87d103ded1c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo, frame = frame->next_frame; bp = (unsigned long) frame; } else { - ops->address(data, addr, bp == 0); + ops->address(data, addr, 0); } print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } -- cgit v1.2.3 From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:39 -0500 Subject: x86: use linker to offset symbols by __per_cpu_load Impact: cleanup and bug fix Use the linker to create symbols for certain per-cpu variables that are offset by __per_cpu_load. This allows the removal of the runtime fixup of the GDT pointer, which fixes a bug with resume reported by Jiri Slaby. Reported-by: Jiri Slaby Signed-off-by: Brian Gerst Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/head_64.S | 21 ++------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++ 3 files changed, 11 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f73ea42308..41b0de6df87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,12 +902,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -#ifdef CONFIG_SMP -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ -#else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -#endif + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a0a2b5ca9b7..2e648e3a5ea 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) pushq $0 popfq -#ifdef CONFIG_SMP - /* - * Fix up static pointers that need __per_cpu_load added. The assembler - * is unable to do this directly. This is only needed for the boot cpu. - * These values are set up with the correct base addresses by C code for - * secondary cpus. - */ - movq initial_gs(%rip), %rax - cmpl $0, per_cpu__cpu_number(%rax) - jne 1f - addq %rax, early_gdt_descr_base(%rip) -1: -#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) -#ifdef CONFIG_SMP - .quad __per_cpu_load -#else - .quad PER_CPU_VAR(irq_stack_union) -#endif + .quad INIT_PER_CPU_VAR(irq_stack_union) __FINITDATA ENTRY(stack_start) @@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad per_cpu__gdt_page + .quad INIT_PER_CPU_VAR(gdt_page) ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 07f62d287ff..087a7f2c639 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -257,6 +257,14 @@ SECTIONS DWARF_DEBUG } + /* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + /* * Build-time check on the image size: */ -- cgit v1.2.3 From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 6 Feb 2009 16:52:05 -0800 Subject: x86: add clflush before monitor for Intel 7400 series For Intel 7400 series CPUs, the recommendation is to use a clflush on the monitored address just before monitor and mwait pair [1]. This clflush makes sure that there are no false wakeups from mwait when the monitored address was recently written to. [1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series" section in specification update document of 7400 series http://download.intel.com/design/xeon/specupdt/32033601.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 3 +++ arch/x86/kernel/process.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 430e5c38a54..24ff26a38ad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } + if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..6d12f7e37f8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) @@ -194,6 +197,9 @@ static void mwait_idle(void) struct power_trace it; if (!need_resched()) { trace_power_start(&it, POWER_CSTATE, 1); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) -- cgit v1.2.3 From 726c0d95b6bd06cb83efd36a76ccf03fa9a770f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Feb 2009 11:32:17 +0100 Subject: x86: early_printk.c - fix pgtable.h unification fallout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/early_printk.c: In function ‘early_dbgp_init’: arch/x86/kernel/early_printk.c:827: error: ‘PAGE_KERNEL_NOCACHE’ undeclared (first use in this function) arch/x86/kernel/early_printk.c:827: error: (Each undeclared identifier is reported only once arch/x86/kernel/early_printk.c:827: error: for each function it appears in.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 6a36dd228b6..639ad98238a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* Simple VGA output */ -- cgit v1.2.3 From 9b2b76a3344146c4d8d300874e73af8161204f87 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:40 -0800 Subject: x86: add handle_irq() to allow interrupt injection Xen uses a different interrupt path, so introduce handle_irq() to allow interrupts to be inserted into the normal interrupt path. This is handled slightly differently on 32 and 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/irq_64.c | 23 ++++++++++++++++------- 2 files changed, 37 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d802c844991..61f09fb969e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -191,6 +191,26 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } #endif +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + int overflow; + + overflow = check_stack_overflow(); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); + desc->handle_irq(irq, desc); + } + + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -200,31 +220,19 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow; unsigned vector = ~regs->orig_ax; - struct irq_desc *desc; unsigned irq; - old_regs = set_irq_regs(regs); irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; - overflow = check_stack_overflow(); - - desc = irq_to_desc(irq); - if (unlikely(!desc)) { + if (!handle_irq(irq, regs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", __func__, irq, vector, smp_processor_id()); BUG(); } - if (!execute_on_irq_stack(overflow, desc, irq)) { - if (unlikely(overflow)) - print_stack_overflow(); - desc->handle_irq(irq, desc); - } - irq_exit(); set_irq_regs(old_regs); return 1; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 018963aa6ee..a93f3b0dc7f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -48,6 +48,20 @@ static inline void stack_overflow_check(struct pt_regs *regs) #endif } +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + + stack_overflow_check(regs); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + generic_handle_irq_desc(irq, desc); + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -56,7 +70,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -64,14 +77,10 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) exit_idle(); irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; - stack_overflow_check(regs); + irq = __get_cpu_var(vector_irq)[vector]; - desc = irq_to_desc(irq); - if (likely(desc)) - generic_handle_irq_desc(irq, desc); - else { + if (!handle_irq(irq, regs)) { if (!disable_apic) ack_APIC_irq(); -- cgit v1.2.3 From 7c1d7cdcef1b54f4a78892b6b99d19f12c4f398e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:41 -0800 Subject: x86: unify do_IRQ() With the differences in interrupt handling hoisted into handle_irq(), do_IRQ is more or less identical between 32 and 64 bit, so unify it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 27 --------------------------- arch/x86/kernel/irq_64.c | 33 --------------------------------- 3 files changed, 38 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 8b30d0c2512..f13ca1650aa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -6,10 +6,12 @@ #include #include #include +#include #include #include #include +#include atomic_t irq_err_count; @@ -188,4 +190,40 @@ u64 arch_irq_stat(void) return sum; } + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int __irq_entry do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + unsigned irq; + + exit_idle(); + irq_enter(); + + irq = __get_cpu_var(vector_irq)[vector]; + + if (!handle_irq(irq, regs)) { +#ifdef CONFIG_X86_64 + if (!disable_apic) + ack_APIC_irq(); +#endif + + if (printk_ratelimit()) + printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); + } + + irq_exit(); + + set_irq_regs(old_regs); + return 1; +} + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 61f09fb969e..4beb9a13873 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,33 +211,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - old_regs = set_irq_regs(regs); - irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", - __func__, irq, vector, smp_processor_id()); - BUG(); - } - - irq_exit(); - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a93f3b0dc7f..977d8b43a0d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,39 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - exit_idle(); - irq_enter(); - - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - if (!disable_apic) - ack_APIC_irq(); - - if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", - __func__, smp_processor_id(), vector); - } - - irq_exit(); - - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) -- cgit v1.2.3 From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..7678f10c456 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b0c480c383..bc7ac4da90d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } /* -------------------------------------------------------------------------- -- cgit v1.2.3 From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 6 Feb 2009 10:29:35 -0800 Subject: x86, vmi: put a missing paravirt_release_pmd in pgd_dtor Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case pmd allocations as much") made changes to the way we handle pmd allocations, and while doing that it dropped a call to paravirt_release_pd on the pgd page from the pgd_dtor code path. As a result of this missing release, the hypervisor is now unaware of the pgd page being freed, and as a result it ends up tracking this page as a page table page. After this the guest may start using the same page for other purposes, and depending on what use the page is put to, it may result in various performance and/or functional issues ( hangs, reboots). Since this release is only required for VMI, I now release the pgd page from the (vmi)_pgd_free hook. Signed-off-by: Alok N Kataria Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/vmi_32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2dd..bef58b4982d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,6 +320,16 @@ static void vmi_release_pmd(unsigned long pfn) vmi_ops.release_page(pfn, VMI_PAGE_L2); } +/* + * We use the pgd_free hook for releasing the pgd page: + */ +static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; + + vmi_ops.release_page(pfn, VMI_PAGE_L2); +} + /* * Helper macros for MMU update flags. We can defer updates until a flush * or page invalidation only if the update is to the current address space @@ -762,6 +772,7 @@ static inline int __init activate_vmi(void) if (vmi_ops.release_page) { pv_mmu_ops.release_pte = vmi_release_pte; pv_mmu_ops.release_pmd = vmi_release_pmd; + pv_mmu_ops.pgd_free = vmi_pgd_free; } /* Set linear is needed in all cases */ -- cgit v1.2.3 From 4924e228ae039029a9503ad571d91086e4042c90 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: x86: unstatic mp_find_ioapic so it can be used elsewhere Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c334fe75dcd..068b900f4b0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -872,7 +872,7 @@ static struct { DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; -static int mp_find_ioapic(int gsi) +int mp_find_ioapic(int gsi) { int i = 0; -- cgit v1.2.3 From c3e137d1e882c4fab9adcce7ae2be9bf3eb64c4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: x86: add mp_find_ioapic_pin Add mp_find_ioapic_pin() to find an IO APIC's specific pin from a GSI, and use this function within acpi/boot. Make it non-static so other code can use it too. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/acpi/boot.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 068b900f4b0..bba162c81d5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -887,6 +887,16 @@ int mp_find_ioapic(int gsi) return -1; } +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_ioapic_routing[ioapic].gsi_base; +} + static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 @@ -1022,7 +1032,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + pin = mp_find_ioapic_pin(ioapic, gsi); /* * TBD: This check is for faulty timer entries, where the override @@ -1142,7 +1152,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); #ifdef CONFIG_X86_32 if (ioapic_renumber_irq) @@ -1231,7 +1241,7 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); save_mp_irq(&mp_irq); #endif -- cgit v1.2.3 From ca97ab90164c7b978abf9d82dc82d6dc2cbac4a0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: x86: unstatic ioapic entry funcs Unstatic ioapic_write_entry and setup_ioapic_entry functions so that the Xen code can do its own ioapic routing setup. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/io_apic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 56e51eb551a..7248ca11bdc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -486,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) io_apic_write(apic, 0x10 + 2*pin, eu.w1); } -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); @@ -1478,10 +1478,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic_id, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) +int setup_ioapic_entry(int apic_id, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) { /* * add it to the IO-APIC irq-routing table: -- cgit v1.2.3 From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: fix math_emu register frame access do_device_not_available() is the handler for #NM and it declares that it takes a unsigned long and calls math_emu(), which takes a long argument and surprisingly expects the stack frame starting at the zero argument would match struct math_emu_info, which isn't true regardless of configuration in the current code. This patch makes do_device_not_available() take struct pt_regs like other exception handlers and initialize struct math_emu_info with pointer to it and pass pointer to the math_emu_info to math_emulate() like normal C functions do. This way, unless gcc makes a copy of struct pt_regs in do_device_not_available(), the register frame is correctly accessed regardless of kernel configuration or compiler used. This doesn't fix all math_emu problems but it at least gets it somewhat working. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284..7932338d7cb 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void) EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); @@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); + struct math_emu_info info = { }; + + conditional_sti(®s); + + info.regs = ®s; + math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + conditional_sti(®s); } #else math_state_restore(); -- cgit v1.2.3 From f0d96110f9fd98a1a22e03b8adba69508843d910 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: use asm .macro instead of cpp #define in entry_32.S Impact: cleanup Use .macro instead of cpp #define where approriate. This cleans up code and will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 293 +++++++++++++++++++++++---------------------- 1 file changed, 151 insertions(+), 142 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a..c461925d3b6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -101,121 +101,127 @@ #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ - cld; \ - pushl %fs; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET fs, 0;*/\ - pushl %es; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET es, 0;*/\ - pushl %ds; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET ds, 0;*/\ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET eax, 0;\ - pushl %ebp; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebp, 0;\ - pushl %edi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edi, 0;\ - pushl %esi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET esi, 0;\ - pushl %edx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edx, 0;\ - pushl %ecx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ecx, 0;\ - pushl %ebx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $(__KERNEL_PERCPU), %edx; \ +.macro SAVE_ALL + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0;*/ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0;*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0;*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es + movl $(__KERNEL_PERCPU), %edx movl %edx, %fs +.endm -#define RESTORE_INT_REGS \ - popl %ebx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebx;\ - popl %ecx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ecx;\ - popl %edx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edx;\ - popl %esi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE esi;\ - popl %edi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edi;\ - popl %ebp; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebp;\ - popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4;\ +.macro RESTORE_INT_REGS + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ebp + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebp + popl %eax + CFI_ADJUST_CFA_OFFSET -4 CFI_RESTORE eax +.endm -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE ds;*/\ -2: popl %es; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE es;*/\ -3: popl %fs; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE fs;*/\ -.pushsection .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: movl $0,(%esp); \ - jmp 3b; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ +.macro RESTORE_REGS + RESTORE_INT_REGS +1: popl %ds + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE ds;*/ +2: popl %es + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE es;*/ +3: popl %fs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE fs;*/ +.pushsection .fixup, "ax" +4: movl $0, (%esp) + jmp 1b +5: movl $0, (%esp) + jmp 2b +6: movl $0, (%esp) + jmp 3b +.section __ex_table, "a" + .align 4 + .long 1b, 4b + .long 2b, 5b + .long 3b, 6b .popsection +.endm -#define RING0_INT_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 3*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_INT_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 3*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_EC_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 4*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_EC_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 4*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_PTREGS_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ - CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ - CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ - CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ - CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ - CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ - CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ - CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ +.macro RING0_PTREGS_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ + CFI_OFFSET eip, PT_EIP-PT_OLDESP + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ + CFI_OFFSET eax, PT_EAX-PT_OLDESP + CFI_OFFSET ebp, PT_EBP-PT_OLDESP + CFI_OFFSET edi, PT_EDI-PT_OLDESP + CFI_OFFSET esi, PT_ESI-PT_OLDESP + CFI_OFFSET edx, PT_EDX-PT_OLDESP + CFI_OFFSET ecx, PT_ECX-PT_OLDESP CFI_OFFSET ebx, PT_EBX-PT_OLDESP +.endm ENTRY(ret_from_fork) CFI_STARTPROC @@ -595,28 +601,30 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC -#define FIXUP_ESPFIX_STACK \ - /* since we are on a wrong stack, we cant make it a C code :( */ \ - PER_CPU(gdt_page, %ebx); \ - GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ - addl %esp, %eax; \ - pushl $__KERNEL_DS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4; \ - lss (%esp), %esp; \ - CFI_ADJUST_CFA_OFFSET -8; -#define UNWIND_ESPFIX_STACK \ - movl %ss, %eax; \ - /* see if on espfix stack */ \ - cmpw $__ESPFIX_SS, %ax; \ - jne 27f; \ - movl $__KERNEL_DS, %eax; \ - movl %eax, %ds; \ - movl %eax, %es; \ - /* switch to normal stack */ \ - FIXUP_ESPFIX_STACK; \ -27:; +.macro FIXUP_ESPFIX_STACK + /* since we are on a wrong stack, we cant make it a C code :( */ + PER_CPU(gdt_page, %ebx) + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + addl %esp, %eax + pushl $__KERNEL_DS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 +.endm +.macro UNWIND_ESPFIX_STACK + movl %ss, %eax + /* see if on espfix stack */ + cmpw $__ESPFIX_SS, %ax + jne 27f + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + /* switch to normal stack */ + FIXUP_ESPFIX_STACK +27: +.endm /* * Build the entry stubs and pointer table with some assembler magic. @@ -1136,26 +1144,27 @@ END(page_fault) * by hand onto the new stack - while updating the return eip past * the instruction that would have done it for sysenter. */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ +.macro FIX_STACK offset ok label + cmpw $__KERNEL_CS, 4(%esp) + jne \ok +\label: + movl TSS_sysenter_sp0 + \offset(%esp), %esp + CFI_DEF_CFA esp, 0 + CFI_UNDEFINED eip + pushfl + CFI_ADJUST_CFA_OFFSET 4 + pushl $__KERNEL_CS + CFI_ADJUST_CFA_OFFSET 4 + pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 +.endm ENTRY(debug) RING0_INT_FRAME cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) + FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 @@ -1213,7 +1222,7 @@ nmi_stack_correct: nmi_stack_fixup: RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) + FIX_STACK 12, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_debug_stack_check: @@ -1224,7 +1233,7 @@ nmi_debug_stack_check: jb nmi_stack_correct cmpl $debug_esp_fix_insn,(%esp) ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) + FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_espfix_stack: -- cgit v1.2.3 From d9a89a26e02ef9ed03f74a755a8b4d8f3a066622 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: add %gs accessors for x86_32 Impact: cleanup On x86_32, %gs is handled lazily. It's not saved and restored on kernel entry/exit but only when necessary which usually is during task switch but there are few other places. Currently, it's done by calling savesegment() and loadsegment() explicitly. Define get_user_gs(), set_user_gs() and task_user_gs() and use them instead. While at it, clean up register access macros in signal.c. This cleans up code a bit and will help future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 6 +++--- arch/x86/kernel/ptrace.c | 14 ++++++-------- arch/x86/kernel/signal.c | 41 ++++++++++++++++------------------------- arch/x86/kernel/vm86_32.c | 4 ++-- 4 files changed, 27 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 1a1ae8edc40..d58a340e1be 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -131,7 +131,7 @@ void __show_regs(struct pt_regs *regs, int all) if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; - savesegment(gs, gs); + gs = get_user_gs(regs); } else { sp = (unsigned long) (®s->sp); savesegment(ss, ss); @@ -304,7 +304,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; - savesegment(gs, p->thread.gs); + task_user_gs(p) = get_user_gs(regs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -342,7 +342,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - __asm__("movl %0, %%gs" : : "r"(0)); + set_user_gs(regs, 0); regs->fs = 0; set_fs(USER_DS); regs->ds = __USER_DS; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb..508b6b57d0c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -90,9 +90,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { - retval = task->thread.gs; if (task == current) - savesegment(gs, retval); + retval = get_user_gs(task_pt_regs(task)); + else + retval = task_user_gs(task); } return retval; } @@ -126,13 +127,10 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task->thread.gs = value; if (task == current) - /* - * The user-mode %gs is not affected by - * kernel entry, so we must update the CPU. - */ - loadsegment(gs, value); + set_user_gs(task_pt_regs(task), value); + else + task_user_gs(task) = value; } return 0; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7fc78b01981..8562387c75a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -50,27 +50,23 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -#define COPY(x) { \ - get_user_ex(regs->x, &sc->x); \ -} +#define COPY(x) do { \ + get_user_ex(regs->x, &sc->x); \ +} while (0) -#define COPY_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp; \ -} +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) -#define COPY_SEG_CPL3(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp | 3; \ -} +#define COPY_SEG(seg) do { \ + regs->seg = GET_SEG(seg); \ +} while (0) -#define GET_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - loadsegment(seg, tmp); \ -} +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, @@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, get_user_try { #ifdef CONFIG_X86_32 - GET_SEG(gs); + set_user_gs(regs, GET_SEG(gs)); COPY_SEG(fs); COPY_SEG(es); COPY_SEG(ds); @@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_try { #ifdef CONFIG_X86_32 - { - unsigned int tmp; - - savesegment(gs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->gs); - } + put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); put_user_ex(regs->es, (unsigned int __user *)&sc->es); put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720..55ea30d2a3d 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) ret = KVM86->regs32; ret->fs = current->thread.saved_fs; - loadsegment(gs, current->thread.saved_gs); + set_user_gs(ret, current->thread.saved_gs); return ret; } @@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs32->ax = 0; tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_fs = info->regs32->fs; - savesegment(gs, tsk->thread.saved_gs); + tsk->thread.saved_gs = get_user_gs(info->regs32); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; -- cgit v1.2.3 From ccbeed3a05908d201b47b6c3dd1a373138bba566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: make lazy %gs optional on x86_32 Impact: pt_regs changed, lazy gs handling made optional, add slight overhead to SAVE_ALL, simplifies error_code path a bit On x86_32, %gs hasn't been used by kernel and handled lazily. pt_regs doesn't have place for it and gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the followings. * Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs. * Save and restore %gs along with other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" on SAVE_ALL even when LAZY_GS. However, it adds no overhead to common exit path and simplifies entry path with error code. * Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily. * Define ELF_CORE_COPY_KERNEL_REGS() which always read %gs directly. xen and lguest changes need to be verified. Signed-off-by: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/entry_32.S | 132 +++++++++++++++++++++++++++++++++------ arch/x86/kernel/process_32.c | 4 +- arch/x86/kernel/ptrace.c | 5 +- 4 files changed, 117 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee..fbf2f33e308 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -75,6 +75,7 @@ void foo(void) OFFSET(PT_DS, pt_regs, ds); OFFSET(PT_ES, pt_regs, es); OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_GS, pt_regs, gs); OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); OFFSET(PT_EIP, pt_regs, ip); OFFSET(PT_CS, pt_regs, cs); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c461925d3b6..82e6868bee4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -30,12 +30,13 @@ * 1C(%esp) - %ds * 20(%esp) - %es * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss + * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -101,8 +102,99 @@ #define resume_userspace_sig resume_userspace #endif +/* + * User gs save/restore + * + * %gs is used for userland TLS and kernel only uses it for stack + * canary which is required to be at %gs:20 by gcc. Read the comment + * at the top of stackprotector.h for more info. + * + * Local labels 98 and 99 are used. + */ +#ifdef CONFIG_X86_32_LAZY_GS + + /* unfortunately push/pop can't be no-op */ +.macro PUSH_GS + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +.endm +.macro POP_GS pop=0 + addl $(4 + \pop), %esp + CFI_ADJUST_CFA_OFFSET -(4 + \pop) +.endm +.macro POP_GS_EX +.endm + + /* all the rest are no-op */ +.macro PTGS_TO_GS +.endm +.macro PTGS_TO_GS_EX +.endm +.macro GS_TO_REG reg +.endm +.macro REG_TO_PTGS reg +.endm +.macro SET_KERNEL_GS reg +.endm + +#else /* CONFIG_X86_32_LAZY_GS */ + +.macro PUSH_GS + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET gs, 0*/ +.endm + +.macro POP_GS pop=0 +98: popl %gs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE gs*/ + .if \pop <> 0 + add $\pop, %esp + CFI_ADJUST_CFA_OFFSET -\pop + .endif +.endm +.macro POP_GS_EX +.pushsection .fixup, "ax" +99: movl $0, (%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro PTGS_TO_GS +98: mov PT_GS(%esp), %gs +.endm +.macro PTGS_TO_GS_EX +.pushsection .fixup, "ax" +99: movl $0, PT_GS(%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro GS_TO_REG reg + movl %gs, \reg + /*CFI_REGISTER gs, \reg*/ +.endm +.macro REG_TO_PTGS reg + movl \reg, PT_GS(%esp) + /*CFI_REL_OFFSET gs, PT_GS*/ +.endm +.macro SET_KERNEL_GS reg + xorl \reg, \reg + movl \reg, %gs +.endm + +#endif /* CONFIG_X86_32_LAZY_GS */ + .macro SAVE_ALL cld + PUSH_GS pushl %fs CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET fs, 0;*/ @@ -138,6 +230,7 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs + SET_KERNEL_GS %edx .endm .macro RESTORE_INT_REGS @@ -164,7 +257,7 @@ CFI_RESTORE eax .endm -.macro RESTORE_REGS +.macro RESTORE_REGS pop=0 RESTORE_INT_REGS 1: popl %ds CFI_ADJUST_CFA_OFFSET -4 @@ -175,6 +268,7 @@ 3: popl %fs CFI_ADJUST_CFA_OFFSET -4 /*CFI_RESTORE fs;*/ + POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -188,6 +282,7 @@ .long 2b, 5b .long 3b, 6b .popsection + POP_GS_EX .endm .macro RING0_INT_FRAME @@ -368,6 +463,7 @@ sysenter_exit: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs + PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT #ifdef CONFIG_AUDITSYSCALL @@ -416,6 +512,7 @@ sysexit_audit: .align 4 .long 1b,2b .popsection + PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) # system call handler stub @@ -458,8 +555,7 @@ restore_all: restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code + RESTORE_REGS 4 # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 irq_return: INTERRUPT_RETURN @@ -1078,7 +1174,10 @@ ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %fs's slot on the stack */ + /* the function address is in %gs's slot on the stack */ + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -1107,20 +1206,15 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d58a340e1be..86122fa2a1b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(gs, prev->gs); + lazy_save_gs(prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - loadsegment(gs, next->gs); + lazy_load_gs(next->gs); percpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 508b6b57d0c..7ec39ab37a2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) -- cgit v1.2.3 From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: implement x86_32 stack protector Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 18 ++++++++++++++++++ arch/x86/kernel/cpu/common.c | 17 +++++++++++------ arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/head_32.S | 20 +++++++++++++++++++- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/setup_percpu.c | 2 ++ 6 files changed, 52 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada1..b1f8be33300 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) +# +# On x86_32, register frame is passed verbatim on stack as struct +# pt_regs. gcc considers the parameter to belong to the callee and +# with -fstack-protector it copies pt_regs to the callee's stack frame +# to put the structure after the stack canary causing changes made by +# the exception handlers to be lost. Turn off stack protector for all +# files containing functions which take struct pt_regs from register +# frame. +# +# The proper way to fix this is to teach gcc that the argument belongs +# to the caller for these functions, oh well... +# +ifdef CONFIG_X86_32 +CFLAGS_process_32.o := $(nostackp) +CFLAGS_vm86_32.o := $(nostackp) +CFLAGS_signal.o := $(nostackp) +CFLAGS_traps.o := $(nostackp) +endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 41b0de6df87..260fe4cb2c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "cpu.h" @@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + GDT_STACK_CANARY_INIT #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); @@ -261,6 +263,7 @@ void load_percpu_segment(int cpu) loadsegment(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif + load_stack_canary_segment(); } /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -946,16 +949,21 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else +#else /* x86_64 */ + +#ifdef CONFIG_CC_STACKPROTECTOR +DEFINE_PER_CPU(unsigned long, stack_canary); +#endif -/* Make sure %fs is initialized properly in idle threads */ +/* Make sure %fs and %gs are initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; + regs->gs = __KERNEL_STACK_CANARY; return regs; } -#endif +#endif /* x86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - /* Clear all 6 debug registers: */ set_debugreg(0, 0); set_debugreg(0, 1); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 82e6868bee4..5f5bd22adcd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -186,7 +186,7 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg - xorl \reg, \reg + movl $(__KERNEL_STACK_CANARY), \reg movl \reg, %gs .endm diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 24c0e5cd71e..924e31615fb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - xorl %eax,%eax # Clear GS and LDT +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * The linker can't handle this by relocation. Manually set + * base address in stack canary segment descriptor. + */ + cmpb $0,ready + jne 1f + movl $per_cpu__gdt_page,%eax + movl $per_cpu__stack_canary,%ecx + movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) + shrl $16, %ecx + movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) + movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) +1: +#endif + movl $(__KERNEL_STACK_CANARY),%eax movl %eax,%gs + + xorl %eax,%eax # Clear LDT lldt %ax cld # gcc2 wants the direction flag cleared at all times diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 86122fa2a1b..9a62383e7c3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef91747bbed..d992e6cff73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); + setup_stack_canary_segment(cpu); /* * Copy data used in early init routines from the * initial arrays to the per cpu data areas. These -- cgit v1.2.3 From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Feb 2009 09:21:07 +0100 Subject: i8327: fix outb() parameter order In i8237A_resume(), when resetting the DMA controller, the parameters to dma_outb() were mixed up. Signed-off-by: Clemens Ladisch [ cleaned up the file a tiny bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8237.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d1b63..b42ca694dc6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) flags = claim_dma_lock(); - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); + dma_outb(0, DMA1_RESET_REG); + dma_outb(0, DMA2_RESET_REG); - for (i = 0;i < 8;i++) { + for (i = 0; i < 8; i++) { set_dma_addr(i, 0x000000); /* DMA count is a bit weird so this is not 0 */ set_dma_count(i, 1); @@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, - .resume = i8237A_resume, + .name = "i8237", + .suspend = i8237A_suspend, + .resume = i8237A_resume, }; static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, + .id = 0, + .cls = &i8237_sysdev_class, }; static int __init i8237A_init_sysfs(void) @@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) error = sysdev_register(&device_i8237A); return error; } - device_initcall(i8237A_init_sysfs); -- cgit v1.2.3 From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 13:07:13 -0500 Subject: tracing, x86: fix fixup section to return to original code Impact: fix to prevent a kernel crash on fault If for some reason the pointer to the parent function on the stack takes a fault, the fix up code will not return back to the original faulting code. This can lead to unpredictable results and perhaps even a kernel panic. A fault should not happen, but if it does, we should simply disable the tracer, warn, and continue running the kernel. It should not lead to a kernel crash. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b43086b097..9d549e4fe88 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) "1: " _ASM_MOV " (%[parent_old]), %[old]\n" "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" " movl $0, %[faulted]\n" + "3:\n" ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : [parent_replaced] "=r" (parent), [old] "=r" (old), [faulted] "=r" (faulted) -- cgit v1.2.3 From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 11:53:23 -0500 Subject: tracing, x86: fix constraint for parent variable The constraint used for retrieving and restoring the parent function pointer is incorrect. The parent variable is a pointer, and the address of the pointer is modified by the asm statement and not the pointer itself. It is incorrect to pass it in as an output constraint since the asm will never update the pointer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9d549e4fe88..231bdd3c5b1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. */ asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + "1: " _ASM_MOV " (%[parent]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" " movl $0, %[faulted]\n" "3:\n" @@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : [old] "=r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); -- cgit v1.2.3 From 0e81cb59c7120191c0243c686b591797bc79e5c6 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 10 Feb 2009 16:45:37 -0800 Subject: x86, apic: fix initialization of wakeup_cpu With refactoring of wake_cpu macros the 32bit code in tip doesn't execute generic_apic_probe if CONFIG_X86_32_NON_STANDARD is not set. Even on a x86 STANDARD cpu we need to execute the generic_apic_probe function, as we rely on this function to execute the update_genapic quirk which initilizes apic->wakeup_cpu. Failing to do so results in we making a call to a null function in do_boot_cpu. The stack trace without the patch goes like this. Booting processor 1 APIC 0x1 ip 0x6000 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<(null)>] (null) *pdpt = 0000000000839001 *pde = 0000000000c97067 *pte = 0000000000000163 Oops: 0000 [#1] SMP last sysfs file: Modules linked in: Pid: 1, comm: swapper Not tainted (2.6.29-rc4-tip #18) VMware Virtual Platform EIP: 0062:[<00000000>] EFLAGS: 00010293 CPU: 0 EIP is at 0x0 EAX: 00000001 EBX: 00006000 ECX: c077ed00 EDX: 00006000 ESI: 00000001 EDI: 00000001 EBP: ef04cf40 ESP: ef04cf1c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 006a Process swapper (pid: 1, ti=ef04c000 task=ef050000 task.ti=ef04c000) Stack: c0644e52 00000000 ef04cf24 ef04cf24 c064468d c0886dc0 00000000 c0702aea ef055480 00000001 00000101 dead4ead ffffffff ffffffff c08af530 00000000 c0709715 ef04cf60 ef04cf60 00000001 00000000 00000000 dead4ead ffffffff Call Trace: [] ? native_cpu_up+0x2de/0x45b [] ? do_fork_idle+0x0/0x19 [] ? _cpu_up+0x88/0xe8 [] ? cpu_up+0x42/0x4e [] ? kernel_init+0x99/0x14b [] ? kernel_init+0x0/0x14b [] ? kernel_thread_helper+0x7/0x10 Code: Bad EIP value. EIP: [<00000000>] 0x0 SS:ESP 006a:ef04cf1c I think we should call generic_apic_probe unconditionally for 32 bit now. Signed-off-by: Alok N Kataria Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8d8fa992c9a..150e6d0a3b4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) +#ifdef CONFIG_X86_32 generic_apic_probe(); #endif -- cgit v1.2.3 From 160d8dac12932ad6eb4a359b66521e2e3282ea7d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 11:27:39 +0100 Subject: x86, apic: make generic_apic_probe() generally available Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 150e6d0a3b4..8fce6c71451 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,9 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32 generic_apic_probe(); -#endif early_quirks(); -- cgit v1.2.3 From 5c79d2a517a9905599d192db8ce77ab5f1a2faca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Feb 2009 16:31:00 +0900 Subject: x86: fix x86_32 stack protector bugs Impact: fix x86_32 stack protector Brian Gerst found out that %gs was being initialized to stack_canary instead of stack_canary - 20, which basically gave the same canary value for all threads. Fixing this also exposed the following bugs. * cpu_idle() didn't call boot_init_stack_canary() * stack canary switching in switch_to() was being done too late making the initial run of a new thread use the old stack canary value. Fix all of them and while at it update comment in cpu_idle() about calling boot_init_stack_canary(). Reported-by: Brian Gerst Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 1 + arch/x86/kernel/process_32.c | 10 ++++++++++ arch/x86/kernel/process_64.c | 11 +++++------ 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 924e31615fb..cf21fd0cf6a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -447,6 +447,7 @@ is386: movl $2,%ecx # set MP jne 1f movl $per_cpu__gdt_page,%eax movl $per_cpu__stack_canary,%ecx + subl $20, %ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9a62383e7c3..b50604bb1e4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -91,6 +92,15 @@ void cpu_idle(void) { int cpu = smp_processor_id(); + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8eb169e4558..836ef6575f0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -120,12 +120,11 @@ void cpu_idle(void) current_thread_info()->status |= TS_POLLING; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger): + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). */ boot_init_stack_canary(); -- cgit v1.2.3 From aa78bcfa01dec3cdbde3cda098ce32abbd9c3bf6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:45 -0500 Subject: x86: use pt_regs pointer in do_device_not_available() The generic exception handler (error_code) passes in the pt_regs pointer and the error code (unused in this case). The commit "x86: fix math_emu register frame access" changed this to pass by value, which doesn't work correctly with stack protector enabled. Change it back to use the pt_regs pointer. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3b7b2e19020..71a8f871331 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error_code) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; - conditional_sti(®s); + conditional_sti(regs); - info.regs = ®s; + info.regs = regs; math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(®s); + conditional_sti(regs); } #else math_state_restore(); -- cgit v1.2.3 From 253f29a4ae9cc6cdc7b94f96517f27a93885a6ce Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:46 -0500 Subject: x86: pass in pt_regs pointer for syscalls that need it Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to regparm(1) to receive the pt_regs pointer as the first argument. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 20 ++++++++++++++++++++ arch/x86/kernel/ioport.c | 4 +--- arch/x86/kernel/process_32.c | 35 ++++++++++++++--------------------- arch/x86/kernel/signal.c | 35 +++++++---------------------------- arch/x86/kernel/syscall_table_32.S | 20 ++++++++++---------- arch/x86/kernel/vm86_32.c | 15 +++++++-------- 6 files changed, 59 insertions(+), 70 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5f5bd22adcd..3de7b5710dc 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -697,6 +697,26 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC +/* + * System calls that need a pt_regs pointer. + */ +#define PTREGSCALL(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%eax; \ + jmp sys_##name; + +PTREGSCALL(iopl) +PTREGSCALL(fork) +PTREGSCALL(clone) +PTREGSCALL(vfork) +PTREGSCALL(execve) +PTREGSCALL(sigaltstack) +PTREGSCALL(sigreturn) +PTREGSCALL(rt_sigreturn) +PTREGSCALL(vm86) +PTREGSCALL(vm86old) + .macro FIXUP_ESPFIX_STACK /* since we are on a wrong stack, we cant make it a C code :( */ PER_CPU(gdt_page, %ebx) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b12208f4dfe..7ec14864631 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,10 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -asmlinkage long sys_iopl(unsigned long regsp) +ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) { - struct pt_regs *regs = (struct pt_regs *)®sp; - unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b50604bb1e4..5a9dcfb01f7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,24 +603,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -asmlinkage int sys_fork(struct pt_regs regs) +ptregscall int sys_fork(struct pt_regs *regs) { - return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -asmlinkage int sys_clone(struct pt_regs regs) +ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, + unsigned long newsp, int __user *parent_tidptr, + unsigned long unused, int __user *child_tidptr) { - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.bx; - newsp = regs.cx; - parent_tidptr = (int __user *)regs.dx; - child_tidptr = (int __user *)regs.di; if (!newsp) - newsp = regs.sp; - return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } /* @@ -633,27 +627,26 @@ asmlinkage int sys_clone(struct pt_regs regs) * do not have enough call-clobbered registers to hold all * the information you need. */ -asmlinkage int sys_vfork(struct pt_regs regs) +ptregscall int sys_vfork(struct pt_regs *regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(struct pt_regs regs) +ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, + char __user * __user *argv, + char __user * __user *envp) { int error; char *filename; - filename = getname((char __user *) regs.bx); + filename = getname(u_filename); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, - (char __user * __user *) regs.cx, - (char __user * __user *) regs.dx, - ®s); + error = do_execve(filename, argv, envp, regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8562387c75a..d7a158367e3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,39 +549,28 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -asmlinkage int sys_sigaltstack(unsigned long bx) -{ - /* - * This is needed to make gcc realize it doesn't own the - * "struct pt_regs" - */ - struct pt_regs *regs = (struct pt_regs *)&bx; - const stack_t __user *uss = (const stack_t __user *)bx; - stack_t __user *uoss = (stack_t __user *)regs->cx; - - return do_sigaltstack(uss, uoss, regs->sp); -} +ptregscall int +sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, + stack_t __user *uoss) #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } -#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -asmlinkage unsigned long sys_sigreturn(unsigned long __unused) +ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; - struct pt_regs *regs; unsigned long ax; sigset_t set; - regs = (struct pt_regs *) &__unused; frame = (struct sigframe __user *)(regs->sp - 8); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -640,23 +629,13 @@ badframe: } #ifdef CONFIG_X86_32 -/* - * Note: do not pass in pt_regs directly as with tail-call optimization - * GCC will incorrectly stomp on the caller's frame and corrupt user-space - * register state: - */ -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *)&__unused; - - return do_rt_sigreturn(regs); -} +ptregscall int sys_rt_sigreturn(struct pt_regs *regs) #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_rt_sigreturn(regs); } -#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index e2e86a08f31..3bdb64829b8 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -1,7 +1,7 @@ ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit - .long sys_fork + .long ptregs_fork .long sys_read .long sys_write .long sys_open /* 5 */ @@ -10,7 +10,7 @@ ENTRY(sys_call_table) .long sys_creat .long sys_link .long sys_unlink /* 10 */ - .long sys_execve + .long ptregs_execve .long sys_chdir .long sys_time .long sys_mknod @@ -109,17 +109,17 @@ ENTRY(sys_call_table) .long sys_newlstat .long sys_newfstat .long sys_uname - .long sys_iopl /* 110 */ + .long ptregs_iopl /* 110 */ .long sys_vhangup .long sys_ni_syscall /* old "idle" system call */ - .long sys_vm86old + .long ptregs_vm86old .long sys_wait4 .long sys_swapoff /* 115 */ .long sys_sysinfo .long sys_ipc .long sys_fsync - .long sys_sigreturn - .long sys_clone /* 120 */ + .long ptregs_sigreturn + .long ptregs_clone /* 120 */ .long sys_setdomainname .long sys_newuname .long sys_modify_ldt @@ -165,14 +165,14 @@ ENTRY(sys_call_table) .long sys_mremap .long sys_setresuid16 .long sys_getresuid16 /* 165 */ - .long sys_vm86 + .long ptregs_vm86 .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll .long sys_nfsservctl .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl - .long sys_rt_sigreturn + .long ptregs_rt_sigreturn .long sys_rt_sigaction .long sys_rt_sigprocmask /* 175 */ .long sys_rt_sigpending @@ -185,11 +185,11 @@ ENTRY(sys_call_table) .long sys_getcwd .long sys_capget .long sys_capset /* 185 */ - .long sys_sigaltstack + .long ptregs_sigaltstack .long sys_sendfile .long sys_ni_syscall /* reserved for streams1 */ .long sys_ni_syscall /* reserved for streams2 */ - .long sys_vfork /* 190 */ + .long ptregs_vfork /* 190 */ .long sys_getrlimit .long sys_mmap2 .long sys_truncate64 diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 55ea30d2a3d..8fa6ba7c923 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,9 +197,8 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -asmlinkage int sys_vm86old(struct pt_regs regs) +ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) { - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -218,7 +217,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) if (tmp) goto out; memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = ®s; + info.regs32 = regs; tsk->thread.vm86_info = v86; do_sys_vm86(&info, tsk); ret = 0; /* we never return here */ @@ -227,7 +226,7 @@ out: } -asmlinkage int sys_vm86(struct pt_regs regs) +ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -239,12 +238,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) struct vm86plus_struct __user *v86; tsk = current; - switch (regs.bx) { + switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); + ret = do_vm86_irq_handling(cmd, (int)arg); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -261,14 +260,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)regs.cx; + v86 = (struct vm86plus_struct __user *)arg; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; - info.regs32 = ®s; + info.regs32 = regs; info.vm86plus.is_vm86pus = 1; tsk->thread.vm86_info = (struct vm86_struct __user *)v86; do_sys_vm86(&info, tsk); -- cgit v1.2.3 From 9c8bb6b534d1c89a20bf9bb45e1471cf8f4747c0 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:47 -0500 Subject: x86: drop -fno-stack-protector annotations after pt_regs fixes Now that no functions rely on struct pt_regs being passed by value, various "no stack protector" annotations can be dropped. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1f8be33300..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,24 +24,6 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) -# -# On x86_32, register frame is passed verbatim on stack as struct -# pt_regs. gcc considers the parameter to belong to the callee and -# with -fstack-protector it copies pt_regs to the callee's stack frame -# to put the structure after the stack canary causing changes made by -# the exception handlers to be lost. Turn off stack protector for all -# files containing functions which take struct pt_regs from register -# frame. -# -# The proper way to fix this is to teach gcc that the argument belongs -# to the caller for these functions, oh well... -# -ifdef CONFIG_X86_32 -CFLAGS_process_32.o := $(nostackp) -CFLAGS_vm86_32.o := $(nostackp) -CFLAGS_signal.o := $(nostackp) -CFLAGS_traps.o := $(nostackp) -endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -- cgit v1.2.3 From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 11 Feb 2009 15:10:27 +0100 Subject: x86, ptrace, mm: fix double-free on race Ptrace_detach() races with __ptrace_unlink() if the traced task is reaped while detaching. This might cause a double-free of the BTS buffer. Change the ptrace_detach() path to only do the memory accounting in ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace() which will be called from __ptrace_unlink(). The fix follows a proposal from Oleg Nesterov. Reported-by: Oleg Nesterov Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb..5a4c23d8989 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} -- cgit v1.2.3 From ba1511bf7fbda452138e4096bf10d5a382710f4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:38:25 +0530 Subject: x86: kernel/mpparse.c fix compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/mpparse.c: In function ‘smp_scan_config’: arch/x86/kernel/mpparse.c:696: warning: format ‘%08lx’ expects type ‘long unsigned int’, but argument 3 has type ‘phys_addr_t’ arch/x86/kernel/mpparse.c: In function ‘update_mp_table’: arch/x86/kernel/mpparse.c:1014: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 2 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 66ebb823f39..20076445319 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -692,8 +692,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); + printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", + mpf, (u64)virt_to_phys(mpf)); if (!reserve) return 1; @@ -1011,7 +1011,7 @@ static int __init update_mp_table(void) if (!smp_check_mpc(mpc, oem, str)) return 0; - printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { -- cgit v1.2.3 From b12bdaf11f935d7be030207e3c77faeaeab8ded3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 11 Feb 2009 16:43:58 -0500 Subject: x86: use regparm(3) for passed-in pt_regs pointer Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to take the pointer as an argument instead of relying on the assumption that the pt_regs structure overlaps the function arguments. Drop the use of regparm(1) due to concern about gcc bugs, and to move in the direction of the eventual removal of regparm(0) for asmlinkage. Signed-off-by: Brian Gerst Signed-off-by: H. Peter Anvin --- arch/x86/kernel/ioport.c | 3 ++- arch/x86/kernel/process_32.c | 27 +++++++++++++++++---------- arch/x86/kernel/signal.c | 21 ++++++++++++++------- arch/x86/kernel/vm86_32.c | 11 ++++++----- 4 files changed, 39 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 7ec14864631..e41980a373a 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,8 +131,9 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) +long sys_iopl(struct pt_regs *regs) { + unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5a9dcfb01f7..fec79ad85dc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,15 +603,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -ptregscall int sys_fork(struct pt_regs *regs) +int sys_fork(struct pt_regs *regs) { return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, - unsigned long newsp, int __user *parent_tidptr, - unsigned long unused, int __user *child_tidptr) +int sys_clone(struct pt_regs *regs) { + unsigned long clone_flags; + unsigned long newsp; + int __user *parent_tidptr, *child_tidptr; + + clone_flags = regs->bx; + newsp = regs->cx; + parent_tidptr = (int __user *)regs->dx; + child_tidptr = (int __user *)regs->di; if (!newsp) newsp = regs->sp; return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); @@ -627,7 +633,7 @@ ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, * do not have enough call-clobbered registers to hold all * the information you need. */ -ptregscall int sys_vfork(struct pt_regs *regs) +int sys_vfork(struct pt_regs *regs) { return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } @@ -635,18 +641,19 @@ ptregscall int sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, - char __user * __user *argv, - char __user * __user *envp) +int sys_execve(struct pt_regs *regs) { int error; char *filename; - filename = getname(u_filename); + filename = getname((char __user *) regs->bx); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, argv, envp, regs); + error = do_execve(filename, + (char __user * __user *) regs->cx, + (char __user * __user *) regs->dx, + regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d7a158367e3..ccfb27412f0 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,23 +549,27 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -ptregscall int -sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, - stack_t __user *uoss) +int sys_sigaltstack(struct pt_regs *regs) +{ + const stack_t __user *uss = (const stack_t __user *)regs->bx; + stack_t __user *uoss = (stack_t __user *)regs->cx; + + return do_sigaltstack(uss, uoss, regs->sp); +} #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } +#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) +unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; unsigned long ax; @@ -629,13 +633,16 @@ badframe: } #ifdef CONFIG_X86_32 -ptregscall int sys_rt_sigreturn(struct pt_regs *regs) +int sys_rt_sigreturn(struct pt_regs *regs) +{ + return do_rt_sigreturn(regs); +} #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ { return do_rt_sigreturn(regs); } +#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 8fa6ba7c923..d7ac84e7fc1 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,8 +197,9 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) +int sys_vm86old(struct pt_regs *regs) { + struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -226,7 +227,7 @@ out: } -ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) +int sys_vm86(struct pt_regs *regs) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -238,12 +239,12 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a struct vm86plus_struct __user *v86; tsk = current; - switch (cmd) { + switch (regs->bx) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(cmd, (int)arg); + ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -260,7 +261,7 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)arg; + v86 = (struct vm86plus_struct __user *)regs->cx; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); -- cgit v1.2.3 From 744525092727827a9cf0044074db3e22dcf354fd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 11 Feb 2009 16:31:40 -0800 Subject: x86: merge sys_rt_sigreturn between 32 and 64 bits Impact: cleanup With the recent changes in the 32-bit code to make system calls which use struct pt_regs take a pointer, sys_rt_sigreturn() have become identical between 32 and 64 bits, and both are empty wrappers around do_rt_sigreturn(). Remove both wrappers and rename both to sys_rt_sigreturn(). Cc: Brian Gerst Cc: Tejun Heo Signed-off-by: H. Peter Anvin --- arch/x86/kernel/signal.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ccfb27412f0..7cdcd16885e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,7 +601,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -static long do_rt_sigreturn(struct pt_regs *regs) +long sys_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long ax; @@ -632,18 +632,6 @@ badframe: return 0; } -#ifdef CONFIG_X86_32 -int sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#else /* !CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#endif /* CONFIG_X86_32 */ - /* * OK, we're invoking a handler: */ -- cgit v1.2.3 From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 10:02:56 -0800 Subject: x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption Impact: avoid access to percpu vars in preempible context They are intended to be used whenever there's the possibility that there's some stale state which is going to be overwritten with a queued update, or to force a state change when we may be in lazy mode. Either way, we could end up calling it with preemption enabled, so wrap the functions in their own little preempt-disable section so they can be safely called in any context (though preemption should never be enabled if we're actually in a lazy state). (Move out of line to avoid #include dependencies.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..dcba6c567a2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, -- cgit v1.2.3 From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:30:48 +0100 Subject: x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context Impact: Catch cases where lazy MMU state is active in a preemtible context arch_flush_lazy_mmu_cpu() has been changed to disable preemption so the checks in enter/leave will never trigger. Put the preemtible() check into arch_flush_lazy_mmu_cpu() to catch such cases. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dcba6c567a2..c6520a4e85d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } @@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_cpu_mode(); arch_enter_lazy_cpu_mode(); } -- cgit v1.2.3 From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 12 Feb 2009 18:48:53 -0800 Subject: x86, hpet: fix for LS21 + HPET = boot hang Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21 systems that had the HPET enabled in the BIOS, resulting in boot hangs for x86_64. Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which merges the i386 and x86_64 HPET code. Prior to this commit, when we setup the HPET timers in x86_64, we did the following: hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); However after the i386/x86_64 HPET merge, we do the following: cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This causes the periodic interrupt to be not so periodic, and that results in the boot time hang I reported earlier in the delay calibration. My fix: Always disable HPET_TN_LEVEL when setting up periodic mode. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8ad..5c8da2c2c18 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); -- cgit v1.2.3 From c466ed2e4337c5eb03b283da507eb5328ab06c73 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 13 Feb 2009 08:40:55 -0600 Subject: x86, UV: set full apicid in uv_hub_send_ipi The uv_hub_send_ipi() function needs to set the full apicid in the UVH_IPI_INT mmr. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 6adb5e6f4d9..89b84e004f0 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -114,16 +114,15 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid, lapicid; + unsigned long val, apicid; int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); - lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); - val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | - ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | - ( vector << UVH_IPI_INT_VECTOR_SHFT ); + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -- cgit v1.2.3 From e49590b6dd356f8ef10ba3531a29e5086f6f2e3a Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Fri, 13 Feb 2009 20:56:18 -0500 Subject: x86, olpc: fix model detection without OFW Impact: fix "garbled display, laptop is unusable" bug Commit e51a1ac2dfca9ad869471e88f828281db7e810c0 ("x86, olpc: fix endian bug in openfirmware workaround") breaks model comparison on OLPC; the value 0xc2 needs to be scaled up by olpc_board(). The pre-patch version was wrong, but accidentally worked anyway (big-endian 0xc2 is big enough to satisfy all other board revisions, but little endian 0xc2 is not). Signed-off-by: Chris Ball Cc: Andrew Morton Acked-by: Andres Salomon Cc: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1..4006c522adc 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif -- cgit v1.2.3 From f6db44df5bd39ed33883786d35342759af796e4a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Feb 2009 23:59:18 -0800 Subject: x86: fix typo in filter_cpuid_features() Impact: fix wrong disabling of cpu features an amd system got this strange output: CPU: CPU feature monitor disabled due to lack of CPUID level 0x5 but in /proc/cpuinfo I have: cpuid level : 5 on intel system: CPU: CPU feature monitor disabled due to lack of CPUID level 0x5 CPU: CPU feature dca disabled due to lack of CPUID level 0x9 but in /proc/cpuinfo i have: cpuid level : 11 Tt turns out there is a typo, and we should use level member in df. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 21f086b4c1a..32093d08d87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -241,9 +241,9 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) * signs here... */ if (cpu_has(c, df->feature) && - ((s32)df->feature < 0 ? - (u32)df->feature > (u32)c->extended_cpuid_level : - (s32)df->feature > (s32)c->cpuid_level)) { + ((s32)df->level < 0 ? + (u32)df->level > (u32)c->extended_cpuid_level : + (s32)df->level > (s32)c->cpuid_level)) { clear_cpu_cap(c, df->feature); if (warn) printk(KERN_WARNING @@ -253,7 +253,7 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) df->level); } } -} +} /* * Naming convention should be: [()] -- cgit v1.2.3 From be716615fe596ee117292dc615e95f707fb67fd1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Jan 2009 23:36:34 +0100 Subject: x86, vm86: fix preemption bug Commit 3d2a71a596bd9c761c8487a2178e95f8a61da083 ("x86, traps: converge do_debug handlers") changed the preemption disable logic of do_debug() so vm86_handle_trap() is called with preemption disabled resulting in: BUG: sleeping function called from invalid context at include/linux/kernel.h:155 in_atomic(): 1, irqs_disabled(): 0, pid: 3005, name: dosemu.bin Pid: 3005, comm: dosemu.bin Tainted: G W 2.6.29-rc1 #51 Call Trace: [] copy_to_user+0x33/0x108 [] save_v86_state+0x65/0x149 [] handle_vm86_trap+0x20/0x8f [] do_debug+0x15b/0x1a4 [] debug_stack_correct+0x27/0x2c [] sysenter_do_call+0x12/0x2f BUG: scheduling while atomic: dosemu.bin/3005/0x10000001 Restore the original calling convention and reenable preemption before calling handle_vm86_trap(). Reported-by: Michal Suchanek Cc: stable@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb..a9e7548e179 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif -- cgit v1.2.3 From 88d0f550d71493cd975a11a03c166211b2f3bd32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Feb 2009 23:57:28 -0800 Subject: x86: make 32bit to call enable_IO_APIC early like 64bit Impact: cleanup So we remove some #ifdefs. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 ++++---- arch/x86/kernel/io_apic.c | 4 ---- arch/x86/kernel/smpboot.c | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 8bd801db24d..b8616aaa168 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1646,19 +1646,19 @@ int __init APIC_init_uniprocessor(void) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_IO_APIC /* * Now enable IO-APICs, actually call clear_IO_APIC * We need clear_IO_APIC before enabling vector on BP */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); -#endif -#ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif localise_nmi_watchdog(); +#else + localise_nmi_watchdog(); +#endif end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7248ca11bdc..0b7cde3da48 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3057,13 +3057,9 @@ out: void __init setup_IO_APIC(void) { -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ -#endif io_apic_irqs = ~PIC_IRQS; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index af57f88186e..10834954e30 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1154,13 +1154,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) */ setup_local_APIC(); -#ifdef CONFIG_X86_64 /* * Enable IO APIC before setting up error vector */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); -#endif + end_local_APIC_setup(); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From 3bd25d0fa3ed588a6735b815cb0c146c23888ace Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Feb 2009 02:54:03 -0800 Subject: x86: pre init pirq_entries[] Impact: cleanup set default value early - this allows the removal of a number of dynamic initialization codepaths, and an #ifdef. Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 0b7cde3da48..a89878e08a4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -813,8 +813,9 @@ static void clear_IO_APIC (void) */ #define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; +static int pirq_entries[MAX_PIRQS] = { + [0 ... MAX_PIRQS - 1] = -1 +}; static int __init ioapic_pirq_setup(char *str) { @@ -823,10 +824,6 @@ static int __init ioapic_pirq_setup(char *str) get_options(str, ARRAY_SIZE(ints), ints); - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; apic_printk(APIC_VERBOSE, KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); max = MAX_PIRQS; @@ -1976,13 +1973,6 @@ void __init enable_IO_APIC(void) int apic; unsigned long flags; -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - /* * The number of IO-APIC IRQ registers (== #pins): */ -- cgit v1.2.3 From 98c061b6cf2e7a1010286a7a4f672c4623e1b3e0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Feb 2009 00:00:50 -0800 Subject: x86: make APIC_init_uniprocessor() more like smp_prepare_cpus() Impact: cleanup 1. move localise_nmi_watchdog() later 2. change setup_boot_APIC_clock() to setup_boot_clock() for 64-bit Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b8616aaa168..c03f4cc135c 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1649,32 +1649,28 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_X86_IO_APIC /* * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP + * We need clear_IO_APIC before enabling error vector */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); - - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) - localise_nmi_watchdog(); -#else - localise_nmi_watchdog(); #endif + end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else + else { nr_ioapics = 0; -# endif + localise_nmi_watchdog(); + } +#else + localise_nmi_watchdog(); #endif + setup_boot_clock(); #ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); check_nmi_watchdog(); -#else - setup_boot_clock(); #endif return 0; -- cgit v1.2.3