aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-27 12:03:24 +0100
committerIngo Molnar <mingo@elte.hu>2009-01-27 12:03:24 +0100
commit4369f1fb7cd4cf777312f43e1cb9aa5504fc4125 (patch)
treea5525d63fe682e6744c109fb72f7e1b33855cb00
parent3ddeb51d9c83931c1ca6abf76a38934c5a1ed918 (diff)
parentcf3997f507624757f149fcc42b76fb03c151fb65 (diff)
Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu
Conflicts: arch/x86/kernel/setup_percpu.c Semantic conflict: arch/x86/kernel/cpu/common.c Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/cpumask.h4
-rw-r--r--arch/x86/include/asm/processor.h9
-rw-r--r--arch/x86/include/asm/topology.h6
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/apic.c18
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/setup_percpu.c378
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/smpcommon.c32
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c5
-rw-r--r--arch/x86/mm/numa_64.c217
-rw-r--r--arch/x86/xen/smp.c1
13 files changed, 323 insertions, 383 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5a29b792cb8..d6218e6c982 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
config HAVE_SETUP_PER_CPU_AREA
- def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
+ def_bool y
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 26c6dad9047..a7f3c75f8ad 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -10,6 +10,8 @@ extern cpumask_var_t cpu_callout_mask;
extern cpumask_var_t cpu_initialized_mask;
extern cpumask_var_t cpu_sibling_setup_mask;
+extern void setup_cpu_local_masks(void);
+
#else /* CONFIG_X86_32 */
extern cpumask_t cpu_callin_map;
@@ -22,6 +24,8 @@ extern cpumask_t cpu_sibling_setup_map;
#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
+static inline void setup_cpu_local_masks(void) { }
+
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 48676b943b9..befa20b4a68 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -394,14 +394,6 @@ union irq_stack_union {
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
-
-static inline void load_gs_base(int cpu)
-{
- /* Memory clobbers used to order pda/percpu accesses */
- mb();
- wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
- mb();
-}
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -778,7 +770,6 @@ extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(void);
extern void cpu_init(void);
-extern void init_gdt(int cpu);
static inline unsigned long get_debugctlmsr(void)
{
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 10022ed3a4b..77cfb2cfb38 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
return &node_to_cpumask_map[node];
}
+static inline void setup_node_to_cpumask_map(void) { }
+
#else /* CONFIG_X86_64 */
/* Mappings between node number and cpus on that node. */
@@ -120,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
+extern void setup_node_to_cpumask_map(void);
+
/*
* Replace default node_to_cpumask_ptr with optimized version
* Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
@@ -218,6 +222,8 @@ static inline int node_to_first_cpu(int node)
return first_cpu(cpu_online_map);
}
+static inline void setup_node_to_cpumask_map(void) { }
+
/*
* Replace default node_to_cpumask_ptr with optimized version
* Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a99437c965c..37fa30bada1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-y += setup.o i8259.o irqinit_$(BITS).o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -59,8 +59,8 @@ apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
-obj-$(CONFIG_X86_32_SMP) += smpcommon.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
+obj-$(CONFIG_SMP) += setup_percpu.o
+obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 1df341a528a..c6f15647eba 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -60,6 +60,24 @@
# error SPURIOUS_APIC_VECTOR definition error
#endif
+unsigned int num_processors;
+unsigned disabled_cpus __cpuinitdata;
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+EXPORT_SYMBOL(boot_cpu_physical_apicid);
+unsigned int max_physical_apicid;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
#ifdef CONFIG_X86_32
/*
* Knob to control our willingness to enable the local APIC.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 99904f288d6..652fdc9a757 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+ alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+ alloc_bootmem_cpumask_var(&cpu_callin_mask);
+ alloc_bootmem_cpumask_var(&cpu_callout_mask);
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
@@ -249,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
void switch_to_new_gdt(void)
{
struct desc_ptr gdt_descr;
+ int cpu = smp_processor_id();
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.address = (long)get_cpu_gdt_table(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
+ /* Reload the per-cpu base */
#ifdef CONFIG_X86_32
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+ loadsegment(fs, __KERNEL_PERCPU);
+#else
+ loadsegment(gs, 0);
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
}
@@ -959,10 +973,6 @@ void __cpuinit cpu_init(void)
struct task_struct *me;
int i;
- loadsegment(fs, 0);
- loadsegment(gs, 0);
- load_gs_base(cpu);
-
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -984,6 +994,8 @@ void __cpuinit cpu_init(void)
*/
switch_to_new_gdt();
+ loadsegment(fs, 0);
+
load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e553803cd2d..0d1e7ac439f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -15,6 +15,7 @@
#include <asm/highmem.h>
#include <asm/proto.h>
#include <asm/cpumask.h>
+#include <asm/cpu.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
@@ -22,118 +23,36 @@
# define DBG(x...)
#endif
-/*
- * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
- * voyager wants cpu_number too.
- */
-#ifdef CONFIG_SMP
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-unsigned int num_processors;
-unsigned disabled_cpus __cpuinitdata;
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-EXPORT_SYMBOL(boot_cpu_physical_apicid);
-unsigned int max_physical_apicid;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map;
-#endif
-
-/*
- * Map cpu index to physical APIC ID
- */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
-#define X86_64_NUMA 1 /* (used later) */
-DEFINE_PER_CPU(int, node_number) = 0;
-EXPORT_PER_CPU_SYMBOL(node_number);
-
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/*
- * Which logical CPUs are on which nodes
- */
-cpumask_t *node_to_cpumask_map;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-/*
- * Setup node_to_cpumask_map
- */
-static void __init setup_node_to_cpumask_map(void);
+#ifdef CONFIG_X86_64
+#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
-static inline void setup_node_to_cpumask_map(void) { }
+#define BOOT_PERCPU_OFFSET 0
#endif
-#ifdef CONFIG_X86_64
-
-/* correctly size the local cpu masks */
-static void __init setup_cpu_local_masks(void)
-{
- alloc_bootmem_cpumask_var(&cpu_initialized_mask);
- alloc_bootmem_cpumask_var(&cpu_callin_mask);
- alloc_bootmem_cpumask_var(&cpu_callout_mask);
- alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
-}
-
-#else /* CONFIG_X86_32 */
-
-static inline void setup_cpu_local_masks(void)
-{
-}
+DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-#endif /* CONFIG_X86_32 */
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
+};
+EXPORT_SYMBOL(__per_cpu_offset);
-#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
-/*
- * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas. These arrays then become expendable and the
- * *_early_ptr's are zeroed indicating that the static arrays are gone.
- */
-static void __init setup_per_cpu_maps(void)
+static inline void setup_percpu_segment(int cpu)
{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- per_cpu(x86_cpu_to_apicid, cpu) =
- early_per_cpu_map(x86_cpu_to_apicid, cpu);
- per_cpu(x86_bios_cpu_apicid, cpu) =
- early_per_cpu_map(x86_bios_cpu_apicid, cpu);
-#ifdef X86_64_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
- early_per_cpu_map(x86_cpu_to_node_map, cpu);
-#endif
- }
+#ifdef CONFIG_X86_32
+ struct desc_struct gdt;
- /* indicate the early static arrays will soon be gone */
- early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
- early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
-#ifdef X86_64_NUMA
- early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
+ pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
+ 0x2 | DESCTYPE_S, 0x8);
+ gdt.s = 1;
+ write_gdt_entry(get_cpu_gdt_table(cpu),
+ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
-#ifdef CONFIG_X86_64
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
- [0] = (unsigned long)__per_cpu_load,
-};
-#else
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-#endif
-EXPORT_SYMBOL(__per_cpu_offset);
-
/*
* Great future plan:
* Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -141,15 +60,12 @@ EXPORT_SYMBOL(__per_cpu_offset);
*/
void __init setup_per_cpu_areas(void)
{
- ssize_t size, old_size;
+ ssize_t size;
char *ptr;
int cpu;
- unsigned long align = 1;
/* Copy section for each CPU (we discard the original) */
- old_size = PERCPU_ENOUGH_ROOM;
- align = max_t(unsigned long, PAGE_SIZE, align);
- size = roundup(old_size, align);
+ size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -158,20 +74,17 @@ void __init setup_per_cpu_areas(void)
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
- ptr = __alloc_bootmem(size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages(size);
#else
int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) {
- ptr = __alloc_bootmem(size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages(size);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d at %016lx\n",
cpu, __pa(ptr));
} else {
- ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
cpu, node, __pa(ptr));
}
@@ -181,22 +94,47 @@ void __init setup_per_cpu_areas(void)
per_cpu_offset(cpu) = ptr - __per_cpu_start;
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
+ setup_percpu_segment(cpu);
+ /*
+ * Copy data used in early init routines from the
+ * initial arrays to the per cpu data areas. These
+ * arrays then become expendable and the *_early_ptr's
+ * are zeroed indicating that the static arrays are
+ * gone.
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_apicid, cpu);
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
- per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+ per_cpu(irq_stack_union.irq_stack, cpu) +
+ IRQ_STACK_SIZE - 64;
+#ifdef CONFIG_NUMA
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ early_per_cpu_map(x86_cpu_to_node_map, cpu);
+#endif
+#endif
/*
- * Up to this point, CPU0 has been using .data.init
- * area. Reload %gs offset for CPU0.
+ * Up to this point, the boot CPU has been using .data.init
+ * area. Reload any changed state for the boot CPU.
*/
- if (cpu == 0)
- load_gs_base(cpu);
-#endif
+ if (cpu == boot_cpu_id)
+ switch_to_new_gdt();
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
- /* Setup percpu data maps */
- setup_per_cpu_maps();
+ /* indicate the early static arrays will soon be gone */
+#ifdef CONFIG_X86_LOCAL_APIC
+ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+ early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#endif
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+ early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
+#endif
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
@@ -204,207 +142,3 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
}
-
-#endif
-
-#ifdef X86_64_NUMA
-
-/*
- * Allocate node_to_cpumask_map based on number of available nodes
- * Requires node_possible_map to be valid.
- *
- * Note: node_to_cpumask() is not valid until after this is done.
- * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
- */
-static void __init setup_node_to_cpumask_map(void)
-{
- unsigned int node, num = 0;
- cpumask_t *map;
-
- /* setup nr_node_ids if not done yet */
- if (nr_node_ids == MAX_NUMNODES) {
- for_each_node_mask(node, node_possible_map)
- num = node;
- nr_node_ids = num + 1;
- }
-
- /* allocate the map */
- map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
- DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
-
- pr_debug("Node to cpumask map at %p for %d nodes\n",
- map, nr_node_ids);
-
- /* node_to_cpumask() will now work */
- node_to_cpumask_map = map;
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
- /* early setting, no percpu area yet */
- if (cpu_to_node_map) {
- cpu_to_node_map[cpu] = node;
- return;
- }
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
- printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
- dump_stack();
- return;
- }
-#endif
- per_cpu(x86_cpu_to_node_map, cpu) = node;
-
- if (node != NUMA_NO_NODE)
- per_cpu(node_number, cpu) = node;
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- cpumask_t *mask;
- char buf[64];
-
- if (node_to_cpumask_map == NULL) {
- printk(KERN_ERR "node_to_cpumask_map NULL\n");
- dump_stack();
- return;
- }
-
- mask = &node_to_cpumask_map[node];
- if (enable)
- cpu_set(cpu, *mask);
- else
- cpu_clear(cpu, *mask);
-
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
-}
-
-void __cpuinit numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 1);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 0);
-}
-
-int cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!per_cpu_offset(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
-
-/* empty cpumask */
-static const cpumask_t cpu_mask_none;
-
-/*
- * Returns a pointer to the bitmask of CPUs on Node 'node'.
- */
-const cpumask_t *cpumask_of_node(int node)
-{
- if (node_to_cpumask_map == NULL) {
- printk(KERN_WARNING
- "cpumask_of_node(%d): no node_to_cpumask_map!\n",
- node);
- dump_stack();
- return (const cpumask_t *)&cpu_online_map;
- }
- if (node >= nr_node_ids) {
- printk(KERN_WARNING
- "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
- node, nr_node_ids);
- dump_stack();
- return &cpu_mask_none;
- }
- return &node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(cpumask_of_node);
-
-/*
- * Returns a bitmask of CPUs on Node 'node'.
- *
- * Side note: this function creates the returned cpumask on the stack
- * so with a high NR_CPUS count, excessive stack space is used. The
- * node_to_cpumask_ptr function should be used whenever possible.
- */
-cpumask_t node_to_cpumask(int node)
-{
- if (node_to_cpumask_map == NULL) {
- printk(KERN_WARNING
- "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
- dump_stack();
- return cpu_online_map;
- }
- if (node >= nr_node_ids) {
- printk(KERN_WARNING
- "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
- node, nr_node_ids);
- dump_stack();
- return cpu_mask_none;
- }
- return node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(node_to_cpumask);
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-#endif /* X86_64_NUMA */
-
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index def770b57b5..f9dbcff4354 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
do_rest:
per_cpu(current_task, cpu) = c_idle.idle;
#ifdef CONFIG_X86_32
- init_gdt(cpu);
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
@@ -1186,9 +1185,6 @@ out:
void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
-#ifdef CONFIG_X86_32
- init_gdt(me);
-#endif
switch_to_new_gdt();
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index add36b4e37c..00000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * SMP stuff which is common to all sub-architectures.
- */
-#include <linux/module.h>
-#include <asm/smp.h>
-#include <asm/sections.h>
-
-#ifdef CONFIG_X86_64
-DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
-#else
-DEFINE_PER_CPU(unsigned long, this_cpu_off);
-#endif
-EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-
-#ifdef CONFIG_X86_32
-/*
- * Initialize the CPU's GDT. This is either the boot CPU doing itself
- * (still using the master per-cpu area), or a CPU doing it for a
- * secondary which will soon come up.
- */
-__cpuinit void init_gdt(int cpu)
-{
- struct desc_struct gdt;
-
- pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
- 0x2 | DESCTYPE_S, 0x8);
- gdt.s = 1;
-
- write_gdt_entry(get_cpu_gdt_table(cpu),
- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-}
-#endif
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 96f15b09a4c..331cd6d5648 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -530,8 +530,6 @@ static void __init do_boot_cpu(__u8 cpu)
/* init_tasks (in sched.c) is indexed logically */
stack_start.sp = (void *)idle->thread.sp;
- init_gdt(cpu);
- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
per_cpu(current_task, cpu) = idle;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
irq_ctx_init(cpu);
@@ -1748,8 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
static void __cpuinit voyager_smp_prepare_boot_cpu(void)
{
- init_gdt(smp_processor_id());
- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
switch_to_new_gdt();
cpu_set(smp_processor_id(), cpu_online_map);
@@ -1782,7 +1778,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
void __init smp_setup_processor_id(void)
{
current_thread_info()->cpu = hard_smp_processor_id();
- percpu_write(cpu_number, hard_smp_processor_id());
}
static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89..08d140fbc31 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,6 +20,12 @@
#include <asm/acpi.h>
#include <asm/k8.h>
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
+#else
+# define DBG(x...)
+#endif
+
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -33,6 +39,21 @@ int numa_off __initdata;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
+
+/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/*
+ * Which logical CPUs are on which nodes
+ */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
@@ -640,3 +661,199 @@ void __init init_cpu_to_node(void)
#endif
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+void __init setup_node_to_cpumask_map(void)
+{
+ unsigned int node, num = 0;
+ cpumask_t *map;
+
+ /* setup nr_node_ids if not done yet */
+ if (nr_node_ids == MAX_NUMNODES) {
+ for_each_node_mask(node, node_possible_map)
+ num = node;
+ nr_node_ids = num + 1;
+ }
+
+ /* allocate the map */
+ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+ DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
+
+ pr_debug("Node to cpumask map at %p for %d nodes\n",
+ map, nr_node_ids);
+
+ /* node_to_cpumask() will now work */
+ node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ per_cpu(node_number, cpu) = node;
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ cpumask_t *mask;
+ char buf[64];
+
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_ERR "node_to_cpumask_map NULL\n");
+ dump_stack();
+ return;
+ }
+
+ mask = &node_to_cpumask_map[node];
+ if (enable)
+ cpu_set(cpu, *mask);
+ else
+ cpu_clear(cpu, *mask);
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!per_cpu_offset(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+
+/* empty cpumask */
+static const cpumask_t cpu_mask_none;
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const cpumask_t *cpumask_of_node(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "cpumask_of_node(%d): no node_to_cpumask_map!\n",
+ node);
+ dump_stack();
+ return (const cpumask_t *)&cpu_online_map;
+ }
+ if (node >= nr_node_ids) {
+ printk(KERN_WARNING
+ "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+ node, nr_node_ids);
+ dump_stack();
+ return &cpu_mask_none;
+ }
+ return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ *
+ * Side note: this function creates the returned cpumask on the stack
+ * so with a high NR_CPUS count, excessive stack space is used. The
+ * node_to_cpumask_ptr function should be used whenever possible.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+ dump_stack();
+ return cpu_online_map;
+ }
+ if (node >= nr_node_ids) {
+ printk(KERN_WARNING
+ "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
+ node, nr_node_ids);
+ dump_stack();
+ return cpu_mask_none;
+ }
+ return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72c2eb9b64c..7735e3dd359 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
- init_gdt(cpu);
irq_ctx_init(cpu);
#else
clear_tsk_thread_flag(idle, TIF_FORK);