aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r-- arch/x86/kernel/smpboot.c 547
1 files changed, 264 insertions, 283 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 56078d61c79..8c3aca7cb34 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -52,6 +52,7 @@
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
+#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
@@ -59,7 +60,6 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
-#include <asm/nmi.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
#include <linux/mc146818rtc.h>
@@ -68,22 +68,6 @@
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
-/*
- * FIXME: For x86_64, those are defined in other files. But moving them here,
- * would make the setup areas dependent on smp, which is a loss. When we
- * integrate apic between arches, we can probably do a better job, but
- * right now, they'll stay here -- glommer
- */
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
- = { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
static int low_mappings;
@@ -105,7 +89,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
@@ -140,13 +124,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
static atomic_t init_deasserted;
-static int boot_cpu_logical_apicid;
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
@@ -182,6 +165,8 @@ static void unmap_cpu_to_node(int cpu)
#endif
#ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = BAD_APICID };
@@ -198,13 +183,12 @@ static void map_cpu_to_logical_apicid(void)
map_cpu_to_node(cpu, node);
}
-static void unmap_cpu_to_logical_apicid(int cpu)
+void numa_remove_cpu(int cpu)
{
cpu_2_logical_apicid[cpu] = BAD_APICID;
unmap_cpu_to_node(cpu);
}
#else
-#define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
#define map_cpu_to_logical_apicid() do {} while (0)
#endif
@@ -228,13 +212,13 @@ static void __cpuinit smp_callin(void)
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(read_apic_id());
+ phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
}
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+ pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
/*
* STARTUP IPIs are fragile beasts as they might sometimes
@@ -269,12 +253,13 @@ static void __cpuinit smp_callin(void)
* boards)
*/
- Dprintk("CALLIN, before setup_local_APIC().\n");
+ pr_debug("CALLIN, before setup_local_APIC().\n");
smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
map_cpu_to_logical_apicid();
+ notify_cpu_starting(cpuid);
/*
* Get our bogomips.
*
@@ -284,7 +269,7 @@ static void __cpuinit smp_callin(void)
local_irq_enable();
calibrate_delay();
local_irq_disable();
- Dprintk("Stack at about %p\n", &cpuid);
+ pr_debug("Stack at about %p\n", &cpuid);
/*
* Save our processor parameters
@@ -344,53 +329,30 @@ static void __cpuinit start_secondary(void *unused)
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
+ *
+ * We need to hold vector_lock so that the set of online cpus
+ * does not change while we are assigning vectors to cpus. Holding
+ * this lock ensures we don't half assign or remove an irq from a cpu.
*/
- lock_ipi_call_lock();
-#ifdef CONFIG_X86_64
- spin_lock(&vector_lock);
-
- /* Setup the per cpu irq handling data structures */
+ ipi_call_lock();
+ lock_vector_lock();
__setup_vector_irq(smp_processor_id());
- /*
- * Allow the master to continue.
- */
- spin_unlock(&vector_lock);
-#endif
cpu_set(smp_processor_id(), cpu_online_map);
- unlock_ipi_call_lock();
+ unlock_vector_lock();
+ ipi_call_unlock();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+ /* enable local interrupts */
+ local_irq_enable();
+
setup_secondary_clock();
wmb();
cpu_idle();
}
-#ifdef CONFIG_X86_32
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __devinit initialize_secondary(void)
-{
- /*
- * We don't actually need to load the full TSS,
- * basically just the stack pointer and the ip.
- */
-
- asm volatile(
- "movl %0,%%esp\n\t"
- "jmp *%1"
- :
- :"m" (current->thread.sp), "m" (current->thread.ip));
-}
-#endif
-
static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_32
/*
* Mask B, Pentium, but not Pentium MMX
*/
@@ -440,7 +402,6 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
valid_k7:
;
-#endif
}
static void __cpuinit smp_checks(void)
@@ -487,7 +448,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpu_set(cpu, cpu_sibling_setup_map);
if (smp_num_siblings > 1) {
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
c->cpu_core_id == cpu_data(i).cpu_core_id) {
cpu_set(i, per_cpu(cpu_sibling_map, cpu));
@@ -510,7 +471,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
return;
}
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
cpu_set(i, c->llc_shared_map);
@@ -555,23 +516,6 @@ cpumask_t cpu_coregroup_map(int cpu)
return c->llc_shared_map;
}
-#ifdef CONFIG_X86_32
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
- trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
-}
-#endif
-
static void impress_friends(void)
{
int cpu;
@@ -579,7 +523,7 @@ static void impress_friends(void)
/*
* Allow the user to impress friends.
*/
- Dprintk("Before bogomips.\n");
+ pr_debug("Before bogomips.\n");
for_each_possible_cpu(cpu)
if (cpu_isset(cpu, cpu_callout_map))
bogosum += cpu_data(cpu).loops_per_jiffy;
@@ -589,7 +533,7 @@ static void impress_friends(void)
bogosum/(500000/HZ),
(bogosum/(5000/HZ))%100);
- Dprintk("Before bogocount - setting activated=1.\n");
+ pr_debug("Before bogocount - setting activated=1.\n");
}
static inline void __inquire_remote_apic(int apicid)
@@ -612,8 +556,7 @@ static inline void __inquire_remote_apic(int apicid)
printk(KERN_CONT
"a previous APIC delivery may have failed\n");
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
timeout = 0;
do {
@@ -645,29 +588,24 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int maxlvt;
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+ apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
- Dprintk("Waiting for send to finish...\n");
+ pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
}
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- Dprintk("NMI sent.\n");
+ pr_debug("NMI sent.\n");
if (send_status)
printk(KERN_ERR "APIC never delivered???\n");
@@ -691,42 +629,40 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return send_status;
}
+ maxlvt = lapic_get_maxlvt();
+
/*
* Be paranoid about clearing APIC errors.
*/
if (APIC_INTEGRATED(apic_version[phys_apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
- Dprintk("Asserting INIT.\n");
+ pr_debug("Asserting INIT.\n");
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/*
* Send IPI
*/
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+ phys_apicid);
- Dprintk("Waiting for send to finish...\n");
+ pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
mdelay(10);
- Dprintk("Deasserting INIT.\n");
+ pr_debug("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
- Dprintk("Waiting for send to finish...\n");
+ pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
mb();
@@ -748,64 +684,51 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
* target processor state.
*/
startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
-#ifdef CONFIG_X86_64
- (unsigned long)init_rsp);
-#else
(unsigned long)stack_start.sp);
-#endif
/*
* Run STARTUP IPI loop.
*/
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = lapic_get_maxlvt();
+ pr_debug("#startup loops: %d.\n", num_starts);
for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n", j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
+ pr_debug("Sending STARTUP #%d.\n", j);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
+ pr_debug("After apic_write.\n");
/*
* STARTUP IPI
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
+ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+ phys_apicid);
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(300);
- Dprintk("Startup point 1.\n");
+ pr_debug("Startup point 1.\n");
- Dprintk("Waiting for send to finish...\n");
+ pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
- }
accept_status = (apic_read(APIC_ESR) & 0xEF);
if (send_status || accept_status)
break;
}
- Dprintk("After Startup.\n");
+ pr_debug("After Startup.\n");
if (send_status)
printk(KERN_ERR "APIC never delivered???\n");
@@ -832,6 +755,52 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
+#ifdef CONFIG_X86_64
+
+/*
+ * Hand the old pda at @oldpda back through free_bootmem(), but only while
+ * after_bootmem is still zero; once it is set this function does nothing.
+ *
+ * __ref because it's safe to call free_bootmem when after_bootmem == 0.
+ */
+static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
+{
+ if (!after_bootmem)
+ free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
+}
+
+/*
+ * Allocate node local memory for the AP pda.
+ *
+ * Must be called after the _cpu_pda pointer table is initialized.
+ *
+ * If @cpu already has a pda whose in_bootmem flag is clear, nothing is
+ * done.  Otherwise a new pda is allocated with kmalloc_node() on the node
+ * reported by cpu_to_node(); any existing pda is copied into it and then
+ * passed to free_bootmem_pda(), and cpu_pda(cpu) is switched to the new one.
+ *
+ * Returns 0 when the cpu ends up with a pda (including the case where the
+ * allocation failed but an old pda is still in place), or -1 when the
+ * allocation failed and there was no old pda.
+ */
+int __cpuinit get_local_pda(int cpu)
+{
+ struct x8664_pda *oldpda, *newpda;
+ unsigned long size = sizeof(struct x8664_pda);
+ int node = cpu_to_node(cpu);
+
+ /* A pda that is not flagged in_bootmem is kept as it is. */
+ if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+ return 0;
+
+ oldpda = cpu_pda(cpu);
+ newpda = kmalloc_node(size, GFP_ATOMIC, node);
+ if (!newpda) {
+ printk(KERN_ERR "Could not allocate node local PDA "
+ "for CPU %d on node %d\n", cpu, node);
+
+ if (oldpda)
+ return 0; /* have a usable pda */
+ else
+ return -1;
+ }
+
+ if (oldpda) {
+ memcpy(newpda, oldpda, size);
+ free_bootmem_pda(oldpda);
+ }
+
+ /*
+ * NOTE(review): when there was no old pda, in_bootmem is the only
+ * field initialised here; the rest of newpda is whatever
+ * kmalloc_node() returned -- confirm this case cannot matter.
+ */
+ newpda->in_bootmem = 0;
+ cpu_pda(cpu) = newpda;
+ return 0;
+}
+#endif /* CONFIG_X86_64 */
+
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -848,28 +817,14 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
INIT_WORK(&c_idle.work, do_fork_idle);
-#ifdef CONFIG_X86_64
- /* allocate memory for gdts of secondary cpus. Hotplug is considered */
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
- return -1;
- }
+#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof(struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
+ if (cpu > 0) {
+ boot_error = get_local_pda(cpu);
+ if (boot_error)
+ goto restore_state;
+ /* if can't get pda memory, can't start cpu */
}
#endif
@@ -905,18 +860,15 @@ do_rest:
#ifdef CONFIG_X86_32
per_cpu(current_task, cpu) = c_idle.idle;
init_gdt(cpu);
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
- c_idle.idle->thread.ip = (unsigned long) start_secondary;
/* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.sp = (void *) c_idle.idle->thread.sp;
irq_ctx_init(cpu);
#else
cpu_pda(cpu)->pcurrent = c_idle.idle;
- init_rsp = c_idle.idle->thread.sp;
- load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
- initial_code = (unsigned long)start_secondary;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
#endif
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ initial_code = (unsigned long)start_secondary;
+ stack_start.sp = (void *) c_idle.idle->thread.sp;
/* start_ip had better be page-aligned! */
start_ip = setup_trampoline();
@@ -934,7 +886,7 @@ do_rest:
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
- Dprintk("Setting warm reset code and vector.\n");
+ pr_debug("Setting warm reset code and vector.\n");
store_NMI_vector(&nmi_high, &nmi_low);
@@ -955,9 +907,9 @@ do_rest:
/*
* allow APs to start initializing.
*/
- Dprintk("Before Callout %d.\n", cpu);
+ pr_debug("Before Callout %d.\n", cpu);
cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
+ pr_debug("After Callout %d.\n", cpu);
/*
* Wait 5s total for a response
@@ -970,10 +922,10 @@ do_rest:
if (cpu_isset(cpu, cpu_callin_map)) {
/* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
+ pr_debug("OK.\n");
printk(KERN_INFO "CPU%d: ", cpu);
print_cpu_info(&cpu_data(cpu));
- Dprintk("CPU has booted.\n");
+ pr_debug("CPU has booted.\n");
} else {
boot_error = 1;
if (*((volatile unsigned char *)trampoline_base)
@@ -987,16 +939,14 @@ do_rest:
inquire_remote_apic(apicid);
}
}
-
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
#ifdef CONFIG_X86_64
- clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+restore_state:
#endif
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ numa_remove_cpu(cpu); /* was set by numa_add_cpu */
cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpu_clear(cpu, cpu_possible_map);
cpu_clear(cpu, cpu_present_map);
per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
}
@@ -1020,7 +970,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
WARN_ON(irqs_disabled());
- Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
+ pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
!physid_isset(apicid, phys_cpu_present_map)) {
@@ -1032,7 +982,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
* Already booted CPU?
*/
if (cpu_isset(cpu, cpu_callin_map)) {
- Dprintk("do_boot_cpu %d Already started\n", cpu);
+ pr_debug("do_boot_cpu %d Already started\n", cpu);
return -ENOSYS;
}
@@ -1059,7 +1009,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
err = do_boot_cpu(apicid, cpu);
#endif
if (err) {
- Dprintk("do_boot_cpu failed %d\n", err);
+ pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
}
@@ -1088,14 +1038,12 @@ static __init void disable_smp(void)
{
cpu_present_map = cpumask_of_cpu(0);
cpu_possible_map = cpumask_of_cpu(0);
-#ifdef CONFIG_X86_32
smpboot_clear_io_apic_irqs();
-#endif
+
if (smp_found_config)
- phys_cpu_present_map =
- physid_mask_of_physid(boot_cpu_physical_apicid);
+ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
- phys_cpu_present_map = physid_mask_of_physid(0);
+ physid_set_mask_of_physid(0, &phys_cpu_present_map);
map_cpu_to_logical_apicid();
cpu_set(0, per_cpu(cpu_sibling_map, 0));
cpu_set(0, per_cpu(cpu_core_map, 0));
@@ -1107,6 +1055,34 @@ static __init void disable_smp(void)
static int __init smp_sanity_check(unsigned max_cpus)
{
preempt_disable();
+
+#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+ if (def_to_bigsmp && nr_cpu_ids > 8) {
+ unsigned int cpu;
+ unsigned nr;
+
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+
+ nr = 0;
+ for_each_present_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_present_map);
+ nr++;
+ }
+
+ nr = 0;
+ for_each_possible_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_possible_map);
+ nr++;
+ }
+
+ nr_cpu_ids = 8;
+ }
+#endif
+
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
"by the BIOS.\n", hard_smp_processor_id());
@@ -1158,12 +1134,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
- printk(KERN_INFO "SMP mode deactivated,"
- "forcing use of dummy APIC emulation.\n");
+ printk(KERN_INFO "SMP mode deactivated.\n");
smpboot_clear_io_apic();
-#ifdef CONFIG_X86_32
+
+ localise_nmi_watchdog();
+
connect_bsp_APIC();
-#endif
setup_local_APIC();
end_local_APIC_setup();
return -1;
@@ -1191,7 +1167,6 @@ static void __init smp_cpu_index_default(void)
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
preempt_disable();
- nmi_watchdog_default();
smp_cpu_index_default();
current_cpu_data = boot_cpu_data;
cpu_callin_map = cpumask_of_cpu(0);
@@ -1200,10 +1175,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
+#ifdef CONFIG_X86_64
+ enable_IR_x2apic();
+ setup_apic_routing();
+#endif
+
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
disable_smp();
@@ -1211,16 +1193,15 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
}
preempt_disable();
- if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+ if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+ read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
preempt_enable();
-#ifdef CONFIG_X86_32
connect_bsp_APIC();
-#endif
+
/*
* Switch from PIC to APIC mode.
*/
@@ -1247,6 +1228,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
setup_boot_clock();
+
+ if (is_uv_system())
+ uv_system_init();
out:
preempt_enable();
}
@@ -1258,8 +1242,8 @@ void __init native_smp_prepare_boot_cpu(void)
int me = smp_processor_id();
#ifdef CONFIG_X86_32
init_gdt(me);
- switch_to_new_gdt();
#endif
+ switch_to_new_gdt();
/* already set me in cpu_online_map in boot_cpu_init() */
cpu_set(me, cpu_callout_map);
per_cpu(cpu_state, me) = CPU_ONLINE;
@@ -1267,7 +1251,7 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus)
{
- Dprintk("Boot done.\n");
+ pr_debug("Boot done.\n");
impress_friends();
smp_checks();
@@ -1277,56 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
check_nmi_watchdog();
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-# ifdef CONFIG_X86_32
-void cpu_exit_clear(void)
-{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- cpu_uninit();
- irq_ctx_exit(cpu);
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
-
- unmap_cpu_to_logical_apicid(cpu);
-}
-# endif /* CONFIG_X86_32 */
-
-static void remove_siblinginfo(int cpu)
-{
- int sibling;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
- cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
- /*/
- * last thread sibling in this cpu core going down
- */
- if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
- cpu_data(sibling).booted_cores--;
- }
-
- for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
- cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- cpus_clear(per_cpu(cpu_core_map, cpu));
- c->phys_proc_id = 0;
- c->cpu_core_id = 0;
- cpu_clear(cpu, cpu_sibling_setup_map);
-}
-
-static int additional_cpus __initdata = -1;
-
-static __init int setup_additional_cpus(char *s)
-{
- return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
-}
-early_param("additional_cpus", setup_additional_cpus);
-
/*
* cpu_possible_map should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
@@ -1346,16 +1280,13 @@ early_param("additional_cpus", setup_additional_cpus);
*/
__init void prefill_possible_map(void)
{
- int i;
- int possible;
+ int i, possible;
- if (additional_cpus == -1) {
- if (disabled_cpus > 0)
- additional_cpus = disabled_cpus;
- else
- additional_cpus = 0;
- }
- possible = num_processors + additional_cpus;
+ /* no processor from mptable or madt */
+ if (!num_processors)
+ num_processors = 1;
+
+ possible = num_processors + disabled_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
@@ -1364,21 +1295,68 @@ __init void prefill_possible_map(void)
for (i = 0; i < possible; i++)
cpu_set(i, cpu_possible_map);
+
+ nr_cpu_ids = possible;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Drop @cpu from the sibling/core topology masks.
+ *
+ * @cpu is cleared from the cpu_core_map of every cpu in its own
+ * cpu_core_map and from the cpu_sibling_map of every cpu in its own
+ * cpu_sibling_map; its own two maps are then emptied, its phys_proc_id
+ * and cpu_core_id are reset to 0, and it is removed from
+ * cpu_sibling_setup_map.
+ */
+static void remove_siblinginfo(int cpu)
+{
+ int sibling;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
+ cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+ /*
+ * last thread sibling in this cpu core going down
+ */
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+ cpu_data(sibling).booted_cores--;
+ }
+
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
+ c->phys_proc_id = 0;
+ c->cpu_core_id = 0;
+ cpu_clear(cpu, cpu_sibling_setup_map);
+}
static void __ref remove_cpu_from_maps(int cpu)
{
cpu_clear(cpu, cpu_online_map);
-#ifdef CONFIG_X86_64
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
/* was set by cpu_init() */
- clear_bit(cpu, (unsigned long *)&cpu_initialized);
- clear_node_cpumask(cpu);
-#endif
+ cpu_clear(cpu, cpu_initialized);
+ numa_remove_cpu(cpu);
}
-int __cpu_disable(void)
+/*
+ * Shared tail of taking the current cpu offline; native_cpu_disable()
+ * ends by calling this.
+ *
+ * Interrupts are briefly enabled (see HACK below) and disabled again, the
+ * cpu's sibling info is removed, the cpu is dropped from the cpu maps
+ * under the vector lock, and fixup_irqs() is run on the resulting
+ * cpu_online_map.
+ */
+void cpu_disable_common(void)
+{
+ int cpu = smp_processor_id();
+ /*
+ * HACK:
+ * Allow any queued timer interrupts to get serviced
+ * This is only a temporary solution until we cleanup
+ * fixup_irqs as we do for IA64.
+ */
+ local_irq_enable();
+ mdelay(1);
+
+ local_irq_disable();
+ remove_siblinginfo(cpu);
+
+ /* It's now safe to remove this processor from the online map */
+ lock_vector_lock();
+ remove_cpu_from_maps(cpu);
+ unlock_vector_lock();
+ fixup_irqs(cpu_online_map);
+}
+
+int native_cpu_disable(void)
{
int cpu = smp_processor_id();
@@ -1397,25 +1375,11 @@ int __cpu_disable(void)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
- /*
- * HACK:
- * Allow any queued timer interrupts to get serviced
- * This is only a temporary solution until we cleanup
- * fixup_irqs as we do for IA64.
- */
- local_irq_enable();
- mdelay(1);
-
- local_irq_disable();
- remove_siblinginfo(cpu);
-
- /* It's now safe to remove this processor from the online map */
- remove_cpu_from_maps(cpu);
- fixup_irqs(cpu_online_map);
+ cpu_disable_common();
return 0;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
@@ -1432,28 +1396,45 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
+
+/*
+ * Common teardown performed by a cpu on itself when it plays dead;
+ * native_play_dead() follows this with wbinvd_halt().
+ *
+ * Marks this cpu's cpu_state as CPU_DEAD and returns with local
+ * interrupts disabled.
+ */
+void play_dead_common(void)
+{
+ idle_task_exit();
+ reset_lazy_tlbstate();
+ irq_ctx_exit(raw_smp_processor_id());
+ c1e_remove_cpu(raw_smp_processor_id());
+
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+}
+
+/*
+ * Native play-dead path: run the common teardown, then wbinvd_halt().
+ * NOTE(review): wbinvd_halt() is presumably not expected to return -- confirm.
+ */
+void native_play_dead(void)
+{
+ play_dead_common();
+ wbinvd_halt();
+}
+
#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
+int native_cpu_disable(void)
{
return -ENOSYS;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
-#endif
-/*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
-static int __init parse_maxcpus(char *arg)
+void native_play_dead(void)
{
- extern unsigned int maxcpus;
-
- maxcpus = simple_strtoul(arg, NULL, 0);
- return 0;
+ BUG();
}
-early_param("maxcpus", parse_maxcpus);
+
+#endif