diff options
Diffstat (limited to 'arch/x86/kernel')
38 files changed, 554 insertions, 226 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6c327b852e2..430d5b24af7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,6 +26,8 @@ CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n +GCOV_PROFILE_tsc.o := n +GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9372f0406ad..6c99f503780 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1192,7 +1192,7 @@ out: return 0; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = device_change_notifier, }; @@ -1763,7 +1763,7 @@ static void *alloc_coherent(struct device *dev, size_t size, flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) - return 0; + return NULL; paddr = virt_to_phys(virt_addr); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 10b2accd12e..c1b17e97252 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -472,6 +472,8 @@ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) if (iommu->evt_buf == NULL) return NULL; + iommu->evt_buf_size = EVT_BUFFER_SIZE; + return iommu->evt_buf; } @@ -691,6 +693,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, devid = e->devid; devid_to = e->ext >> 8; + set_dev_entry_from_acpi(iommu, devid , e->flags, 0); set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; @@ -749,11 +752,13 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) + if (alias) { amd_iommu_alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - amd_iommu_alias_table[dev_i], - flags, ext_flags); + set_dev_entry_from_acpi(iommu, + devid_to, flags, ext_flags); + } + set_dev_entry_from_acpi(iommu, dev_i, + flags, ext_flags); } break; default: diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8c7c042ecad..0a1c2830ec6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -140,7 +140,6 @@ int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* x2apic enabled before OS handover */ static int x2apic_preenabled; -static int disable_x2apic; static __init int setup_nox2apic(char *str) { if (x2apic_enabled()) { @@ -149,7 +148,6 @@ static __init int setup_nox2apic(char *str) return 0; } - disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; } diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 69328ac8de9..8952a589028 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -652,7 +652,8 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, return ret && es7000_apic_is_cluster(); } -struct apic apic_es7000_cluster = { +/* We've been warned by a false positive warning.Use __refdata to keep calm. */ +struct apic __refdata apic_es7000_cluster = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4d0216fcb36..d2ed6c5ddc8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1716,25 +1716,19 @@ __apicdebuginit(void) print_IO_APIC(void) return; } -__apicdebuginit(void) print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_field(int base) { - unsigned int v; - int i, j; + int i; if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1<<j)) - printk("1"); - else - printk("0"); - } - printk("\n"); - } + printk(KERN_DEBUG); + + for (i = 0; i < 8; i++) + printk(KERN_CONT "%08x", apic_read(base + i*0x10)); + + printk(KERN_CONT "\n"); } __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1745,7 +1739,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) if (apic_verbosity == APIC_QUIET) return; - printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", + printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); @@ -1786,11 +1780,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_bitfield(APIC_ISR); + print_APIC_field(APIC_ISR); printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_bitfield(APIC_TMR); + print_APIC_field(APIC_TMR); printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_bitfield(APIC_IRR); + print_APIC_field(APIC_IRR); if (APIC_INTEGRATED(ver)) { /* !82489DX */ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -3799,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + return irq; } @@ -4187,28 +4184,20 @@ fake_ioapic_page: } } -static int __init ioapic_insert_resources(void) +void __init ioapic_insert_resources(void) { int i; struct resource *r = ioapic_resources; if (!r) { - if (nr_ioapics > 0) { + if (nr_ioapics > 0) printk(KERN_ERR "IO APIC resources couldn't be allocated.\n"); - return -1; - } - return 0; + return; } for (i = 0; i < nr_ioapics; i++) { insert_resource(&iomem_resource, r); r++; } - - return 0; } - -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445727a..6ef00ba4c88 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; + if (WARN_ONCE(!mask, "empty IPI mask")) + return; + local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __default_send_IPI_dest_field(mask, vector, apic->dest_logical); diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 533e59c6fc8..ca96e68f0d2 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -493,7 +493,8 @@ static void numaq_setup_portio_remap(void) (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); } -struct apic apic_numaq = { +/* Use __refdata to keep false positive warning calm. */ +struct apic __refdata apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8e4cbb255c3..a5371ec3677 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled(); } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } /* @@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a284359627e..a8989aadc99 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) @@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 096d19aea2f..601159374e8 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -46,7 +46,7 @@ static int early_get_nodeid(void) return node_id.s.node_id; } -static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { if (!strcmp(oem_table_id, "UVL")) @@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -struct apic apic_x2apic_uv_x = { +struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", .probe = NULL, @@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .irq_dest_mode = 0, /* physical */ .target_cpus = uv_target_cpus, .disable_esr = 0, @@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) BUG(); } -static __init void map_low_mmrs(void) -{ - init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); - init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); -} - enum map_type {map_wb, map_uc}; static __init void map_high(char *id, unsigned long base, int shift, @@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) map_high("GRU", gru.s.base, shift, max_pnode, map_wb); } -static __init void map_config_high(int max_pnode) -{ - union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; - int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; - - cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); - if (cfg.s.enable) - map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); -} - -static __init void map_mmr_high(int max_pnode) -{ - union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; - int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; - - mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); - if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); -} - static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; @@ -566,8 +540,6 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; - map_low_mmrs(); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; @@ -591,6 +563,8 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) + uv_blade_info[blade].memory_nid = -1; get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); @@ -629,6 +603,9 @@ void __init uv_system_init(void) lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; + /* Any node on the blade, else will contain -1. */ + uv_blade_info[blade].memory_nid = nid; + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; @@ -662,11 +639,10 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; + max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); - map_mmr_high(max_pnode); - map_config_high(max_pnode); map_mmioh_high(max_pnode); uv_cpu_init(); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9a33a..442b5508893 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -811,7 +811,7 @@ static int apm_do_idle(void) u8 ret = 0; int idled = 0; int polling; - int err; + int err = 0; polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28e5f595604..63fddcd082c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -356,7 +356,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) /* check CPU config space for extended APIC ID */ - if (c->x86 >= 0xf) { + if (cpu_has_apic && c->x86 >= 0xf) { unsigned int val; val = read_pci_config(0, 24, 0, 0x68); if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) @@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* + * Some BIOSes incorrectly force this feature, but only K8 + * revision D (model = 0x14) and later actually support it. + */ + if (c->x86_model < 0x14) + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9..5ce60a88027 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } -static const struct cpu_dev *this_cpu __cpuinitdata; +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif +} + +static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +}; + +static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static const struct cpu_dev __cpuinitconst default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 81cbe64ed6b..2a50ef89100 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -299,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate) static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) { - if (core_voltage_pre_transition(data, reqvid)) + if (core_voltage_pre_transition(data, reqvid, reqfid)) return 1; if (core_frequency_transition(data, reqfid)) @@ -327,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data, /* Phase 1 - core voltage transition ... setup voltage */ static int core_voltage_pre_transition(struct powernow_k8_data *data, - u32 reqvid) + u32 reqvid, u32 reqfid) { u32 rvosteps = data->rvo; u32 savefid = data->currfid; - u32 maxvid, lo; + u32 maxvid, lo, rvomult = 1; dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); + if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) + rvomult = 2; + rvosteps *= rvomult; rdmsr(MSR_FIDVID_STATUS, lo, maxvid); maxvid = 0x1f & (maxvid >> 16); dprintk("ph1 maxvid=0x%x\n", maxvid); @@ -351,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, return 1; } - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + while ((rvosteps > 0) && + ((rvomult * data->rvo + data->currvid) > reqvid)) { if (data->currvid == maxvid) { rvosteps = 0; } else { @@ -384,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) u32 vcoreqfid, vcocurrfid, vcofiddiff; u32 fid_interval, savevid = data->currvid; - if ((reqfid < HI_FID_TABLE_BOTTOM) && - (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition " - "0x%x 0x%x\n", reqfid, data->currfid); - return 1; - } - if (data->currfid == reqfid) { printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); @@ -407,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid : vcoreqfid - vcocurrfid; + if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) + vcofiddiff = 0; + while (vcofiddiff > 2) { (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); @@ -1081,14 +1081,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, return 0; } - if ((fid < HI_FID_TABLE_BOTTOM) && - (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX - "ignoring illegal change in lo freq table-%x to 0x%x\n", - data->currfid, fid); - return 1; - } - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", smp_processor_id(), fid, vid); freqs.old = find_khz_freq_from_fid(data->currfid); @@ -1267,7 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { static const char ACPI_PSS_BIOS_BUG_MSG[] = KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; int rc; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index c9c1190b5e1..02ce824073c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -215,7 +215,8 @@ struct pst_s { #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_voltage_pre_transition(struct powernow_k8_data *data, + u32 reqvid, u32 regfid); static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index af425b83202..01213048f62 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -194,14 +194,14 @@ static void print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk("\n"); + printk(KERN_CONT "\n"); } printk(KERN_EMERG "TSC %llx ", m->tsc); if (m->addr) - printk("ADDR %llx ", m->addr); + printk(KERN_CONT "ADDR %llx ", m->addr); if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); + printk(KERN_CONT "MISC %llx ", m->misc); + printk(KERN_CONT "\n"); printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); @@ -209,13 +209,13 @@ static void print_mce(struct mce *m) static void print_mce_head(void) { - printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); + printk(KERN_EMERG "\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { printk(KERN_EMERG "This is not a software problem!\n" - KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -1226,8 +1226,13 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static int mce_cpu_quirks(struct cpuinfo_x86 *c) { + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { + pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); + return -EOPNOTSUPP; + } + /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { if (c->x86 == 15 && banks > 4) { @@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; + + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) + mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; if (mce_bootlog != 0) mce_panic_timeout = 30; + + return 0; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) @@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (mce_cap_init() < 0) { + if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { mce_disabled = 1; return; } - mce_cpu_quirks(c); machine_check_vector = do_machine_check; @@ -1692,17 +1705,15 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, const char *buf, size_t siz) { char *p; - int len; strncpy(mce_helper, buf, sizeof(mce_helper)); mce_helper[sizeof(mce_helper)-1] = 0; - len = strlen(mce_helper); p = strchr(mce_helper, '\n'); - if (*p) + if (p) *p = 0; - return len; + return strlen(mce_helper) + !!p; } static ssize_t set_ignore_ce(struct sys_device *s, diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd..5957a93e517 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -36,6 +36,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +static DEFINE_PER_CPU(bool, thermal_throttle_active); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -96,27 +97,33 @@ static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); + bool was_throttled = __get_cpu_var(thermal_throttle_active); + bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; - if (curr) + if (is_throttled) __get_cpu_var(thermal_throttle_count)++; - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + if (!(was_throttled ^ is_throttled) && + time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; /* if we just entered the thermal event */ - if (curr) { + if (is_throttled) { printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); + "cpu clock throttled (total events = %lu)\n", + cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + return 1; + } + if (was_throttled) { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + return 1; } - return 1; + return 0; } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d4cf4ce19aa..900332b800f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -55,6 +55,7 @@ struct x86_pmu { int num_counters_fixed; int counter_bits; u64 counter_mask; + int apic; u64 max_period; u64 intel_ctrl; }; @@ -66,6 +67,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { }; /* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int event) +{ + return p6_perfmon_event_map[event]; +} + +/* + * Counter setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_COUNTER 0x0000002EULL + +static u64 p6_pmu_raw_event(u64 event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_COUNTER_MASK) + + return event & P6_EVNTSEL_MASK; +} + + +/* * Intel PerfMon v3. Used on Core2 and later. */ static const u64 intel_perfmon_event_map[] = @@ -567,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static bool reserve_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -581,9 +629,11 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } +#endif return true; +#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -598,10 +648,12 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; +#endif } static void release_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_counters; i++) { @@ -611,6 +663,7 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); +#endif } static void hw_perf_counter_destroy(struct perf_counter *counter) @@ -666,6 +719,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_attr *attr = &counter->attr; struct hw_perf_counter *hwc = &counter->hw; + u64 config; int err; if (!x86_pmu_initialized()) @@ -701,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * counters (user-space has to fall back and + * sample via a hrtimer based software counter): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; } counter->destroy = hw_perf_counter_destroy; @@ -718,14 +781,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (attr->config >= x86_pmu.max_events) return -EINVAL; + /* * The generic map: */ - hwc->config |= x86_pmu.event_map(attr->config); + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + hwc->config |= config; return 0; } +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_disable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -767,6 +856,23 @@ void hw_perf_disable(void) return x86_pmu.disable_all(); } +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_enable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); @@ -784,13 +890,13 @@ static void amd_pmu_enable_all(void) barrier(); for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_counter *counter = cpuc->counters[idx]; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) - continue; + + val = counter->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } @@ -819,16 +925,13 @@ static inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, - hwc->config); + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void @@ -836,13 +939,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; - int err; mask = 0xfULL << (idx * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - err = checking_wrmsrl(hwc->config_base, ctrl_val); + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void +p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val = P6_NOP_COUNTER; + + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); } static inline void @@ -943,6 +1057,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } +static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + + static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { @@ -959,8 +1086,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) if (cpuc->enabled) x86_pmu_enable_counter(hwc, idx); - else - x86_pmu_disable_counter(hwc, idx); } static int @@ -1176,6 +1301,49 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_counters *cpuc; + struct perf_counter *counter; + struct hw_perf_counter *hwc; + int idx, handled = 0; + u64 val; + + data.regs = regs; + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_counters); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + counter = cpuc->counters[idx]; + hwc = &counter->hw; + + val = x86_perf_counter_update(counter, hwc, idx); + if (val & (1ULL << (x86_pmu.counter_bits - 1))) + continue; + + /* + * counter overflow + */ + handled = 1; + data.period = counter->hw.last_period; + + if (!x86_perf_counter_set_period(counter, hwc, idx)) + continue; + + if (perf_counter_overflow(counter, 1, &data)) + p6_pmu_disable_counter(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} /* * This handler is triggered by the local APIC, so the APIC IRQ handling @@ -1185,14 +1353,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; - int bit, cpu, loops; + int bit, loops; u64 ack, status; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); status = intel_pmu_get_status(); @@ -1249,14 +1416,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; - int cpu, idx, handled = 0; + int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) @@ -1299,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) void set_perf_counter_pending(void) { +#ifdef CONFIG_X86_LOCAL_APIC apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif } void perf_counters_lapic_init(void) { - if (!x86_pmu_initialized()) +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) return; /* * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif } static int __kprobes @@ -1334,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; +#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif /* * Can't rely on the handled return value to say it was our NMI, two * counters could trigger 'simultaneously' raising two back-to-back NMIs. @@ -1353,6 +1525,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_counter, + .disable = p6_pmu_disable_counter, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_counters = 2, + /* + * Counters have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a counter for P6-like PMU is 32 bits only. + * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .counter_bits = 32, + .counter_mask = (1ULL << 32) - 1, +}; + static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1365,6 +1564,7 @@ static struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -1388,10 +1588,43 @@ static struct x86_pmu amd_pmu = { .num_counters = 4, .counter_bits = 48, .counter_mask = (1ULL << 48) - 1, + .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, }; +static int p6_pmu_init(void) +{ + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* Pentium M */ + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + if (!cpu_has_apic) { + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; + } + + return 0; +} + static int intel_pmu_init(void) { union cpuid10_edx edx; @@ -1400,8 +1633,14 @@ static int intel_pmu_init(void) unsigned int ebx; int version; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { return -ENODEV; + } + } /* * Check whether the Architectural PerfMon supports @@ -1561,6 +1800,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); +static DEFINE_PER_CPU(int, in_nmi_frame); static void @@ -1576,7 +1816,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - /* Process all stacks: */ + per_cpu(in_nmi_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name); + return 0; } @@ -1584,6 +1826,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; + if (per_cpu(in_nmi_frame, smp_processor_id())) + return; + if (reliable) callchain_store(entry, addr); } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 5c481f6205b..e60ed740d2b 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -803,8 +803,3 @@ int __kprobes lapic_wd_event(unsigned nmi_hz) wd_ops->rearm(wd, nmi_hz); return 1; } - -int lapic_watchdog_ok(void) -{ - return wd_ops != NULL; -} diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index d593cd1f58d..bca5fba91c9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -19,6 +19,12 @@ #include "dumpstack.h" +/* Just a stub for now */ +int x86_is_stack_id(int id, char *name) +{ + return 0; +} + void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d35db5993fd..54b0a327676 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -19,10 +19,8 @@ #include "dumpstack.h" -static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) -{ - static char ids[][8] = { + +static char x86_stack_ids[][8] = { [DEBUG_STACK - 1] = "#DB", [NMI_STACK - 1] = "NMI", [DOUBLEFAULT_STACK - 1] = "#DF", @@ -33,6 +31,15 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; + +int x86_is_stack_id(int id, char *name) +{ + return x86_stack_ids[id - 1] == name; +} + +static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, + unsigned *usedp, char **idp) +{ unsigned k; /* @@ -61,7 +68,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, if (*usedp & (1U << k)) break; *usedp |= 1U << k; - *idp = ids[k]; + *idp = x86_stack_ids[k]; return (unsigned long *)end; } /* @@ -81,12 +88,13 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, do { ++j; end -= EXCEPTION_STKSZ; - ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); + x86_stack_ids[j][4] = '1' + + (j - N_EXCEPTION_STACKS); } while (stack < end - EXCEPTION_STKSZ); if (*usedp & (1U << j)) break; *usedp |= 1U << j; - *idp = ids[j]; + *idp = x86_stack_ids[j]; return (unsigned long *)end; } #endif diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c4ca89d9aaf..5cb5725b2ba 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -627,10 +627,9 @@ __init void e820_setup_gap(void) #ifdef CONFIG_X86_64 if (!found) { gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " - "address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource " - "registers may break!\n"); + printk(KERN_ERR + "PCI: Warning: Cannot find a gap in the 32bit address range\n" + "PCI: Unassigned devices with 32bit resource registers may break!\n"); } #endif diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 96f7ac0bbf0..fe26ba3e345 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -354,7 +354,7 @@ void __init efi_init(void) */ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else @@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void) && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else - va = efi_ioremap(md->phys_addr, size); + va = efi_ioremap(md->phys_addr, size, md->type); md->virt_addr = (u64) (unsigned long) va; diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 22c3b7828c5..ac0621a7ac3 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void) early_runtime_code_mapping_set_exec(0); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type) { unsigned long last_map_pfn; + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8663afb5653..0d98a01cbdb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -602,7 +602,11 @@ ignore_int: #endif iret -.section .cpuinit.data,"wa" +#ifndef CONFIG_HOTPLUG_CPU + __CPUINITDATA +#else + __REFDATA +#endif .align 4 ENTRY(initial_code) .long i386_start_kernel diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 696f0e475c2..92b7703d3d5 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -187,7 +187,7 @@ static void __init apic_intr_init(void) #ifdef CONFIG_X86_THERMAL_VECTOR alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif -#ifdef CONFIG_X86_THRESHOLD +#ifdef CONFIG_X86_MCE_THRESHOLD alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a78ecad0c90..c664d515f61 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -200,7 +200,7 @@ static void kvm_leave_lazy_mmu(void) state->mode = paravirt_get_lazy_mode(); } -static void paravirt_ops_setup(void) +static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; pv_info.paravirt_enabled = 1; diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 846510b78a0..2a62d843f01 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -347,7 +347,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) static struct irqaction mfgptirq = { .handler = mfgpt_tick, - .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, .name = "mfgpt-timer" }; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cfd9f906389..d2e56b8f48e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -675,7 +675,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) nommu: /* Should not happen anymore */ printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "falling back to iommu=soft.\n"); return -1; } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 4f9c55f3a7c..03801f2f761 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -60,7 +60,7 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) "adc %5,%%edx ; " : "=A" (product), "=r" (tmp1), "=r" (tmp2) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); -#elif __x86_64__ +#elif defined(__x86_64__) __asm__ ( "mul %%rdx ; shrd $32,%%rdx,%%rax" : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d2d1ce8170f..a06e8d10184 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/efi.h> +#include <linux/dmi.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -17,7 +18,6 @@ #include <asm/cpu.h> #ifdef CONFIG_X86_32 -# include <linux/dmi.h> # include <linux/ctype.h> # include <linux/mc146818rtc.h> #else @@ -249,6 +249,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), }, }, + { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */ + .callback = set_bios_reboot, + .ident = "CompuLab SBC-FITPC2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"), + DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), + }, + }, { } }; @@ -396,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart); #endif /* CONFIG_X86_32 */ +/* + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot + */ +static int __init set_pci_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_CF9) { + reboot_type = BOOT_CF9; + printk(KERN_INFO "%s series board detected. " + "Selecting PCI-method for reboots.\n", d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { + { /* Handle problems with rebooting on Apple MacBook5 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), + }, + }, + { /* Handle problems with rebooting on Apple MacBookPro5 */ + .callback = set_pci_reboot, + .ident = "Apple MacBookPro5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), + }, + }, + { } +}; + +static int __init pci_reboot_init(void) +{ + dmi_check_system(pci_reboot_dmi_table); + return 0; +} +core_initcall(pci_reboot_init); + static inline void kb_wait(void) { int i; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index de2cab13284..63f32d220ef 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -672,6 +672,19 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, + { + /* + * AMI BIOS with low memory corruption was found on Intel DG45ID board. + * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will + * match only DMI_BOARD_NAME and see if there is more bad products + * with this vendor. + */ + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), + }, + }, #endif {} }; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4..07d81916f21 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; + size_t tot_size = nr_cpu_ids * PMD_SIZE; /* on non-NUMA, embedding is better */ if (!pcpu_need_numa()) @@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); pcpul_map = alloc_bootmem(map_size); for_each_possible_cpu(cpu) { @@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) /* allocate address and map */ pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; + pcpul_vm.size = nr_cpu_ids * PMD_SIZE; vm_area_register_early(&pcpul_vm, PMD_SIZE); for_each_possible_cpu(cpu) { @@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) PMD_SIZE, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) + for (i = 0; i < nr_cpu_ids - 1; i++) + for (j = i + 1; j < nr_cpu_ids; j++) if (pcpul_map[i].ptr > pcpul_map[j].ptr) { struct pcpul_ent tmp = pcpul_map[i]; pcpul_map[i] = pcpul_map[j]; @@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr) { void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; + int left = 0, right = nr_cpu_ids - 1; int pos; /* pcpul in use at all? */ @@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pcpu4k_nr_static_pages = PFN_UP(static_size); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids * sizeof(pcpu4k_pages[0])); pcpu4k_pages = alloc_bootmem(pages_size); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 8ccabb8a2f6..77b9689f8ed 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode) * note that base_dest_nodeid is actually a nasid. */ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; + ad2->header.dest_subnodeid = 0x10; /* the LB */ ad2->header.command = UV_NET_ENDPOINT_INTD; ad2->header.int_both = 1; /* diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6e1a368d21d..71f4368b357 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ +static inline int pit_verify_msb(unsigned char val) +{ + /* Ignore LSB */ + inb(0x42); + return inb(0x42) == val; +} + static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { int count; u64 tsc = 0; for (count = 0; count < 50000; count++) { - /* Ignore LSB */ - inb(0x42); - if (inb(0x42) != val) + if (!pit_verify_msb(val)) break; tsc = get_cycles(); } @@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void) * to do that is to just read back the 16-bit counter * once from the PIT. */ - inb(0x42); - inb(0x42); + pit_verify_msb(0); if (pit_expect_msb(0xff, &tsc, &d1)) { for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { @@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void) * Iterate until the error is less than 500 ppm */ delta -= tsc; - if (d1+d2 < delta >> 11) - goto success; + if (d1+d2 >= delta >> 11) + continue; + + /* + * Check the PIT one more time to verify that + * all TSC reads were stable wrt the PIT. + * + * This also guarantees serialization of the + * last cycle read ('d2') in pit_expect_msb. + */ + if (!pit_verify_msb(0xfe - i)) + break; + goto success; } } printk("Fast TSC calibration failed\n"); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b263423fbe2..95a7289e4b0 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ds = __USER_DS; ap.es = __USER_DS; ap.fs = __KERNEL_PERCPU; - ap.gs = 0; + ap.gs = __KERNEL_STACK_CANARY; ap.eflags = 0; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e8788204..78d185d797d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -112,11 +112,6 @@ SECTIONS _sdata = .; DATA_DATA CONSTRUCTORS - -#ifdef CONFIG_X86_64 - /* End of data section */ - _edata = .; -#endif } :data #ifdef CONFIG_X86_32 @@ -156,10 +151,8 @@ SECTIONS .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) -#ifdef CONFIG_X86_32 /* End of data section */ _edata = .; -#endif } #ifdef CONFIG_X86_64 @@ -400,8 +393,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -414,12 +407,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -427,7 +420,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include <asm/kexec.h> -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif |