diff options
Diffstat (limited to 'arch/x86/kernel')
76 files changed, 2000 insertions, 2224 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 145cce75cda..235f5927bb9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o +obj-y += setup.o i8259.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -89,7 +89,8 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 723989d7f80..631086159c5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include <linux/bootmem.h> #include <linux/ioport.h> +#include <linux/pci.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - if (triggering == ACPI_LEVEL_SENSITIVE) + if (trigger == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } #endif #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, triggering, polarity); + plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); @@ -903,10 +904,8 @@ extern int es7000_plat; #endif static struct { - int apic_id; int gsi_base; int gsi_end; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; int mp_find_ioapic(int gsi) @@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 mp_ioapics[idx].apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].apicver = 0; -#endif + /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); @@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void) } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) { +#ifdef CONFIG_X86_MPPARSE + struct mpc_intsrc mp_irq; + struct pci_dev *pdev; + unsigned char number; + unsigned int devfn; int ioapic; - int ioapic_pin; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 + u8 pin; - static int pci_irq = IRQ_COMPRESSION_START; - /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else + if (!acpi_ioapic) + return 0; + if (!dev) + return 0; + if (dev->bus != &pci_bus_type) + return 0; + + pdev = to_pci_dev(dev); + number = pdev->bus->number; + devfn = pdev->devfn; + pin = pdev->pin; + /* print the entry should happen on mptable identically */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.dstapic = mp_ioapics[ioapic].apicid; + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); + + save_mp_irq(&mp_irq); +#endif + return 0; +} + +int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + int ioapic; + int ioapic_pin; + struct io_apic_irq_attr irq_attr; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; -#endif /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) @@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) gsi = ioapic_renumber_irq(ioapic, gsi); #endif - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ if (ioapic_pin > MP_MAX_IOAPIC_PIN) { printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + "%d-%d\n", mp_ioapics[ioapic].apicid, ioapic_pin); return gsi; } - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif - } - - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); -#ifdef CONFIG_X86_32 - /* - * For GSI >= 64, use IRQ compression - */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} -int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, - u32 gsi, int triggering, int polarity) -{ -#ifdef CONFIG_X86_MPPARSE - struct mpc_intsrc mp_irq; - int ioapic; + if (enable_update_mptable) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); - if (!acpi_ioapic) - return 0; + set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, + trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + io_apic_set_pci_routing(dev, gsi, &irq_attr); - /* print the entry should happen on mptable identically */ - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | - (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.srcbus = number; - mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); - ioapic = mp_find_ioapic(gsi); - mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); - - save_mp_irq(&mp_irq); -#endif - return 0; + return gsi; } /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 42c33cebf00..8c0be0902da 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -49,10 +49,10 @@ #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 -#define IVHD_FLAG_HT_TUN_EN 0x00 -#define IVHD_FLAG_PASSPW_EN 0x01 -#define IVHD_FLAG_RESPASSPW_EN 0x02 -#define IVHD_FLAG_ISOC_EN 0x03 +#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 +#define IVHD_FLAG_PASSPW_EN_MASK 0x02 +#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 +#define IVHD_FLAG_ISOC_EN_MASK 0x08 #define IVMD_FLAG_EXCL_RANGE 0x08 #define IVMD_FLAG_UNITY_MAP 0x01 @@ -569,19 +569,19 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, * First set the recommended feature enable bits from ACPI * into the IOMMU control registers */ - h->flags & IVHD_FLAG_HT_TUN_EN ? + h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - h->flags & IVHD_FLAG_PASSPW_EN ? + h->flags & IVHD_FLAG_PASSPW_EN_MASK ? iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - h->flags & IVHD_FLAG_RESPASSPW_EN ? + h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - h->flags & IVHD_FLAG_ISOC_EN ? + h->flags & IVHD_FLAG_ISOC_EN_MASK ? iommu_feature_enable(iommu, CONTROL_ISOC_EN) : iommu_feature_disable(iommu, CONTROL_ISOC_EN); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 098ec84b8c0..a4c9cf0bf70 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -98,6 +98,29 @@ early_param("lapic", parse_lapic); /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; +/* + * Handle interrupt mode configuration register (IMCR). + * This register controls whether the interrupt signals + * that reach the BSP come from the master PIC or from the + * local APIC. Before entering Symmetric I/O Mode, either + * the BIOS or the operating system must switch out of + * PIC Mode by changing the IMCR. + */ +static inline void imcr_pic_to_apic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go through APIC */ + outb(0x01, 0x23); +} + +static inline void imcr_apic_to_pic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go directly to BSP */ + outb(0x00, 0x23); +} #endif #ifdef CONFIG_X86_64 @@ -111,13 +134,19 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +int x2apic_mode; #ifdef CONFIG_X86_X2APIC -int x2apic; /* x2apic enabled before OS handover */ static int x2apic_preenabled; static int disable_x2apic; static __init int setup_nox2apic(char *str) { + if (x2apic_enabled()) { + pr_warning("Bios already enabled x2apic, " + "can't enforce nox2apic"); + return 0; + } + disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; @@ -209,6 +238,31 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * bare function to substitute write operation + * and it's _that_ fast :) + */ +static void native_apic_write_dummy(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +static u32 native_apic_read_dummy(u32 reg) +{ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +/* + * right after this call apic->write/read doesn't do anything + * note that there is no restore operation it works one way + */ +void apic_disable(void) +{ + apic->read = native_apic_read_dummy; + apic->write = native_apic_write_dummy; +} + void native_apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -348,7 +402,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) { - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); unsigned int v = (mask << 16) | (msg_type << 8) | vector; apic_write(reg, v); @@ -431,6 +485,12 @@ static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); + if (cpu_has(¤t_cpu_data, X86_FEATURE_ARAT)) { + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; + /* Make LAPIC timer preferrable over percpu HPET */ + lapic_clockevent.rating = 150; + } + memcpy(levt, &lapic_clockevent, sizeof(*levt)); levt->cpumask = cpumask_of(smp_processor_id()); @@ -809,7 +869,7 @@ void clear_local_APIC(void) u32 v; /* APIC hasn't been mapped yet */ - if (!x2apic && !apic_phys) + if (!x2apic_mode && !apic_phys) return; maxlvt = lapic_get_maxlvt(); @@ -1281,7 +1341,7 @@ void check_x2apic(void) { if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; + x2apic_preenabled = x2apic_mode = 1; } } @@ -1289,7 +1349,7 @@ void enable_x2apic(void) { int msr, msr2; - if (!x2apic) + if (!x2apic_mode) return; rdmsr(MSR_IA32_APICBASE, msr, msr2); @@ -1298,6 +1358,7 @@ void enable_x2apic(void) wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } +#endif /* CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) { @@ -1306,32 +1367,21 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; + ret = dmar_table_init(); + if (ret) { + pr_debug("dmar_table_init() failed with %d:\n", ret); + goto ir_failed; } - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; + if (!intr_remapping_supported()) { + pr_debug("intr-remapping not supported\n"); + goto ir_failed; } - ret = dmar_table_init(); - if (ret) { - pr_info("dmar_table_init() failed with %d:\n", ret); - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - pr_info("Not enabling x2apic,Intr-remapping\n"); + if (!x2apic_preenabled && skip_ioapic_setup) { + pr_info("Skipped enabling intr-remap because of skipping " + "io-apic setup\n"); return; } @@ -1351,19 +1401,16 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } - + ret = enable_intr_remapping(x2apic_supported()); if (ret) goto end_restore; - if (!x2apic) { - x2apic = 1; + pr_info("Enabled Interrupt-remapping\n"); + + if (x2apic_supported() && !x2apic_mode) { + x2apic_mode = 1; enable_x2apic(); + pr_info("Enabled x2apic\n"); } end_restore: @@ -1372,37 +1419,34 @@ end_restore: * IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - else - reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); end: - if (!ret) { - if (!x2apic_preenabled) - pr_info("Enabled x2apic and interrupt-remapping\n"); - else - pr_info("Enabled Interrupt-remapping\n"); - } else - pr_err("Failed to enable Interrupt-remapping and x2apic\n"); if (ioapic_entries) free_ioapic_entries(ioapic_entries); + + if (!ret) + return; + +ir_failed: + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else if (cpu_has_x2apic) + pr_info("Not enabling x2apic,Intr-remapping\n"); #else if (!cpu_has_x2apic) return; if (x2apic_preenabled) panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); + " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); #endif return; } -#endif /* CONFIG_X86_X2APIC */ + #ifdef CONFIG_X86_64 /* @@ -1419,7 +1463,6 @@ static int __init detect_init_APIC(void) } mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_physical_apicid = 0; return 0; } #else @@ -1533,32 +1576,49 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { - if (x2apic) { + unsigned int new_apicid; + + if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); return; } - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ + /* If no local APIC can be found return early */ if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else + /* lets NOP'ify apic operations */ + pr_info("APIC: disable apic facility\n"); + apic_disable(); + } else { apic_phys = mp_lapic_addr; - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", - APIC_BASE, apic_phys); + /* + * acpi lapic path already maps that address in + * acpi_register_lapic_address() + */ + if (!acpi_lapic) + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + + apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", + APIC_BASE, apic_phys); + } /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); + new_apicid = read_apic_id(); + if (boot_cpu_physical_apicid != new_apicid) { + boot_cpu_physical_apicid = new_apicid; + /* + * yeah -- we lie about apic_version + * in case if apic was disabled via boot option + * but it's not a problem for SMP compiled kernel + * since smp_sanity_check is prepared for such a case + * and disable smp mode + */ + apic_version[new_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); + } } /* @@ -1727,8 +1787,7 @@ void __init connect_bsp_APIC(void) */ apic_printk(APIC_VERBOSE, "leaving PIC mode, " "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); + imcr_pic_to_apic(); } #endif if (apic->enable_apic_mode) @@ -1756,8 +1815,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) */ apic_printk(APIC_VERBOSE, "disabling APIC mode, " "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); + imcr_apic_to_pic(); return; } #endif @@ -1963,10 +2021,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); -#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) disable_intr_remapping(); -#endif + local_irq_restore(flags); return 0; } @@ -1976,42 +2034,34 @@ static int lapic_resume(struct sys_device *dev) unsigned int l, h; unsigned long flags; int maxlvt; - -#ifdef CONFIG_INTR_REMAP - int ret; + int ret = 0; struct IO_APIC_route_entry **ioapic_entries = NULL; if (!apic_pm_state.active) return 0; local_irq_save(flags); - if (x2apic) { + if (intr_remapping_enabled) { ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { WARN(1, "Alloc ioapic_entries in lapic resume failed."); - return -ENOMEM; + ret = -ENOMEM; + goto restore; } ret = save_IO_APIC_setup(ioapic_entries); if (ret) { WARN(1, "Saving IO-APIC state failed: %d\n", ret); free_ioapic_entries(ioapic_entries); - return ret; + goto restore; } mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - enable_x2apic(); } -#else - if (!apic_pm_state.active) - return 0; - local_irq_save(flags); - if (x2apic) + if (x2apic_mode) enable_x2apic(); -#endif - else { /* * Make sure the APICBASE points to the right address @@ -2049,21 +2099,16 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - reenable_intr_remapping(EIM_32BIT_APIC_ID); - - if (x2apic) { + if (intr_remapping_enabled) { + reenable_intr_remapping(x2apic_mode); unmask_8259A(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); } -#endif - +restore: local_irq_restore(flags); - - return 0; + return ret; } /* @@ -2111,31 +2156,14 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ #ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) + +static int __cpuinit apic_cluster_num(void) { int i, clusters, zeros; unsigned id; u16 *bios_cpu_apicid; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); @@ -2171,18 +2199,67 @@ __cpuinit int apic_is_clustered_box(void) ++zeros; } - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) + return clusters; +} + +static int __cpuinitdata multi_checked; +static int __cpuinitdata multi; + +static int __cpuinit set_multi(const struct dmi_system_id *d) +{ + if (multi) + return 0; + pr_info("APIC: %s detected, Multi Chassis\n", d->ident); + multi = 1; + return 0; +} + +static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { + { + .callback = set_multi, + .ident = "IBM System Summit2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), + }, + }, + {} +}; + +static void __cpuinit dmi_check_multi(void) +{ + if (multi_checked) + return; + + dmi_check_system(multi_dmi_table); + multi_checked = 1; +} + +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. + * Use DMI to check them + */ +__cpuinit int apic_is_clustered_box(void) +{ + dmi_check_multi(); + if (multi) return 1; + if (!is_vsmp_box()) + return 0; + /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. + * ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards */ - return (clusters > 2); + if (apic_cluster_num() > 1) + return 1; + + return 0; } #endif diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0014714ea97..d0c99abc26c 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -161,7 +161,7 @@ static int flat_apic_id_registered(void) static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { - return hard_smp_processor_id() >> index_msb; + return initial_apic_id >> index_msb; } struct apic apic_flat = { @@ -212,7 +212,7 @@ struct apic apic_flat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, + .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, @@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) * regardless of how many processors are present (x86_64 ES7000 * is an example). */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; @@ -362,7 +362,7 @@ struct apic apic_physflat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, + .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 1c11b819f24..69328ac8de9 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) +static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) { unsigned long vect = 0, psaival = 0; @@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr) } #ifdef CONFIG_ACPI -static int find_unisys_acpi_oem_table(unsigned long *oem_addr) +static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; struct es7000_oem_table *table; @@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr) return 0; } -static void unmap_unisys_acpi_oem_table(unsigned long oem_addr) +static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { if (!oem_addr) return; @@ -306,7 +306,7 @@ static int es7000_check_dsdt(void) static int es7000_acpi_ret; /* Hook from generic ACPI tables.c */ -static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { unsigned long oem_addr = 0; int check_dsdt; @@ -717,7 +717,7 @@ struct apic apic_es7000_cluster = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; -struct apic apic_es7000 = { +struct apic __refdata apic_es7000 = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 767fe7e46d6..1946fac42ab 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -59,6 +59,7 @@ #include <asm/setup.h> #include <asm/irq_remapping.h> #include <asm/hpet.h> +#include <asm/hw_irq.h> #include <asm/uv/uv_hub.h> #include <asm/uv/uv_irq.h> @@ -129,12 +130,9 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +static struct irq_pin_list *get_one_free_irq_2_pin(int node) { struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); @@ -148,9 +146,6 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) return cfg; } -static struct irq_cfg *get_one_free_irq_cfg(int cpu) +static struct irq_cfg *get_one_free_irq_cfg(int node) { struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { @@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) return cfg; } -int arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int node) { struct irq_cfg *cfg; cfg = desc->chip_data; if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); + desc->chip_data = get_one_free_irq_cfg(node); if (!desc->chip_data) { printk(KERN_ERR "can not alloc irq_cfg\n"); BUG_ON(1); @@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) return 0; } -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - +/* for move_irq_desc */ static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) { struct irq_pin_list *old_entry, *head, *tail, *entry; @@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) if (!old_entry) return; - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) return; @@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) tail = entry; old_entry = old_entry->next; while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { entry = head; while (entry) { @@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) } void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { struct irq_cfg *cfg; struct irq_cfg *old_cfg; - cfg = get_one_free_irq_cfg(cpu); + cfg = get_one_free_irq_cfg(node); if (!cfg) return; @@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); - init_copy_irq_2_pin(old_cfg, cfg, cpu); + init_copy_irq_2_pin(old_cfg, cfg, node); } static void free_irq_cfg(struct irq_cfg *old_cfg) @@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->chip_data = NULL; } } - -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; - - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(desc->affinity, mask)) - cfg->move_desc_pending = 1; - } -} -#endif +/* end for move_irq_desc */ #else static struct irq_cfg *irq_cfg(unsigned int irq) @@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ -} -#endif - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -518,132 +490,18 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } -#ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - -/* - * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and - * leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned int irq; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; - - irq = desc->irq; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; - - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - - cpumask_copy(desc->affinity, mask); - - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - set_ioapic_affinity_irq_desc(desc, mask); -} -#endif /* CONFIG_SMP */ - /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list *entry; entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", apic, pin); @@ -663,7 +521,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); + entry->next = get_one_free_irq_2_pin(node); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -672,7 +530,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { @@ -692,7 +550,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + add_pin_to_irq_node(cfg, node, newapic, newpin); } static inline void io_apic_modify_irq(struct irq_cfg *cfg, @@ -850,7 +708,6 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_INTR_REMAP struct IO_APIC_route_entry **alloc_ioapic_entries(void) { int apic; @@ -948,20 +805,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries) - -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(ioapic_entries); -} - void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) { int apic; @@ -971,7 +814,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) kfree(ioapic_entries); } -#endif /* * Find the IRQ entry number of a certain pin. @@ -1032,54 +874,6 @@ static int __init find_isa_irq_apic(int irq, int type) return -1; } -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - #if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR @@ -1298,6 +1092,64 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, + struct io_apic_irq_attr *irq_attr) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, + "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (test_bit(bus, mp_bus_not_pci)) { + apic_printk(APIC_VERBOSE, + "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].srcbusirq & 3)) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + return irq; + } + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + best_guess = irq; + } + } + } + return best_guess; +} +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + void lock_vector_lock(void) { /* Used to the online set of cpus does not change @@ -1628,58 +1480,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq ioapic_write_entry(apic_id, pin, entry); } +static struct { + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; + static void __init setup_IO_APIC_irqs(void) { - int apic_id, pin, idx, irq; + int apic_id = 0, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { - for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - - idx = find_irq_entry(apic_id, pin, mp_INT); - if (idx == -1) { - if (!notcon) { - notcon = 1; - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic_id].apicid, pin); - } else - apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic_id].apicid, pin); - continue; - } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + apic_id = mp_find_ioapic(0); + if (apic_id < 0) + apic_id = 0; + } +#endif - irq = pin_2_irq(idx, apic_id, pin); + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) { + if (!notcon) { + notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic_id].apicid, pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic_id].apicid, pin); + continue; + } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) - continue; + irq = pin_2_irq(idx, apic_id, pin); - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - continue; - } - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) + continue; - setup_IO_APIC_irq(apic_id, pin, irq, desc, - irq_trigger(idx), irq_polarity(idx)); + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; } + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + /* + * don't mark it in pin_programmed, so later acpi could + * set it correctly when irq < 16 + */ + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); } if (notcon) @@ -1869,7 +1733,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) __apicdebuginit(void) print_local_APIC(void *dummy) { - unsigned int v, ver, maxlvt; + unsigned int i, v, ver, maxlvt; u64 icr; if (apic_verbosity == APIC_QUIET) @@ -1957,6 +1821,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); v = apic_read(APIC_TDCR); printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + v = apic_read(APIC_EFEAT); + maxlvt = (v >> 16) & 0xff; + printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); + v = apic_read(APIC_ECTRL); + printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); + for (i = 0; i < maxlvt; i++) { + v = apic_read(APIC_EILVTn(i)); + printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); + } + } printk("\n"); } @@ -2005,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void) __apicdebuginit(int) print_all_ICs(void) { print_PIC(); + + /* don't print out if apic is not there */ + if (!cpu_has_apic || disable_apic) + return 0; + print_all_local_APICs(); print_IO_APIC(); @@ -2360,6 +2241,118 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); + +/* + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned int irq; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; + + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return BAD_APICID; + + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); +} + +static int +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + int ret = -1; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + ret = 0; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return ret; +} + +static int +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + return set_ioapic_affinity_irq_desc(desc, mask); +} #ifdef CONFIG_INTR_REMAP @@ -2374,26 +2367,25 @@ static int ioapic_retrigger_irq(unsigned int irq) * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. */ -static void +static int migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; unsigned int dest; unsigned int irq; + int ret = -1; if (!cpumask_intersects(mask, cpu_online_mask)) - return; + return ret; irq = desc->irq; if (get_irte(irq, &irte)) - return; + return ret; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); + return ret; dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); @@ -2409,27 +2401,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) send_cleanup_vector(cfg); cpumask_copy(desc->affinity, mask); + + return 0; } /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - migrate_ioapic_irq_desc(desc, mask); + return migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, +static int set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + return set_ir_ioapic_affinity_irq_desc(desc, mask); } #else -static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { + return 0; } #endif @@ -2491,86 +2486,19 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain has not changed, but affinity did */ - me = smp_processor_id(); - if (cpumask_test_cpu(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); - } } #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif -#ifdef CONFIG_X86_X2APIC -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void ack_x2apic_level(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - ack_x2APIC_irq(); - eoi_ioapic_irq(desc); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - static void ack_apic_edge(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2680,22 +2608,50 @@ static void ack_apic_level(unsigned int irq) } #ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + + entry = cfg->irq_2_pin; + for (;;) { + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_eoi(apic, pin); + entry = entry->next; + } +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ir_ack_apic_edge(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_edge(irq); -#endif - return ack_apic_edge(irq); + ack_APIC_irq(); } static void ir_ack_apic_level(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_level(irq); -#endif - return ack_apic_level(irq); + struct irq_desc *desc = irq_to_desc(irq); + + ack_APIC_irq(); + eoi_ioapic_irq(desc); } #endif /* CONFIG_INTR_REMAP */ @@ -2900,7 +2856,7 @@ static inline void __init check_timer(void) { struct irq_desc *desc = irq_to_desc(0); struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2966,7 +2922,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq_node(cfg, node, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } else { /* for edge trigger, setup_IO_APIC_irq already @@ -3003,7 +2959,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); enable_8259A_irq(0); if (timer_irq_works()) { @@ -3215,14 +3171,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int irq_want) +unsigned int create_irq_nr(unsigned int irq_want, int node) { /* Allocate an unused irq */ unsigned int irq; unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; struct irq_desc *desc_new = NULL; irq = 0; @@ -3231,7 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want) spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - desc_new = irq_to_desc_alloc_cpu(new, cpu); + desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; @@ -3240,6 +3195,9 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; + + desc_new = move_irq_desc(desc_new, node); + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; @@ -3257,11 +3215,12 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { + int node = cpu_to_node(boot_cpu_id); unsigned int irq_want; int irq; irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) irq = -1; @@ -3363,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3372,7 +3331,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3384,13 +3343,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + + return 0; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void +static int ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -3399,11 +3360,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irte irte; if (get_irte(irq, &irte)) - return; + return -1; dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3420,6 +3381,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) */ if (cfg->move_in_progress) send_cleanup_vector(cfg); + + return 0; } #endif @@ -3515,15 +3478,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) unsigned int irq_want; struct intel_iommu *iommu = NULL; int index = 0; + int node; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) return -1; irq_want = irq + 1; @@ -3573,7 +3538,7 @@ void arch_teardown_msi_irq(unsigned int irq) #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3582,7 +3547,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3594,6 +3559,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3627,7 +3594,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3636,7 +3603,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3648,6 +3615,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3667,12 +3636,14 @@ int arch_setup_hpet_msi(unsigned int irq) { int ret; struct msi_msg msg; + struct irq_desc *desc = irq_to_desc(irq); ret = msi_compose_msg(NULL, irq, &msg); if (ret < 0) return ret; hpet_msi_write(irq, &msg); + desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, "edge"); @@ -3702,7 +3673,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3710,11 +3681,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; target_ht_irq(irq, dest, cfg->vector); + + return 0; } #endif @@ -3789,6 +3762,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, eligible_cpu); @@ -3802,15 +3777,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -3828,10 +3801,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) struct uv_IO_APIC_route_entry *entry; int mmr_pnode; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - entry->mask = 1; mmr_pnode = uv_blade_to_pnode(mmr_blade); @@ -3895,6 +3868,71 @@ int __init arch_probe_nr_irqs(void) } #endif +static int __io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + struct irq_desc *desc; + struct irq_cfg *cfg; + int node; + int ioapic, pin; + int trigger, polarity; + + ioapic = irq_attr->ioapic; + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + if (dev) + node = dev_to_node(dev); + else + node = cpu_to_node(boot_cpu_id); + + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + + pin = irq_attr->ioapic_pin; + trigger = irq_attr->trigger; + polarity = irq_attr->polarity; + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, ioapic, pin); + } + + setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); + + return 0; +} + +int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + int ioapic, pin; + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + ioapic = irq_attr->ioapic; + pin = irq_attr->ioapic_pin; + if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[ioapic].apicid, pin); + return 0; + } + set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); + + return __io_apic_set_pci_routing(dev, irq, irq_attr); +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3975,6 +4013,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } +#endif int __init io_apic_get_version(int ioapic) { @@ -3987,39 +4026,6 @@ int __init io_apic_get_version(int ioapic) return reg_01.bits.version; } -#endif - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - struct irq_desc *desc; - struct irq_cfg *cfg; - int cpu = boot_cpu_id; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc %d\n", irq); - return 0; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= NR_IRQS_LEGACY) { - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); - } - - setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); - - return 0; -} - int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) { @@ -4050,51 +4056,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic, irq, irq_entry; + int pin, ioapic = 0, irq, irq_entry; struct irq_desc *desc; - struct irq_cfg *cfg; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, desc, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); - continue; +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + ioapic = 0; + } +#endif - } + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); - /* - * Honour affinities which have been set in early boot - */ - if (desc->status & - (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; - else - mask = apic->target_cpus(); + desc = irq_to_desc(irq); - if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq_desc(desc, mask); - else - set_ioapic_affinity_irq_desc(desc, mask); - } + /* + * Honour affinities which have been set in early boot + */ + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = apic->target_cpus(); + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq_desc(desc, mask); + else + set_ioapic_affinity_irq_desc(desc, mask); } + } #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index d6bd6240715..a691302dc3f 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data) } #endif -static void report_broken_nmi(int cpu, int *prev_nmi_count) +static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) { printk(KERN_CONT "\n"); @@ -138,7 +138,7 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL); + alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP @@ -414,7 +414,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } - if (cpumask_test_cpu(cpu, backtrace_mask)) { + /* We can be called before check_nmi_watchdog, hence NULL check. */ + if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 01eda2ac65e..440a8bccd91 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -160,7 +160,6 @@ extern struct apic apic_summit; extern struct apic apic_bigsmp; extern struct apic apic_es7000; extern struct apic apic_es7000_cluster; -extern struct apic apic_default; struct apic *apic = &apic_default; EXPORT_SYMBOL_GPL(apic); diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1783652bb0e..bc3e880f9b8 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic && (apic != &apic_x2apic_phys && + if (x2apic_mode && (apic != &apic_x2apic_phys && #ifdef CONFIG_X86_UV apic != &apic_x2apic_uv_x && #endif diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 9cfe1f415d8..344eee4ac0a 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) static const struct cpumask *summit_target_cpus(void) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4a903e2f0d1..8e4cbb255c3 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -10,7 +10,7 @@ #include <asm/apic.h> #include <asm/ipi.h> -DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1248318436e..780a733a5e7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -19,6 +19,7 @@ #include <linux/timer.h> #include <linux/cpu.h> #include <linux/init.h> +#include <linux/io.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -34,6 +35,17 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; +static int early_get_nodeid(void) +{ + union uvh_node_id_u node_id; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); + node_id.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + return node_id.s.node_id; +} + static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { @@ -42,6 +54,8 @@ static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) else if (!strcmp(oem_table_id, "UVX")) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { + __get_cpu_var(x2apic_extra_bits) = + early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } @@ -91,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP unsigned long val; @@ -549,7 +563,8 @@ void __init uv_system_init(void) unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; int max_pnode = 0; - unsigned long mmr_base, present; + unsigned long mmr_base, present, paddr; + unsigned short pnode_mask; map_low_mmrs(); @@ -568,15 +583,18 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_blade_info); get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_node_to_blade); memset(uv_node_to_blade, 255, bytes); bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_cpu_to_blade); memset(uv_cpu_to_blade, 255, bytes); blade = 0; @@ -592,6 +610,7 @@ void __init uv_system_init(void) } } + pnode_mask = (1 << n_val) - 1; node_id.v = uv_read_local_mmr(UVH_NODE_ID); gnode_upper = (((unsigned long)node_id.s.node_id) & ~((1 << n_val) - 1)) << m_val; @@ -615,7 +634,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; - uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; + uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; @@ -631,6 +650,17 @@ void __init uv_system_init(void) lcpu, blade); } + /* Add blade/pnode info for nodes without cpus */ + for_each_online_node(nid) { + if (uv_node_to_blade[nid] >= 0) + continue; + paddr = node_start_pfn(nid) << PAGE_SHIFT; + paddr = uv_soc_phys_ram_to_gpa(paddr); + pnode = (paddr >> m_val) & pnode_mask; + blade = boot_pnode_to_blade(pnode); + uv_node_to_blade[nid] = blade; + } + map_gru_high(max_pnode); map_mmr_high(max_pnode); map_config_high(max_pnode); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 5a6aa1c1162..1a830cbd701 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -146,4 +146,5 @@ void foo(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72f062fb4b..898ecc47e12 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -125,6 +125,7 @@ int main(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); BLANK(); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index f63882728d9..63a88e1f987 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -182,7 +182,8 @@ void uv_bios_init(void) memcpy(&uv_systab, tab, sizeof(struct uv_systab)); iounmap(tab); - printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); + printk(KERN_INFO "EFI UV System Table Revision %d\n", + uv_systab.revision); } #else /* !CONFIG_EFI */ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 8220ae69849..c965e521271 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, { 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e4a459daa6..728b3750a3e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -272,7 +272,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; - unsigned apicid = hard_smp_processor_id(); + unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c4f667896c2..b0517aa2bd3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int __init x86_xsave_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + return 1; +} +__setup("noxsave", x86_xsave_setup); + #ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -292,7 +299,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; +__u32 cpu_caps_set[NCAPINTS] __cpuinitdata; void load_percpu_segment(int cpu) { @@ -761,6 +769,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); + /* Clear/Set all flags overriden by options, after probe */ + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } + #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif @@ -806,6 +820,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif init_hypervisor(c); + + /* + * Clear/Set all flags overriden by options, need do it + * before following smp all cpus cap AND. + */ + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -818,10 +842,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - #ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); @@ -1203,6 +1223,8 @@ void __cpuinit cpu_init(void) load_TR_desc(); load_LDT(&init_mm.context); + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + #ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 46e29ab96c6..6b2a52dd040 100755..100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -32,9 +32,7 @@ static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); -static DEFINE_PER_CPU(unsigned, cpu_modelflag); static DEFINE_PER_CPU(int, cpu_priv_count); -static DEFINE_PER_CPU(unsigned, cpu_model); static DEFINE_MUTEX(cpu_debug_lock); @@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = { { "value", CPU_REG_ALL, 1 }, }; -/* Intel Registers Range */ -static struct cpu_debug_range cpu_intel_range[] = { - { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, - { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, - { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, - { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, - { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, - { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, - - { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, - { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, - { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, - { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, - - { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, - { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, - { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, - { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, - - { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, - { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, - { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, - { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, - - { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, - { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, - { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, - { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, - - { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, - { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, - { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, - { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, - { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, - - { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, - { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, - { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, - { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, - { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, - { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, - { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, - { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, - - { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, - { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, - { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, - { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, - { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, - { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, - - { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, - { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, - - { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, - { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, - { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, - { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, - { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, - { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, - { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, - { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, - - { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, - { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, - { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, - { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, - { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, - { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, - { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, - - { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, - { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, - - { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, - { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, - { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, - { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, +/* CPU Registers Range */ +static struct cpu_debug_range cpu_reg_range[] = { + { 0x00000000, 0x00000001, CPU_MC, }, + { 0x00000006, 0x00000007, CPU_MONITOR, }, + { 0x00000010, 0x00000010, CPU_TIME, }, + { 0x00000011, 0x00000013, CPU_PMC, }, + { 0x00000017, 0x00000017, CPU_PLATFORM, }, + { 0x0000001B, 0x0000001B, CPU_APIC, }, + { 0x0000002A, 0x0000002B, CPU_POWERON, }, + { 0x0000002C, 0x0000002C, CPU_FREQ, }, + { 0x0000003A, 0x0000003A, CPU_CONTROL, }, + { 0x00000040, 0x00000047, CPU_LBRANCH, }, + { 0x00000060, 0x00000067, CPU_LBRANCH, }, + { 0x00000079, 0x00000079, CPU_BIOS, }, + { 0x00000088, 0x0000008A, CPU_CACHE, }, + { 0x0000008B, 0x0000008B, CPU_BIOS, }, + { 0x0000009B, 0x0000009B, CPU_MONITOR, }, + { 0x000000C1, 0x000000C4, CPU_PMC, }, + { 0x000000CD, 0x000000CD, CPU_FREQ, }, + { 0x000000E7, 0x000000E8, CPU_PERF, }, + { 0x000000FE, 0x000000FE, CPU_MTRR, }, + + { 0x00000116, 0x0000011E, CPU_CACHE, }, + { 0x00000174, 0x00000176, CPU_SYSENTER, }, + { 0x00000179, 0x0000017B, CPU_MC, }, + { 0x00000186, 0x00000189, CPU_PMC, }, + { 0x00000198, 0x00000199, CPU_PERF, }, + { 0x0000019A, 0x0000019A, CPU_TIME, }, + { 0x0000019B, 0x0000019D, CPU_THERM, }, + { 0x000001A0, 0x000001A0, CPU_MISC, }, + { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, + { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, }, + { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, + + { 0x00000200, 0x0000020F, CPU_MTRR, }, + { 0x00000250, 0x00000250, CPU_MTRR, }, + { 0x00000258, 0x00000259, CPU_MTRR, }, + { 0x00000268, 0x0000026F, CPU_MTRR, }, + { 0x00000277, 0x00000277, CPU_PAT, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, }, + + { 0x00000300, 0x00000311, CPU_PMC, }, + { 0x00000345, 0x00000345, CPU_PMC, }, + { 0x00000360, 0x00000371, CPU_PMC, }, + { 0x0000038D, 0x00000390, CPU_PMC, }, + { 0x000003A0, 0x000003BE, CPU_PMC, }, + { 0x000003C0, 0x000003CD, CPU_PMC, }, + { 0x000003E0, 0x000003E1, CPU_PMC, }, + { 0x000003F0, 0x000003F2, CPU_PMC, }, + + { 0x00000400, 0x00000417, CPU_MC, }, + { 0x00000480, 0x0000048B, CPU_VMX, }, + + { 0x00000600, 0x00000600, CPU_DEBUG, }, + { 0x00000680, 0x0000068F, CPU_LBRANCH, }, + { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, + + { 0x000107CC, 0x000107D3, CPU_PMC, }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, }, + { 0xC0000081, 0xC0000084, CPU_CALL, }, + { 0xC0000100, 0xC0000102, CPU_BASE, }, + { 0xC0000103, 0xC0000103, CPU_TIME, }, + + { 0xC0010000, 0xC0010007, CPU_PMC, }, + { 0xC0010010, 0xC0010010, CPU_CONF, }, + { 0xC0010015, 0xC0010015, CPU_CONF, }, + { 0xC0010016, 0xC001001A, CPU_MTRR, }, + { 0xC001001D, 0xC001001D, CPU_MTRR, }, + { 0xC001001F, 0xC001001F, CPU_CONF, }, + { 0xC0010030, 0xC0010035, CPU_BIOS, }, + { 0xC0010044, 0xC0010048, CPU_MC, }, + { 0xC0010050, 0xC0010056, CPU_SMM, }, + { 0xC0010058, 0xC0010058, CPU_CONF, }, + { 0xC0010060, 0xC0010060, CPU_CACHE, }, + { 0xC0010061, 0xC0010068, CPU_SMM, }, + { 0xC0010069, 0xC001006B, CPU_SMM, }, + { 0xC0010070, 0xC0010071, CPU_SMM, }, + { 0xC0010111, 0xC0010113, CPU_SMM, }, + { 0xC0010114, 0xC0010118, CPU_SVM, }, + { 0xC0010140, 0xC0010141, CPU_OSVM, }, + { 0xC0011022, 0xC0011023, CPU_CONF, }, }; -/* AMD Registers Range */ -static struct cpu_debug_range cpu_amd_range[] = { - { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, }, - { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, }, - { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, }, - { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS }, - { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS }, - { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, }, - - { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, }, - { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, }, - { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, }, - - { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, }, - { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, }, - { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, }, - { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, }, - - { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, }, - { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, }, - { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, }, - { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, }, - { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, }, - { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, }, - { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, }, - { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, }, - { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, }, - { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, }, - { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, }, - { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, }, - { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, }, - { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, }, - { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, }, - { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, }, - { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, }, - { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, -}; - - -/* Intel */ -static int get_intel_modelflag(unsigned model) -{ - int flag; - - switch (model) { - case 0x0501: - case 0x0502: - case 0x0504: - flag = CPU_INTEL_PENTIUM; - break; - case 0x0601: - case 0x0603: - case 0x0605: - case 0x0607: - case 0x0608: - case 0x060A: - case 0x060B: - flag = CPU_INTEL_P6; - break; - case 0x0609: - case 0x060D: - flag = CPU_INTEL_PENTIUM_M; - break; - case 0x060E: - flag = CPU_INTEL_CORE; - break; - case 0x060F: - case 0x0617: - flag = CPU_INTEL_CORE2; - break; - case 0x061C: - flag = CPU_INTEL_ATOM; - break; - case 0x0F00: - case 0x0F01: - case 0x0F02: - case 0x0F03: - case 0x0F04: - flag = CPU_INTEL_XEON_P4; - break; - case 0x0F06: - flag = CPU_INTEL_XEON_MP; - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -/* AMD */ -static int get_amd_modelflag(unsigned model) -{ - int flag; - - switch (model >> 8) { - case 0x6: - flag = CPU_AMD_K6; - break; - case 0x7: - flag = CPU_AMD_K7; - break; - case 0x8: - flag = CPU_AMD_K8; - break; - case 0xf: - flag = CPU_AMD_0F; - break; - case 0x10: - flag = CPU_AMD_10; - break; - case 0x11: - flag = CPU_AMD_11; - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -static int get_cpu_modelflag(unsigned cpu) -{ - int flag; - - flag = per_cpu(cpu_model, cpu); - - switch (flag >> 16) { - case X86_VENDOR_INTEL: - flag = get_intel_modelflag(flag); - break; - case X86_VENDOR_AMD: - flag = get_amd_modelflag(flag & 0xffff); - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -static int get_cpu_range_count(unsigned cpu) -{ - int index; - - switch (per_cpu(cpu_model, cpu) >> 16) { - case X86_VENDOR_INTEL: - index = ARRAY_SIZE(cpu_intel_range); - break; - case X86_VENDOR_AMD: - index = ARRAY_SIZE(cpu_amd_range); - break; - default: - index = 0; - break; - } - - return index; -} - static int is_typeflag_valid(unsigned cpu, unsigned flag) { - unsigned vendor, modelflag; - int i, index; + int i; /* Standard Registers should be always valid */ if (flag >= CPU_TSS) return 1; - modelflag = per_cpu(cpu_modelflag, cpu); - vendor = per_cpu(cpu_model, cpu) >> 16; - index = get_cpu_range_count(cpu); - - for (i = 0; i < index; i++) { - switch (vendor) { - case X86_VENDOR_INTEL: - if ((cpu_intel_range[i].model & modelflag) && - (cpu_intel_range[i].flag & flag)) - return 1; - break; - case X86_VENDOR_AMD: - if ((cpu_amd_range[i].model & modelflag) && - (cpu_amd_range[i].flag & flag)) - return 1; - break; - } + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { + if (cpu_reg_range[i].flag == flag) + return 1; } /* Invalid */ @@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag) static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, int index, unsigned flag) { - unsigned modelflag; - - modelflag = per_cpu(cpu_modelflag, cpu); - *max = 0; - switch (per_cpu(cpu_model, cpu) >> 16) { - case X86_VENDOR_INTEL: - if ((cpu_intel_range[index].model & modelflag) && - (cpu_intel_range[index].flag & flag)) { - *min = cpu_intel_range[index].min; - *max = cpu_intel_range[index].max; - } - break; - case X86_VENDOR_AMD: - if ((cpu_amd_range[index].model & modelflag) && - (cpu_amd_range[index].flag & flag)) { - *min = cpu_amd_range[index].min; - *max = cpu_amd_range[index].max; - } - break; - } + if (cpu_reg_range[index].flag == flag) { + *min = cpu_reg_range[index].min; + *max = cpu_reg_range[index].max; + } else + *max = 0; return *max; } @@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) unsigned msr, msr_min, msr_max; struct cpu_private *priv; u32 low, high; - int i, range; + int i; if (seq) { priv = seq->private; @@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) } } - range = get_cpu_range_count(cpu); - - for (i = 0; i < range; i++) { + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) continue; @@ -588,8 +369,20 @@ static void print_apic(void *arg) seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); -#endif /* CONFIG_X86_LOCAL_APIC */ + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + unsigned int i, v, maxeilvt; + + v = apic_read(APIC_EFEAT); + maxeilvt = (v >> 16) & 0xff; + seq_printf(seq, " EFEAT\t\t: %08x\n", v); + seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); + for (i = 0; i < maxeilvt; i++) { + v = apic_read(APIC_EILVTn(i)); + seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); + } + } +#endif /* CONFIG_X86_LOCAL_APIC */ seq_printf(seq, "\n MSR\t:\n"); } @@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) { struct dentry *cpu_dentry = NULL; unsigned reg, reg_min, reg_max; - int i, range, err = 0; + int i, err = 0; char reg_dir[12]; u32 low, high; - range = get_cpu_range_count(cpu); - - for (i = 0; i < range; i++) { + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { if (!get_cpu_range(cpu, ®_min, ®_max, i, cpu_base[type].flag)) continue; @@ -850,10 +641,6 @@ static int cpu_init_cpu(void) cpui = &cpu_data(cpu); if (!cpu_has(cpui, X86_FEATURE_MSR)) continue; - per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | - (cpui->x86 << 8) | - (cpui->x86_model)); - per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); sprintf(cpu_dir, "cpu%d", cpu); cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index 52c83987547..f138c6c389b 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -220,11 +220,14 @@ config X86_LONGHAUL If in doubt, say N. config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver" + tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" select CPU_FREQ_TABLE - depends on X86_32 + depends on X86_32 && EXPERIMENTAL help - This adds the CPUFreq driver for VIA C7 processors. + This adds the CPUFreq driver for VIA C7 processors. However, this driver + does not have any safeguards to prevent operating the CPU out of spec + and is thus considered dangerous. Please use the regular ACPI cpufreq + driver, enabled by CONFIG_X86_ACPI_CPUFREQ. If in doubt, say N. diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 19f6b9d27e8..ae9b503220c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -65,13 +65,18 @@ enum { struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; - unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; }; static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +struct acpi_msr_data { + u64 saved_aperf, saved_mperf; +}; + +static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); + DEFINE_TRACE(power_mark); /* acpi_perf_data is a pointer to percpu data. */ @@ -85,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid) { struct cpuinfo_x86 *cpu = &cpu_data(cpuid); - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; + return cpu_has(cpu, X86_FEATURE_EST); } static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) @@ -152,7 +153,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +/* Called via smp_call_function_single(), on the target CPU */ +static void do_drv_read(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 h; @@ -169,10 +171,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +/* Called via smp_call_function_many(), on the target CPUs */ +static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 lo, hi; @@ -191,23 +193,24 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); } static void drv_write(struct drv_cmd *cmd) { - unsigned int i; + int this_cpu; - for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); - } + this_cpu = get_cpu(); + if (cpumask_test_cpu(this_cpu, cmd->mask)) + do_drv_write(cmd); + smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); + put_cpu(); } static u32 get_cur_val(const struct cpumask *mask) @@ -241,28 +244,23 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } -struct perf_cur { +struct perf_pair { union { struct { u32 lo; u32 hi; } split; u64 whole; - } aperf_cur, mperf_cur; + } aperf, mperf; }; - -static long read_measured_perf_ctrs(void *_cur) +/* Called via smp_call_function_single(), on the target CPU */ +static void read_measured_perf_ctrs(void *_cur) { - struct perf_cur *cur = _cur; - - rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0, 0); - wrmsr(MSR_IA32_MPERF, 0, 0); + struct perf_pair *cur = _cur; - return 0; + rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); + rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); } /* @@ -281,58 +279,63 @@ static long read_measured_perf_ctrs(void *_cur) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - struct perf_cur cur; + struct perf_pair readin, cur; unsigned int perf_percent; unsigned int retval; - if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) return 0; + cur.aperf.whole = readin.aperf.whole - + per_cpu(msr_data, cpu).saved_aperf; + cur.mperf.whole = readin.mperf.whole - + per_cpu(msr_data, cpu).saved_mperf; + per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; + per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; + #ifdef __i386__ /* * We dont want to do 64 bit divide with 32 bit kernel * Get an approximate value. Return failure in case we cannot get * an approximate value. */ - if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) { + if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { int shift_count; u32 h; - h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi); + h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); shift_count = fls(h); - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) { + if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { int shift_count = 7; - cur.aperf_cur.split.lo >>= shift_count; - cur.mperf_cur.split.lo >>= shift_count; + cur.aperf.split.lo >>= shift_count; + cur.mperf.split.lo >>= shift_count; } - if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo) - perf_percent = (cur.aperf_cur.split.lo * 100) / - cur.mperf_cur.split.lo; + if (cur.aperf.split.lo && cur.mperf.split.lo) + perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; else perf_percent = 0; #else - if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) { + if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { int shift_count = 7; - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (cur.aperf_cur.whole && cur.mperf_cur.whole) - perf_percent = (cur.aperf_cur.whole * 100) / - cur.mperf_cur.whole; + if (cur.aperf.whole && cur.mperf.whole) + perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; else perf_percent = 0; #endif - retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; + retval = (policy->cpuinfo.max_freq * perf_percent) / 100; return retval; } @@ -543,7 +546,7 @@ static int __init acpi_cpufreq_early_init(void) return -ENOMEM; } for_each_possible_cpu(i) { - if (!alloc_cpumask_var_node( + if (!zalloc_cpumask_var_node( &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, GFP_KERNEL, cpu_to_node(i))) { @@ -685,16 +688,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { - static int print_once; policy->cpuinfo.transition_latency = 20 * 1000; - if (!print_once) { - print_once = 1; - printk(KERN_INFO "Capping off P-state tranision latency" - " at 20 uS\n"); - } + printk_once(KERN_INFO + "P-state transition latency capped at 20 uS\n"); } - data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency >= @@ -713,6 +711,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (result) goto err_freqfree; + if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) + printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); + switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: /* Current speed is unknown and not detectable by IO port */ diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 0bd48e65a0c..ce2ed3e4aad 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -33,7 +33,6 @@ #include <linux/timex.h> #include <linux/io.h> #include <linux/acpi.h> -#include <linux/kernel.h> #include <asm/msr.h> #include <acpi/processor.h> diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 6ac55bd341a..86961519372 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ + case 0x1C: /* Atom */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 3c28ccd4974..d47c775eb0a 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -168,10 +168,12 @@ static int check_powernow(void) return 1; } +#ifdef CONFIG_X86_POWERNOW_K7_ACPI static void invalidate_entry(unsigned int entry) { powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } +#endif static int get_ranges(unsigned char *pst) { @@ -320,7 +322,7 @@ static int powernow_acpi_init(void) goto err0; } - if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, + if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, GFP_KERNEL)) { retval = -ENOMEM; goto err05; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4709ead2db5..cf52215d9eb 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data) data->batps); } +static u32 freq_from_fid_did(u32 fid, u32 did) +{ + u32 mhz = 0; + + if (boot_cpu_data.x86 == 0x10) + mhz = (100 * (fid + 0x10)) >> did; + else if (boot_cpu_data.x86 == 0x11) + mhz = (100 * (fid + 8)) >> did; + else + BUG(); + + return mhz * 1000; +} + static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) { @@ -821,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val = -ENODEV; - acpi_integer space_id; + acpi_integer control, status; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -834,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } - space_id = data->acpi_data.control_register.space_id; - if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + control = data->acpi_data.control_register.space_id; + status = data->acpi_data.status_register.space_id; + + if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - space_id); + control, status); goto err_out; } @@ -872,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { printk(KERN_ERR PFX "unable to alloc powernow_k8_data cpumask\n"); ret_val = -ENOMEM; @@ -923,8 +938,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, powernow_table[i].index = index; - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; + /* Frequency may be rounded for these */ + if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { + powernow_table[i].frequency = + freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); + } else + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } @@ -1215,13 +1235,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; int rc; - static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1244,19 +1267,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - /* - * Replace this one with print_once as soon as such a - * thing gets introduced - */ - if (!print_once) { - WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " - "does not provide ACPI _PSS objects " - "in a way that Linux understands. " - "Please report this to the Linux ACPI" - " maintainers and complain to your " - "BIOS vendor.\n"); - print_once++; - } + printk_once(ACPI_PSS_BIOS_BUG_MSG); goto err_out; } if (pol->cpu != 0) { diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index c9f1fdc0283..55c831ed71c 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy, if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) return -ENOMEM; - if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { + if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { free_cpumask_var(saved_mask); return -ENOMEM; } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7437fa133c0..daed39ba261 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) } #endif -static void __cpuinit srat_detect_node(void) +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); + int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; /* Don't do the funky fallback heuristics the AMD version employs for now. */ @@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } /* Work around errata */ - srat_detect_node(); + srat_detect_node(c); if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 483eda96e10..789efe217e1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/smp.h> +#include <asm/k8.h> #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -159,14 +160,6 @@ struct _cpuid4_info_regs { unsigned long can_disable; }; -#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) -static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} -}; -#endif - unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -207,10 +200,17 @@ union l3_cache { }; static const unsigned short __cpuinitconst assocs[] = { - [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, [0xa] = 32, [0xb] = 48, + [1] = 1, + [2] = 2, + [4] = 4, + [6] = 8, + [8] = 16, + [0xa] = 32, + [0xb] = 48, [0xc] = 64, - [0xf] = 0xffff // ?? + [0xd] = 96, + [0xe] = 128, + [0xf] = 0xffff /* fully associative - no way to show this currently */ }; static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; @@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; if (leaf == 3) - eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + eax->split.num_threads_sharing = + current_cpu_data.x86_max_cores - 1; else eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; @@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; + + if (boot_cpu_data.x86 == 0x11) + return; + + /* see erratum #382 */ + if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) + return; + this_leaf->can_disable = 1; } @@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) -#ifdef CONFIG_PCI -static struct pci_dev *get_k8_northbridge(int node) -{ - struct pci_dev *dev = NULL; - int i; - - for (i = 0; i <= node; i++) { - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(&k8_nb_id[0], dev)); - if (!dev) - break; - } - return dev; -} -#else -static struct pci_dev *get_k8_northbridge(int node) -{ - return NULL; -} -#endif - -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, + unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - ssize_t ret = 0; - int i; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned int reg = 0; if (!this_leaf->can_disable) - return sprintf(buf, "Feature not enabled\n"); - - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; - } - for (i = 0; i < 2; i++) { - unsigned int reg; + if (!dev) + return -EINVAL; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); + pci_read_config_dword(dev, 0x1BC + index * 4, ®); + return sprintf(buf, "%x\n", reg); +} - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", - buf, (reg & 0x30000) >> 16, reg & 0xfff); - } - return ret; +#define SHOW_CACHE_DISABLE(index) \ +static ssize_t \ +show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return show_cache_disable(this_leaf, buf, index); \ } +SHOW_CACHE_DISABLE(0) +SHOW_CACHE_DISABLE(1) -static ssize_t -store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, - size_t count) +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, + const char *buf, size_t count, unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - unsigned int ret, index, val; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned long val = 0; + unsigned int scrubber = 0; if (!this_leaf->can_disable) - return 0; - - if (strlen(buf) > 15) return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!dev) return -EINVAL; - if (index > 1) + + if (strict_strtoul(buf, 10, &val) < 0) return -EINVAL; val |= 0xc0000000; - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); - return -EINVAL; - } + + pci_read_config_dword(dev, 0x58, &scrubber); + scrubber &= ~0x1f000000; + pci_write_config_dword(dev, 0x58, scrubber); pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); + return count; +} - return 1; +#define STORE_CACHE_DISABLE(index) \ +static ssize_t \ +store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ + const char *buf, size_t count) \ +{ \ + return store_cache_disable(this_leaf, buf, count, index); \ } +STORE_CACHE_DISABLE(0) +STORE_CACHE_DISABLE(1) struct _cache_attr { struct attribute attr; @@ -808,7 +795,10 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); -static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); +static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, + show_cache_disable_0, store_cache_disable_0); +static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, + show_cache_disable_1, store_cache_disable_1); static struct attribute * default_attrs[] = { &type.attr, @@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, - &cache_disable.attr, + &cache_disable_0.attr, + &cache_disable_1.attr, NULL }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 863f89568b1..09dd1d414fc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -239,9 +239,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); + if (!(flags & MCP_DONTLOG)) { + mce_log(&m); + add_taint(TAINT_MACHINE_CHECK); + } /* * Clear state for this bank. @@ -452,13 +453,14 @@ void mce_log_therm_throt_event(__u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static int next_interval; /* in jiffies */ +static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ static void mcheck_timer(unsigned long); static DEFINE_PER_CPU(struct timer_list, mce_timer); static void mcheck_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); + int *n; WARN_ON(smp_processor_id() != data); @@ -470,14 +472,14 @@ static void mcheck_timer(unsigned long data) * Alert userspace if needed. If we logged an MCE, reduce the * polling interval, otherwise increase the polling interval. */ + n = &__get_cpu_var(next_interval); if (mce_notify_user()) { - next_interval = max(next_interval/2, HZ/100); + *n = max(*n/2, HZ/100); } else { - next_interval = min(next_interval * 2, - (int)round_jiffies_relative(check_interval*HZ)); + *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); } - t->expires = jiffies + next_interval; + t->expires = jiffies + *n; add_timer(t); } @@ -584,7 +586,7 @@ static void mce_init(void *dummy) * Log the machine checks left over from the previous reset. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC, &all_banks); + machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); set_in_cr4(X86_CR4_MCE); @@ -632,14 +634,13 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) static void mce_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); + int *n = &__get_cpu_var(next_interval); - /* data race harmless because everyone sets to the same value */ - if (!next_interval) - next_interval = check_interval * HZ; - if (!next_interval) + *n = check_interval * HZ; + if (!*n) return; setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + next_interval); + t->expires = round_jiffies(jiffies + *n); add_timer(t); } @@ -907,7 +908,6 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { - next_interval = check_interval * HZ; on_each_cpu(mce_cpu_restart, NULL, 1); } @@ -1110,7 +1110,8 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + next_interval); + t->expires = round_jiffies(jiffies + + __get_cpu_var(next_interval)); add_timer_on(t, cpu); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; @@ -1162,7 +1163,7 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); err = mce_init_banks(); if (err) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index d6b72df89d6..65a0fceedcd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -15,7 +15,6 @@ #include <asm/hw_irq.h> #include <asm/idle.h> #include <asm/therm_throt.h> -#include <asm/apic.h> asmlinkage void smp_thermal_interrupt(void) { @@ -151,10 +150,11 @@ static void print_update(char *type, int *hdr, int num) static void cmci_discover(int banks, int boot) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long flags; int hdr = 0; int i; - spin_lock(&cmci_discover_lock); + spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; @@ -184,7 +184,7 @@ static void cmci_discover(int banks, int boot) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock(&cmci_discover_lock); + spin_unlock_irqrestore(&cmci_discover_lock, flags); if (hdr) printk(KERN_CONT "\n"); } @@ -211,13 +211,14 @@ void cmci_recheck(void) */ void cmci_clear(void) { + unsigned long flags; int i; int banks; u64 val; if (!cmci_supported(&banks)) return; - spin_lock(&cmci_discover_lock); + spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { if (!test_bit(i, __get_cpu_var(mce_banks_owned))) continue; @@ -227,7 +228,7 @@ void cmci_clear(void) wrmsrl(MSR_IA32_MC0_CTL2 + i, val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } - spin_unlock(&cmci_discover_lock); + spin_unlock_irqrestore(&cmci_discover_lock, flags); } /* diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index ce0fe4b5c04..1d584a18a50 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits) if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) return 0; - rdmsr(MTRRdefType_MSR, def, dummy); + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; @@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ if (!is_cpu(INTEL) || disable_mtrr_trim) return 0; - rdmsr(MTRRdefType_MSR, def, dummy); + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0b776c09aff..0543f69f0b2 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -20,9 +20,9 @@ struct fixed_range_block { }; static struct fixed_range_block fixed_range_blocks[] = { - { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ - { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ - { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ + { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ + { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ + { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ {} }; @@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs) k8_check_syscfg_dram_mod_en(); - rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]); for (i = 0; i < 2; i++) - rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); + rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]); for (i = 0; i < 8; i++) - rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); + rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]); } void mtrr_save_fixed_ranges(void *info) @@ -275,7 +275,11 @@ static void __init print_mtrr_state(void) } printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", mtrr_state.enabled & 2 ? "en" : "dis"); - high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + if (size_or_mask & 0xffffffffUL) + high_width = ffs(size_or_mask & 0xffffffffUL) - 1; + else + high_width = ffs(size_or_mask>>32) + 32 - 1; + high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", @@ -306,7 +310,7 @@ void __init get_mtrr_state(void) vrs = mtrr_state.var_ranges; - rdmsr(MTRRcap_MSR, lo, dummy); + rdmsr(MSR_MTRRcap, lo, dummy); mtrr_state.have_fixed = (lo >> 8) & 1; for (i = 0; i < num_var_ranges; i++) @@ -314,7 +318,7 @@ void __init get_mtrr_state(void) if (mtrr_state.have_fixed) get_fixed_ranges(mtrr_state.fixed_ranges); - rdmsr(MTRRdefType_MSR, lo, dummy); + rdmsr(MSR_MTRRdefType, lo, dummy); mtrr_state.def_type = (lo & 0xff); mtrr_state.enabled = (lo & 0xc00) >> 10; @@ -579,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) __flush_tlb(); /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); } static void post_set(void) __releases(set_atomicity_lock) @@ -591,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock) __flush_tlb(); /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); @@ -703,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i static int generic_have_wrcomb(void) { unsigned long config, dummy; - rdmsr(MTRRcap_MSR, config, dummy); + rdmsr(MSR_MTRRcap, config, dummy); return (config & (1 << 10)); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 03cda01f57c..8fc248b5aea 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void) unsigned long config = 0, dummy; if (use_intel()) { - rdmsr(MTRRcap_MSR, config, dummy); + rdmsr(MSR_MTRRcap, config, dummy); } else if (is_cpu(AMD)) config = 2; else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 77f67f7b347..7538b767f20 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -5,21 +5,6 @@ #include <linux/types.h> #include <linux/stddef.h> -#define MTRRcap_MSR 0x0fe -#define MTRRdefType_MSR 0x2ff - -#define MTRRfix64K_00000_MSR 0x250 -#define MTRRfix16K_80000_MSR 0x258 -#define MTRRfix16K_A0000_MSR 0x259 -#define MTRRfix4K_C0000_MSR 0x268 -#define MTRRfix4K_C8000_MSR 0x269 -#define MTRRfix4K_D0000_MSR 0x26a -#define MTRRfix4K_D8000_MSR 0x26b -#define MTRRfix4K_E0000_MSR 0x26c -#define MTRRfix4K_E8000_MSR 0x26d -#define MTRRfix4K_F0000_MSR 0x26e -#define MTRRfix4K_F8000_MSR 0x26f - #define MTRR_CHANGE_MASK_FIXED 0x01 #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 7f7e2753685..1f5fb1588d1 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) if (use_intel()) /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); else /* Cyrix ARRs - everything else were excluded at the top */ ctxt->ccr3 = getCx86(CX86_CCR3); @@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { if (use_intel()) /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); else if (is_cpu(CYRIX)) /* Cyrix ARRs - everything else were excluded at the top */ @@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt) /* Restore MTRRdefType */ if (use_intel()) /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); else /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ctxt->ccr3); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index f93047fed79..d5e30397246 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", - cpumask_weight(cpu_sibling_mask(cpu))); + cpumask_weight(cpu_core_mask(cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->apicid); diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index da87590b869..81086c227ab 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl); extern unsigned int code_bytes; -extern int kstack_depth_to_print; /* The form of the top of the frame on the stack */ struct stack_frame { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ef2c3563357..00628130292 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1074,12 +1074,13 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 addr; u64 start; - start = startt; - while (size < sizet && (start + 1)) + for (start = startt; ; start += size) { start = find_e820_area_size(start, &size, align); - - if (size < sizet) - return 0; + if (!(start + 1)) + return 0; + if (size >= sizet) + break; + } #ifdef CONFIG_X86_32 if (start >= MAXMEM) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 76b8cd953de..ebdb85cf268 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func) } #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) static u32 __init ati_ixp4x0_rev(int num, int slot, int func) { u32 d; @@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) d &= 0xff; return d; } +#endif static void __init ati_bugs(int num, int slot, int func) { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a331ec38af9..38946c6e843 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1410,7 +1410,10 @@ ENTRY(paranoid_exit) paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + jmp irq_return paranoid_restore: + TRACE_IRQS_IRETQ 0 RESTORE_ALL 8 jmp irq_return paranoid_userspace: diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 70a10ca100f..b79c5533c42 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -18,6 +18,8 @@ #include <linux/init.h> #include <linux/list.h> +#include <trace/syscall.h> + #include <asm/cacheflush.h> #include <asm/ftrace.h> #include <asm/nops.h> @@ -440,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [old] "=r" (old), [faulted] "=r" (faulted) + : [old] "=&r" (old), [faulted] "=r" (faulted) : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30683883e0c..dc5ed4bdd88 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -608,13 +608,6 @@ ignore_int: ENTRY(initial_code) .long i386_start_kernel -.section .text -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - /* * BSS section */ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 648b3a2a3a4..81408b93f88 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -236,6 +236,10 @@ static void hpet_stop_counter(void) unsigned long cfg = hpet_readl(HPET_CFG); cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); +} + +static void hpet_reset_counter(void) +{ hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); } @@ -250,6 +254,7 @@ static void hpet_start_counter(void) static void hpet_restart_counter(void) { hpet_stop_counter(); + hpet_reset_counter(); hpet_start_counter(); } @@ -309,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg; + unsigned long cfg, cmp, now; uint64_t delta; switch (mode) { @@ -317,12 +322,23 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; + now = hpet_readl(HPET_COUNTER); + cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cmp, HPET_Tn_CMP(timer)); + udelay(1); + /* + * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL + * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL + * bit is automatically cleared after the first write. + * (See AMD-8111 HyperTransport I/O Hub Data Sheet, + * Publication # 24674) + */ hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); hpet_start_counter(); hpet_print_config(); @@ -722,7 +738,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, /* * Clock source related code */ -static cycle_t read_hpet(void) +static cycle_t read_hpet(struct clocksource *cs) { return (cycle_t)hpet_readl(HPET_COUNTER); } @@ -756,7 +772,7 @@ static int hpet_clocksource_register(void) hpet_restart_counter(); /* Verify whether hpet counter works */ - t1 = read_hpet(); + t1 = hpet_readl(HPET_COUNTER); rdtscll(start); /* @@ -770,7 +786,7 @@ static int hpet_clocksource_register(void) rdtscll(now); } while ((now - start) < 200000UL); - if (t1 == read_hpet()) { + if (t1 == hpet_readl(HPET_COUNTER)) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); return -ENODEV; diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 3475440baa5..c2e0bb0890d 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -129,7 +129,7 @@ void __init setup_pit_timer(void) * to just read by itself. So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { static int old_count; static u32 old_jifs; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3aaf7b9e3a8..9a391bbb8ba 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include <asm/io_apic.h> #include <asm/irq.h> #include <asm/idle.h> +#include <asm/hw_irq.h> atomic_t irq_err_count; @@ -24,9 +25,9 @@ void (*generic_interrupt_extension)(void) = NULL; */ void ack_bad_irq(unsigned int irq) { - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + if (printk_ratelimit()) + pr_err("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N @@ -36,9 +37,7 @@ void ack_bad_irq(unsigned int irq) * completely. * But only ack when the APIC is enabled -AK */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) @@ -65,7 +64,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, " Spurious interrupts\n"); #endif if (generic_interrupt_extension) { - seq_printf(p, "PLT: "); + seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); seq_printf(p, " Platform interrupts\n"); @@ -178,7 +177,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_thermal_count; # ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; -#endif +# endif #endif return sum; } @@ -213,14 +212,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) irq = __get_cpu_var(vector_irq)[vector]; if (!handle_irq(irq, regs)) { -#ifdef CONFIG_X86_64 - if (!disable_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", - __func__, smp_processor_id(), vector, irq); + pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); } irq_exit(); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c index 368b0a8836f..2e08b10ad51 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit.c @@ -1,20 +1,25 @@ +#include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> +#include <linux/timex.h> #include <linux/slab.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <linux/init.h> #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/bitops.h> +#include <linux/acpi.h> #include <linux/io.h> #include <linux/delay.h> #include <asm/atomic.h> #include <asm/system.h> #include <asm/timer.h> +#include <asm/hw_irq.h> #include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> @@ -22,7 +27,23 @@ #include <asm/i8259.h> #include <asm/traps.h> +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ +#ifdef CONFIG_X86_32 /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 * as the irq is unreliable, and exception 16 works correctly @@ -52,30 +73,7 @@ static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", }; - -void __init init_ISA_irqs(void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); #endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} /* * IRQ2 is cascade interrupt to second interrupt controller @@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) +static void __init init_ISA_irqs(void) { int i; - /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif + init_8259A(0); /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) + * 16 old-style INTA-cycle interrupts: */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + for (i = 0; i < NR_IRQS_LEGACY; i++) { + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); } +} +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) +static void __init smp_intr_init(void) +{ +#ifdef CONFIG_SMP +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -160,16 +166,27 @@ void __init native_init_IRQ(void) /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - /* IPI for single call function */ + /* IPI for generic single function call */ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif +#endif /* CONFIG_SMP */ +} + +static void __init apic_intr_init(void) +{ + smp_intr_init(); + +#ifdef CONFIG_X86_64 + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif -#ifdef CONFIG_X86_LOCAL_APIC +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -179,16 +196,67 @@ void __init native_init_IRQ(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + /* Performance monitoring interrupts: */ +# ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); +# endif + #endif +#ifdef CONFIG_X86_32 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#endif +} + +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ +#ifdef CONFIG_X86_32 + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } +#endif + init_ISA_irqs(); +} + +void __init native_init_IRQ(void) +{ + int i; + + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); + + apic_intr_init(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ + if (!test_bit(i, used_vectors)) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + } if (!acpi_ioapic) setup_irq(2, &irq2); +#ifdef CONFIG_X86_32 /* * Call quirks after call gates are initialised (usually add in * the architecture specific gates): @@ -203,4 +271,5 @@ void __init native_init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); +#endif } diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c deleted file mode 100644 index 8cd10537fd4..00000000000 --- a/arch/x86/kernel/irqinit_64.c +++ /dev/null @@ -1,177 +0,0 @@ -#include <linux/linkage.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sysdev.h> -#include <linux/bitops.h> -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/delay.h> - -#include <asm/atomic.h> -#include <asm/system.h> -#include <asm/hw_irq.h> -#include <asm/pgtable.h> -#include <asm/desc.h> -#include <asm/apic.h> -#include <asm/i8259.h> - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { - .handler = no_action, - .name = "cascade", -}; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - -static void __init init_ISA_irqs(void) -{ - int i; - - init_bsp_APIC(); - init_8259A(0); - - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); -#endif -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); - - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -} - -void __init native_init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - - apic_intr_init(); - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index eedfaebe106..8d82a77a3f3 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -88,6 +88,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; + gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -100,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; -#endif gdb_regs[GDB_SP] = regs->sp; +#endif } /** @@ -141,7 +142,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC] = p->thread.ip; + gdb_regs[GDB_PC] = 0; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 137f2e8132d..223af43f152 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void) return ret; } +static cycle_t kvm_clock_get_cycles(struct clocksource *cs) +{ + return kvm_clock_read(); +} + /* * If we don't do that, there is the possibility that the guest * will calibrate under heavy load - thus, getting a lower lpj - @@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void) static struct clocksource kvm_clock = { .name = "kvm-clock", - .read = kvm_clock_read, + .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << KVM_SCALE, diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index e7368c1da01..c1c429d0013 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -194,7 +194,7 @@ void machine_kexec(struct kimage *image) unsigned int preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) save_processor_state(); #endif @@ -253,7 +253,7 @@ void machine_kexec(struct kimage *image) image->preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) restore_processor_state(); #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 89cea4d4467..84c3bf209e9 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -274,7 +274,7 @@ void machine_kexec(struct kimage *image) int save_ftrace_enabled; #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) save_processor_state(); #endif @@ -333,7 +333,7 @@ void machine_kexec(struct kimage *image) image->preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) restore_processor_state(); #endif diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index a0f3851ef31..98c470c069d 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -108,40 +108,29 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -struct update_for_cpu { - const void __user *buf; - size_t size; -}; - -static long update_for_cpu(void *_ufc) -{ - struct update_for_cpu *ufc = _ufc; - int error; - - error = microcode_ops->request_microcode_user(smp_processor_id(), - ufc->buf, ufc->size); - if (error < 0) - return error; - if (!error) - microcode_ops->apply_microcode(smp_processor_id()); - return error; -} - static int do_microcode_update(const void __user *buf, size_t size) { + cpumask_t old; int error = 0; int cpu; - struct update_for_cpu ufc = { .buf = buf, .size = size }; + + old = current->cpus_allowed; for_each_online_cpu(cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!uci->valid) continue; - error = work_on_cpu(cpu, update_for_cpu, &ufc); + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); if (error < 0) - break; + goto out; + if (!error) + microcode_ops->apply_microcode(cpu); } +out: + set_cpus_allowed_ptr(current, &old); return error; } @@ -391,8 +380,6 @@ static int mc_sysdev_add(struct sys_device *sys_dev) return err; err = microcode_init_cpu(cpu); - if (err) - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return err; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index dce99dca6cf..651c93b2886 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -17,6 +17,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/smp.h> +#include <linux/pci.h> #include <asm/mtrr.h> #include <asm/mpspec.h> @@ -679,7 +680,7 @@ void __init get_smp_config(void) __get_smp_config(0); } -static void smp_reserve_bootmem(struct mpf_intel *mpf) +static void __init smp_reserve_bootmem(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); #ifdef CONFIG_X86_32 @@ -838,7 +839,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; -static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) +static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) { int i; @@ -866,27 +867,21 @@ static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) } } #else /* CONFIG_X86_IO_APIC */ -static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} +static +inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} #endif /* CONFIG_X86_IO_APIC */ -static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, - int count) +static int +check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) { - if (!mpc_new_phys) { - pr_info("No spare slots, try to append...take your risk, " - "new mpc_length %x\n", count); - } else { - if (count <= mpc_new_length) - pr_info("No spare slots, try to append..., " - "new mpc_length %x\n", count); - else { - pr_err("mpc_new_length %lx is too small\n", - mpc_new_length); - return -1; - } + int ret = 0; + + if (!mpc_new_phys || count <= mpc_new_length) { + WARN(1, "update_mptable: No spare slots (length: %x)\n", count); + return -1; } - return 0; + return ret; } static int __init replace_intsrc_all(struct mpc_table *mpc, @@ -945,7 +940,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, } else { struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; count += sizeof(struct mpc_intsrc); - if (!check_slot(mpc_new_phys, mpc_new_length, count)) + if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) goto out; assign_to_mpc_intsrc(&mp_irqs[i], m); mpc->length = count; @@ -962,11 +957,14 @@ out: return 0; } -static int __initdata enable_update_mptable; +int enable_update_mptable; static int __init update_mptable_setup(char *str) { enable_update_mptable = 1; +#ifdef CONFIG_PCI + pci_routeirq = 1; +#endif return 0; } early_param("update_mptable", update_mptable_setup); @@ -979,6 +977,9 @@ static int __initdata alloc_mptable; static int __init parse_alloc_mptable_opt(char *p) { enable_update_mptable = 1; +#ifdef CONFIG_PCI + pci_routeirq = 1; +#endif alloc_mptable = 1; if (!p) return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8e45f446488..9faf43bea33 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -134,7 +134,9 @@ static void *get_call_destination(u8 type) .pv_irq_ops = pv_irq_ops, .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, +#ifdef CONFIG_PARAVIRT_SPINLOCKS .pv_lock_ops = pv_lock_ops, +#endif }; return *((void **)&tmpl + type); } diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 34f12e9996e..221a3853e26 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -50,7 +50,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -struct dma_map_ops swiotlb_dma_ops = { +static struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca989158e84..3e21e38d7e3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -11,6 +11,7 @@ #include <trace/power.h> #include <asm/system.h> #include <asm/apic.h> +#include <asm/syscalls.h> #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/i387.h> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fe9345c967d..23b7c8f017e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,7 +21,6 @@ #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> -#include <linux/ftrace.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -35,6 +34,8 @@ #include <asm/proto.h> #include <asm/ds.h> +#include <trace/syscall.h> + #include "tls.h" enum x86_regset { diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e95022e4f5d..af71d06624b 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -261,8 +261,6 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) { if (hpet_force_user) old_ich_force_enable_hpet(dev); - else - hpet_print_force_info(); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, @@ -493,5 +491,42 @@ void force_hpet_resume(void) break; } } +#endif + +#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) +/* Set correct numa_node information for AMD NB functions */ +static void __init quirk_amd_nb_node(struct pci_dev *dev) +{ + struct pci_dev *nb_ht; + unsigned int devfn; + u32 val; + + devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); + nb_ht = pci_get_slot(dev->bus, devfn); + if (!nb_ht) + return; + + pci_read_config_dword(nb_ht, 0x60, &val); + set_dev_node(&dev->dev, val & 7); + pci_dev_put(dev); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, + quirk_amd_nb_node); #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2aef36d8aca..667188e0b5a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -224,6 +224,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell DXP061 */ + .callback = set_bios_reboot, + .ident = "Dell DXP061", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), + }, + }, + { /* Handle problems with rebooting on Sony VGN-Z540N */ + .callback = set_bios_reboot, + .ident = "Sony VGN-Z540N", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4158439bf6..7791eef95b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -214,8 +214,8 @@ unsigned long mmu_cr4_features; unsigned long mmu_cr4_features = X86_CR4_PAE; #endif -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ -int bootloader_type; +/* Boot loader ID and version as integers, for the benefit of proc_dointvec */ +int bootloader_type, bootloader_version; /* * Setup options @@ -706,6 +706,12 @@ void __init setup_arch(char **cmdline_p) #endif saved_video_mode = boot_params.hdr.vid_mode; bootloader_type = boot_params.hdr.type_of_loader; + if ((bootloader_type >> 4) == 0xe) { + bootloader_type &= 0xf; + bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; + } + bootloader_version = bootloader_type & 0xf; + bootloader_version |= boot_params.hdr.ext_loader_ver << 4; #ifdef CONFIG_BLK_DEV_RAM rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; @@ -997,24 +1003,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** * x86_quirk_intr_init - post gate setup interrupt initialisation * * Description: diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 3a97a4cf187..8f0e13be36b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) /* * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. + * + * NOTE: disabled for now. */ - if (!cpu_has_pse || !pcpu_need_numa()) + if (true || !cpu_has_pse || !pcpu_need_numa()) return -EINVAL; /* diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 13f33ea8cca..f6db48c405b 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) } struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .smp_cpus_done = native_smp_cpus_done, + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .smp_cpus_done = native_smp_cpus_done, - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, + .smp_send_stop = native_smp_send_stop, + .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, - .cpu_disable = native_cpu_disable, - .play_dead = native_play_dead, + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, - .send_call_func_ipi = native_send_call_func_ipi, + .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef917d..7c80007ea5f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -int __devinit +int __cpuinit wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; @@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } -int __devinit +static int __cpuinit wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; @@ -822,10 +822,12 @@ do_rest: /* mark "stuck" area as not stuck */ *((volatile unsigned long *)trampoline_base) = 0; - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + } return boot_error; } @@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation." + if (!disable_apic) { + pr_err("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + pr_err("... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); + } smpboot_clear_io_apic(); arch_disable_smp_support(); return -1; diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index deb5ebb32c3..8c7b03b0cfc 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -25,6 +25,8 @@ static int uv_bau_retry_limit __read_mostly; /* position of pnode (which is nasid>>1): */ static int uv_nshift __read_mostly; +/* base pnode in this partition */ +static int uv_partition_base_pnode __read_mostly; static unsigned long uv_mmask __read_mostly; @@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); static DEFINE_PER_CPU(struct bau_control, bau_control); /* + * Determine the first node on a blade. + */ +static int __init blade_to_first_node(int blade) +{ + int node, b; + + for_each_online_node(node) { + b = uv_node_to_blade_id(node); + if (blade == b) + return node; + } + return -1; /* shouldn't happen */ +} + +/* + * Determine the apicid of the first cpu on a blade. + */ +static int __init blade_to_first_apicid(int blade) +{ + int cpu; + + for_each_present_cpu(cpu) + if (blade == uv_cpu_to_blade_id(cpu)) + return per_cpu(x86_cpu_to_apicid, cpu); + return -1; +} + +/* * Free a software acknowledge hardware resource by clearing its Pending * bit. This will return a reply to the sender. * If the message has timed out, a reply has already been sent by the @@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg, msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; cpu = uv_blade_processor_id(); msg->number_of_cpus = - uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); + uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); this_cpu_mask = 1UL << cpu; if (msp->seen_by.bits & this_cpu_mask) return; @@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * Returns @flush_mask if some remote flushing remains to be done. The * mask will have some bits still set. */ -const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, struct bau_desc *bau_desc, struct cpumask *flush_mask) { int completion_status = 0; int right_shift; int tries = 0; - int blade; + int pnode; int bit; unsigned long mmr_offset; unsigned long index; @@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, * use the IPI method of shootdown on them. */ for_each_cpu(bit, flush_mask) { - blade = uv_cpu_to_blade_id(bit); - if (blade == this_blade) + pnode = uv_cpu_to_pnode(bit); + if (pnode == this_pnode) continue; cpumask_clear_cpu(bit, flush_mask); } @@ -309,16 +339,16 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); int i; int bit; - int blade; + int pnode; int uv_cpu; - int this_blade; + int this_pnode; int locals = 0; struct bau_desc *bau_desc; cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); - this_blade = uv_numa_blade_id(); + this_pnode = uv_hub_info->pnode; bau_desc = __get_cpu_var(bau_control).descriptor_base; bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; @@ -326,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, i = 0; for_each_cpu(bit, flush_mask) { - blade = uv_cpu_to_blade_id(bit); - BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); - if (blade == this_blade) { + pnode = uv_cpu_to_pnode(bit); + BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); + if (pnode == this_pnode) { locals++; continue; } - bau_node_set(blade, &bau_desc->distribution); + bau_node_set(pnode - uv_partition_base_pnode, + &bau_desc->distribution); i++; } if (i == 0) { @@ -350,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, bau_desc->payload.address = va; bau_desc->payload.sending_cpu = cpu; - return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); + return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); } /* @@ -418,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs) set_irq_regs(old_regs); } +/* + * uv_enable_timeouts + * + * Each target blade (i.e. blades that have cpu's) needs to have + * shootdown message timeouts enabled. The timeout does not cause + * an interrupt, but causes an error message to be returned to + * the sender. + */ static void uv_enable_timeouts(void) { - int i; int blade; - int last_blade; + int nblades; int pnode; - int cur_cpu = 0; - unsigned long apicid; + unsigned long mmr_image; - last_blade = -1; - for_each_online_node(i) { - blade = uv_node_to_blade_id(i); - if (blade == last_blade) + nblades = uv_num_possible_blades(); + + for (blade = 0; blade < nblades; blade++) { + if (!uv_blade_nr_possible_cpus(blade)) continue; - last_blade = blade; - apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); + pnode = uv_blade_to_pnode(blade); - cur_cpu += uv_blade_nr_possible_cpus(i); + mmr_image = + uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); + /* + * Set the timeout period and then lock it in, in three + * steps; captures and locks in the period. + * + * To program the period, the SOFT_ACK_MODE must be off. + */ + mmr_image &= ~((unsigned long)1 << + UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + /* + * Set the 4-bit period. + */ + mmr_image &= ~((unsigned long)0xf << + UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); + mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << + UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + /* + * Subsequent reversals of the timebase bit (3) cause an + * immediate timeout of one or all INTD resources as + * indicated in bits 2:0 (7 causes all of them to timeout). + */ + mmr_image |= ((unsigned long)1 << + UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); } } @@ -482,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) stat->requestee, stat->onetlb, stat->alltlb, stat->s_retry, stat->d_retry, stat->ptc_i); seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", - uv_read_global_mmr64(uv_blade_to_pnode - (uv_cpu_to_blade_id(cpu)), + uv_read_global_mmr64(uv_cpu_to_pnode(cpu), UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), stat->sflush, stat->dflush, stat->retriesok, stat->nomsg, @@ -617,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) * finish the initialization of the per-blade control structures */ static void __init -uv_table_bases_finish(int blade, int node, int cur_cpu, +uv_table_bases_finish(int blade, struct bau_control *bau_tablesp, struct bau_desc *adp) { struct bau_control *bcp; - int i; + int cpu; - for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) { - bcp = (struct bau_control *)&per_cpu(bau_control, i); + for_each_present_cpu(cpu) { + if (blade != uv_cpu_to_blade_id(cpu)) + continue; + bcp = (struct bau_control *)&per_cpu(bau_control, cpu); bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; @@ -649,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode) struct bau_desc *adp; struct bau_desc *ad2; - adp = (struct bau_desc *) - kmalloc_node(16384, GFP_KERNEL, node); + adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); BUG_ON(!adp); - pa = __pa((unsigned long)adp); + pa = uv_gpa(adp); /* need the real nasid*/ n = pa >> uv_nshift; m = pa & uv_mmask; @@ -667,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode) for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { memset(ad2, 0, sizeof(struct bau_desc)); ad2->header.sw_ack_flag = 1; - ad2->header.base_dest_nodeid = - uv_blade_to_pnode(uv_cpu_to_blade_id(0)); + /* + * base_dest_nodeid is the first node in the partition, so + * the bit map will indicate partition-relative node numbers. + * note that base_dest_nodeid is actually a nasid. + */ + ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; ad2->header.command = UV_NET_ENDPOINT_INTD; ad2->header.int_both = 1; /* @@ -686,6 +755,8 @@ static struct bau_payload_queue_entry * __init uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) { struct bau_payload_queue_entry *pqp; + unsigned long pa; + int pn; char *cp; pqp = (struct bau_payload_queue_entry *) kmalloc_node( @@ -696,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) cp = (char *)pqp + 31; pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); bau_tablesp->va_queue_first = pqp; + /* + * need the pnode of where the memory was really allocated + */ + pa = uv_gpa(pqp); + pn = pa >> uv_nshift; uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, - ((unsigned long)pnode << - UV_PAYLOADQ_PNODE_SHIFT) | + ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | uv_physnodeaddr(pqp)); uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, uv_physnodeaddr(pqp)); @@ -715,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) /* * Initialization of each UV blade's structures */ -static int __init uv_init_blade(int blade, int node, int cur_cpu) +static int __init uv_init_blade(int blade) { + int node; int pnode; unsigned long pa; unsigned long apicid; @@ -724,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu) struct bau_payload_queue_entry *pqp; struct bau_control *bau_tablesp; + node = blade_to_first_node(blade); bau_tablesp = uv_table_bases_init(blade, node); pnode = uv_blade_to_pnode(blade); adp = uv_activation_descriptor_init(node, pnode); pqp = uv_payload_queue_init(node, pnode, bau_tablesp); - uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp); + uv_table_bases_finish(blade, bau_tablesp, adp); /* * the below initialization can't be in firmware because the * messaging IRQ will be determined by the OS */ - apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); + apicid = blade_to_first_apicid(blade); pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); if ((pa & 0xff) != UV_BAU_MESSAGE) { uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, @@ -748,44 +825,34 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu) static int __init uv_bau_init(void) { int blade; - int node; int nblades; - int last_blade; int cur_cpu; if (!is_uv_system()) return 0; for_each_possible_cpu(cur_cpu) - alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), + zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; uv_nshift = uv_hub_info->n_val; uv_mmask = (1UL << uv_hub_info->n_val) - 1; - nblades = 0; - last_blade = -1; - cur_cpu = 0; - for_each_online_node(node) { - blade = uv_node_to_blade_id(node); - if (blade == last_blade) - continue; - last_blade = blade; - nblades++; - } + nblades = uv_num_possible_blades(); + uv_bau_table_bases = (struct bau_control **) kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); BUG_ON(!uv_bau_table_bases); - last_blade = -1; - for_each_online_node(node) { - blade = uv_node_to_blade_id(node); - if (blade == last_blade) - continue; - last_blade = blade; - uv_init_blade(blade, node, cur_cpu); - cur_cpu += uv_blade_nr_possible_cpus(blade); - } + uv_partition_base_pnode = 0x7fffffff; + for (blade = 0; blade < nblades; blade++) + if (uv_blade_nr_possible_cpus(blade) && + (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) + uv_partition_base_pnode = uv_blade_to_pnode(blade); + for (blade = 0; blade < nblades; blade++) + if (uv_blade_nr_possible_cpus(blade)) + uv_init_blade(blade); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d696145855b..ede024531f8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -966,11 +966,8 @@ void __init trap_init(void) for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else - set_bit(SYSCALL_VECTOR, used_vectors); -#endif + /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7a567ebe636..84d27356c3d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate, tsc_khz; + unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; int hpet = is_hpet_enabled(), i, loopmin; - tsc_khz = get_hypervisor_tsc_freq(); - if (tsc_khz) { + hv_tsc_khz = get_hypervisor_tsc_freq(); + if (hv_tsc_khz) { printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); - return tsc_khz; + return hv_tsc_khz; } local_irq_save(flags); @@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc; * code, which is necessary to support wrapping clocksources like pm * timer. */ -static cycle_t read_tsc(void) +static cycle_t read_tsc(struct clocksource *cs) { cycle_t ret = (cycle_t)get_cycles(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index bf36328f6ef..027b5b49899 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count; * of a critical section, to be able to prove TSC time-warps: */ static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; + static __cpuinitdata cycles_t last_tsc; static __cpuinitdata cycles_t max_warp; static __cpuinitdata int nr_warps; @@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk(KERN_INFO - "Skipping synchronization checks as TSC is reliable.\n"); + pr_info("Skipping synchronization checks as TSC is reliable.\n"); return; } - printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); + pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", + smp_processor_id(), cpu); /* * Reset it - in case this is a second bootup: @@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu) if (nr_warps) { printk("\n"); - printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," - " turning off TSC clock.\n", max_warp); + pr_warning("Measured %Ld cycles TSC warp between CPUs, " + "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { printk(" passed.\n"); @@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void) while (atomic_read(&stop_count) != cpus) cpu_relax(); } -#undef NR_LOOPS - diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c index 67f9b9dbf80..36afb98675a 100644 --- a/arch/x86/kernel/uv_sysfs.c +++ b/arch/x86/kernel/uv_sysfs.c @@ -21,6 +21,7 @@ #include <linux/sysdev.h> #include <asm/uv/bios.h> +#include <asm/uv/uv.h> struct kobject *sgi_uv_kobj; @@ -47,6 +48,9 @@ static int __init sgi_uv_sysfs_init(void) { unsigned long ret; + if (!is_uv_system()) + return -ENODEV; + if (!sgi_uv_kobj) sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); if (!sgi_uv_kobj) { diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 2ffb6c53326..583f11d5c48 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -29,7 +29,7 @@ #define RTC_NAME "sgi_rtc" -static cycle_t uv_read_rtc(void); +static cycle_t uv_read_rtc(struct clocksource *cs); static int uv_rtc_next_event(unsigned long, struct clock_event_device *); static void uv_rtc_timer_setup(enum clock_event_mode, struct clock_event_device *); @@ -123,7 +123,7 @@ static int uv_setup_intr(int cpu, u64 expires) /* Initialize comparator value */ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); - return (expires < uv_read_rtc() && !uv_intr_pending(pnode)); + return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); } /* @@ -256,7 +256,7 @@ static int uv_rtc_unset_timer(int cpu) spin_lock_irqsave(&head->lock, flags); - if (head->next_cpu == bcpu && uv_read_rtc() >= *t) + if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) rc = 1; *t = ULLONG_MAX; @@ -278,7 +278,7 @@ static int uv_rtc_unset_timer(int cpu) /* * Read the RTC. */ -static cycle_t uv_read_rtc(void) +static cycle_t uv_read_rtc(struct clocksource *cs) { return (cycle_t)uv_read_local_mmr(UVH_RTC); } @@ -291,7 +291,7 @@ static int uv_rtc_next_event(unsigned long delta, { int ced_cpu = cpumask_first(ced->cpumask); - return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc()); + return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); } /* diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index d7ac84e7fc1..b8035a0f404 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -318,9 +318,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk } /* - * Save old state, set default return value (%ax) to 0 + * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) */ - info->regs32->ax = 0; + info->regs32->ax = VM86_SIGNAL; tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_gs = get_user_gs(info->regs32); diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index d303369a7ba..2b3eb82efee 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void) /** vmi clocksource */ static struct clocksource clocksource_vmi; -static cycle_t read_real_cycles(void) +static cycle_t read_real_cycles(struct clocksource *cs) { cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); return max(ret, clocksource_vmi.cycle_last); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 849ee611f01..4c85b2e2bb6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -1,5 +1,431 @@ +/* + * ld script for the x86 kernel + * + * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * Modernisation, unification and other changes and fixes: + * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org> + * + * + * Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. + */ + #ifdef CONFIG_X86_32 -# include "vmlinux_32.lds.S" +#define LOAD_OFFSET __PAGE_OFFSET #else -# include "vmlinux_64.lds.S" +#define LOAD_OFFSET __START_KERNEL_map #endif + +#include <asm-generic/vmlinux.lds.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page_types.h> +#include <asm/cache.h> +#include <asm/boot.h> + +#undef i386 /* in case the preprocessor is a 32bit one */ + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#ifdef CONFIG_X86_32 +OUTPUT_ARCH(i386) +ENTRY(phys_startup_32) +jiffies = jiffies_64; +#else +OUTPUT_ARCH(i386:x86-64) +ENTRY(phys_startup_64) +jiffies_64 = jiffies; +#endif + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_X86_64 + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ +#endif + note PT_NOTE FLAGS(0); /* ___ */ +} + +SECTIONS +{ +#ifdef CONFIG_X86_32 + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; + phys_startup_32 = startup_32 - LOAD_OFFSET; +#else + . = __START_KERNEL; + phys_startup_64 = startup_64 - LOAD_OFFSET; +#endif + + /* Text and read-only data */ + + /* bootstrapping code */ + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { + _text = .; + *(.text.head) + } :text = 0x9090 + + /* The rest of the text */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { +#ifdef CONFIG_X86_32 + /* not really needed, already page aligned */ + . = ALIGN(PAGE_SIZE); + *(.text.page_aligned) +#endif + . = ALIGN(8); + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + /* End of text section */ + _etext = .; + } :text = 0x9090 + + NOTES :text :note + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } :text = 0x9090 + + RODATA + + /* Data */ + . = ALIGN(PAGE_SIZE); + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + +#ifdef CONFIG_X86_64 + /* End of data section */ + _edata = .; +#endif + } :data + +#ifdef CONFIG_X86_32 + /* 32 bit has nosave before _edata */ + . = ALIGN(PAGE_SIZE); + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } +#endif + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) + *(.data.idt) + } + +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); +#endif + .data.cacheline_aligned : + AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); +#endif + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + +#ifdef CONFIG_X86_32 + /* End of data section */ + _edata = .; +#endif + } + +#ifdef CONFIG_X86_64 + +#define VSYSCALL_ADDR (-10*1024*1024) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) + +#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) + +#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { + *(.vsyscall_0) + } :user + + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { + *(.vsyscall_fn) + } + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { + *(.vsyscall_gtod_data) + } + + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { + *(.vsyscall_clock) + } + vsyscall_clock = VVIRT(.vsyscall_clock); + + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { + *(.vsyscall_1) + } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { + *(.vsyscall_2) + } + + .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { + *(.vgetcpu_mode) + } + vgetcpu_mode = VVIRT(.vgetcpu_mode); + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .jiffies : AT(VLOAD(.jiffies)) { + *(.jiffies) + } + jiffies = VVIRT(.jiffies); + + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { + *(.vsyscall_3) + } + + . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; + +#undef VSYSCALL_ADDR +#undef VSYSCALL_PHYS_ADDR +#undef VSYSCALL_VIRT_ADDR +#undef VLOAD_OFFSET +#undef VLOAD +#undef VVIRT_OFFSET +#undef VVIRT + +#endif /* CONFIG_X86_64 */ + + /* init_task */ + . = ALIGN(THREAD_SIZE); + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } +#ifdef CONFIG_X86_64 + :data.init +#endif + + /* + * smp_locks might be freed after init + * start/end must be page aligned + */ + . = ALIGN(PAGE_SIZE); + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + } + + /* Init code and data - will be freed after init */ + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; /* paired with __init_end */ + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } + + . = ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; + } + + SECURITY_INIT + + . = ALIGN(8); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __parainstructions = .; + *(.parainstructions) + __parainstructions_end = .; + } + + . = ALIGN(8); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } + +#ifdef CONFIG_BLK_DEV_INITRD + . = ALIGN(PAGE_SIZE); + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * start another section data.init2. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else + PERCPU(PAGE_SIZE) +#endif + + . = ALIGN(PAGE_SIZE); + + /* freed after init ends here */ + .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { + __init_end = .; + } + +#ifdef CONFIG_X86_64 + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } :data.init2 + /* use another section data.init2, see PERCPU_VADDR() above */ +#endif + + /* BSS */ + . = ALIGN(PAGE_SIZE); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss.page_aligned) + *(.bss) + . = ALIGN(4); + __bss_stop = .; + } + + . = ALIGN(PAGE_SIZE); + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + __brk_base = .; + . += 64 * 1024; /* 64k alignment slop space */ + *(.brk_reservation) /* areas brk users have reserved */ + __brk_limit = .; + } + + .end : AT(ADDR(.end) - LOAD_OFFSET) { + _end = .; + } + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + *(.eh_frame) + *(.discard) + } + + STABS_DEBUG + DWARF_DEBUG +} + + +#ifdef CONFIG_X86_32 +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") +#else +/* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + +/* + * Build-time check on the image size: + */ +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif + +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_KEXEC +#include <asm/kexec.h> + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif + diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S deleted file mode 100644 index 62ad500d55f..00000000000 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ /dev/null @@ -1,229 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; - * - * Don't define absolute symbols until and unless you know that symbol - * value is should remain constant even if kernel image is relocated - * at run time. Absolute symbols are not relocated. If symbol value should - * change if kernel is relocated, make the symbol section relative and - * put it inside the section definition. - */ - -#define LOAD_OFFSET __PAGE_OFFSET - -#include <asm-generic/vmlinux.lds.h> -#include <asm/thread_info.h> -#include <asm/page_types.h> -#include <asm/cache.h> -#include <asm/boot.h> - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(phys_startup_32) -jiffies = jiffies_64; - -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} -SECTIONS -{ - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; - - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - *(.text.head) - } :text = 0x9090 - - /* read-only */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ - *(.text.page_aligned) - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - /* writeable */ - . = ALIGN(PAGE_SIZE); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - DATA_DATA - CONSTRUCTORS - } :data - - . = ALIGN(PAGE_SIZE); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) - *(.data.idt) - } - - . = ALIGN(32); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - _edata = .; /* End of data section */ - } - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } - - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - } - /* will be freed after init - * Following ALIGN() is required to make sure no other data falls on the - * same page where __smp_alt_end is pointing as that page might be freed - * after boot. Always make sure that ALIGN() directive is present after - * the section which contains __smp_alt_end. - */ - . = ALIGN(PAGE_SIZE); - - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); /* Init code and data */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - . = ALIGN(4); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } -#if defined(CONFIG_BLK_DEV_INITRD) - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - PERCPU(PAGE_SIZE) - . = ALIGN(PAGE_SIZE); - /* freed after init ends here */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __init_end = .; - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - . = ALIGN(4); - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } - - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = . ; - } - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - - DWARF_DEBUG -} - -/* - * Build-time check on the image size: - */ -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_KEXEC -/* Link time checks */ -#include <asm/kexec.h> - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S deleted file mode 100644 index c8742507b03..00000000000 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ /dev/null @@ -1,298 +0,0 @@ -/* ld script to make x86-64 Linux kernel - * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; - */ - -#define LOAD_OFFSET __START_KERNEL_map - -#include <asm-generic/vmlinux.lds.h> -#include <asm/asm-offsets.h> -#include <asm/page_types.h> - -#undef i386 /* in case the preprocessor is a 32bit one */ - -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(phys_startup_64) -jiffies_64 = jiffies; -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - user PT_LOAD FLAGS(7); /* RWE */ - data.init PT_LOAD FLAGS(7); /* RWE */ -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(7); /* RWE */ -#endif - data.init2 PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} -SECTIONS -{ - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - /* First the code that has to be first for bootstrapping */ - *(.text.head) - _stext = .; - /* Then the rest */ - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - CONSTRUCTORS - _edata = .; /* End of data section */ - } :data - - - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - *(.data.cacheline_aligned) - } - . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - } - -#define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) - -#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) -#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) - -#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) -#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - - . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user - __vsyscall_0 = VSYSCALL_VIRT_ADDR; - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) - { *(.vsyscall_gtod_data) } - vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); - .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) - { *(.vsyscall_clock) } - vsyscall_clock = VVIRT(.vsyscall_clock); - - - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) - { *(.vsyscall_1) } - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) - { *(.vsyscall_2) } - - .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } - vgetcpu_mode = VVIRT(.vgetcpu_mode); - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } - jiffies = VVIRT(.jiffies); - - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) - { *(.vsyscall_3) } - - . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; - -#undef VSYSCALL_ADDR -#undef VSYSCALL_PHYS_ADDR -#undef VSYSCALL_VIRT_ADDR -#undef VLOAD_OFFSET -#undef VLOAD -#undef VVIRT_OFFSET -#undef VVIRT - - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - . = ALIGN(THREAD_SIZE); /* init_task */ - *(.data.init_task) - }:data.init - - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - *(.data.page_aligned) - } - - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - __smp_alt_begin = .; - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - . = ALIGN(PAGE_SIZE); - __smp_alt_end = .; - } - - . = ALIGN(PAGE_SIZE); /* Init code and data */ - __init_begin = .; /* paired with __init_end */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - __initdata_begin = .; - INIT_DATA - __initdata_end = .; - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - . = ALIGN(16); - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - . = ALIGN(8); - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - -#ifdef CONFIG_SMP - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - __data_nosave - should - * start another section data.init2. Also, pda should be at the head of - * percpu area. Preallocate it and define the percpu offset symbol - * so that it can be accessed as a percpu variable. - */ - . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) -#else - PERCPU(PAGE_SIZE) -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - - STABS_DEBUG - - DWARF_DEBUG -} - - /* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); - -/* - * Build-time check on the image size: - */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); -#endif - -#ifdef CONFIG_KEXEC -#include <asm/kexec.h> - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2b54fe002e9..c5ee17e8c6d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -89,7 +89,7 @@ int save_i387_xstate(void __user *buf) if (!used_math()) return 0; - clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { /* * Start with clearing the user buffer. This will present a @@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf) return -1; } + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_XSAVE) { struct _fpstate __user *fx = buf; struct _xstate __user *x = buf; @@ -324,7 +326,7 @@ void __ref xsave_cntxt_init(void) } /* - * for now OS knows only about FP/SSE + * Support only the state known to OS. */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); |