diff options
Diffstat (limited to 'arch/x86')
47 files changed, 806 insertions, 439 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f..e4c038abb71 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,6 +238,28 @@ config X86_HAS_BOOT_CPU_ID def_bool y depends on X86_VOYAGER +config SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ + help + This enables support for sparse irqs. This is useful for distro + kernels that want to define a high CONFIG_NR_CPUS value but still + want to have low kernel memory footprint on smaller machines. + + ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread + out the irq_desc[] array in a more NUMA-friendly way. ) + + If you don't know what to do here, say N. + +config NUMA_MIGRATE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && NUMA + default n + help + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say N. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b815664fe37..8e99073b9e0 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -520,6 +520,7 @@ config X86_PTRACE_BTS bool "Branch Trace Store" default y depends on X86_DEBUGCTLMSR + depends on BROKEN help This adds a ptrace interface to the hardware's branch trace store. diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 0be77b39328..7e8e8b25f5f 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -74,7 +74,7 @@ static int kbd_pending(void) { u8 pending; asm volatile("int $0x16; setnz %0" - : "=rm" (pending) + : "=qm" (pending) : "a" (0x0100)); return pending; } diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1a30c0440c6..ac302a2fa33 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -251,13 +251,6 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ @@ -266,6 +259,13 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + /* pci domain of this IOMMU */ u16 pci_seg; @@ -284,19 +284,19 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; /* MSI number for event interrupt */ u16 evt_msi_num; - /* if one, we need to send a completion wait command */ - int need_sync; - /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; + /* if one, we need to send a completion wait command */ + int need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 7f225a4b2a2..097794ff6b7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -#ifdef CONFIG_X86_32 - return 0; -#else +#ifdef CONFIG_X86_64 struct dma_mapping_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); #endif + return (dma_addr == bad_dma_address); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 72c5a190bf4..a95008457ea 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -23,12 +23,13 @@ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H -#ifdef CONFIG_X86_DS #include <linux/types.h> #include <linux/init.h> +#ifdef CONFIG_X86_DS + struct task_struct; /* @@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context); #else /* CONFIG_X86_DS */ -#define ds_init_intel(config) do {} while (0) +struct cpuinfo_x86; +static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6afd9933a7d..25d527ca136 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void); extern void reinit_intr_remapped_IO_APIC(int); #endif -extern int probe_nr_irqs(void); +extern void probe_nr_irqs_gsi(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -static inline void ioapic_init_mappings(void) { } +static inline void ioapic_init_mappings(void) { } -static inline int probe_nr_irqs(void) -{ - return NR_IRQS; -} +static inline void probe_nr_irqs_gsi(void) { } #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f94..f7ff65032b9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,12 +101,23 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +#define NR_IRQS_LEGACY 16 + #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) + +#ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif +#else +# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) +# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif +#endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index 5b28995d664..d02d936840a 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void); #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -#else -/* No IOMMU */ - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d1531c8480b..eefb0594b05 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#define __ARCH_WANT_COMPAT_SYS_PTRACE - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4850e4b02b6..ff386ff50ed 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -239,7 +239,7 @@ struct pci_bus; void set_pci_bus_resources_arch_default(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) #define smt_capable() (smp_num_siblings > 1) #endif diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h index b7c0dea119f..61e08c0a290 100644 --- a/arch/x86/include/asm/vmi.h +++ b/arch/x86/include/asm/vmi.h @@ -223,9 +223,15 @@ struct pci_header { } __attribute__((packed)); /* Function prototypes for bootstrapping */ +#ifdef CONFIG_VMI extern void vmi_init(void); +extern void vmi_activate(void); extern void vmi_bringup(void); -extern void vmi_apply_boot_page_allocations(void); +#else +static inline void vmi_init(void) {} +static inline void vmi_activate(void) {} +static inline void vmi_bringup(void) {} +#endif /* State needed to start an application processor in an SMP system. */ struct vmi_ap_state { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e..b62a7667828 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-y += ds.o +obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4899e0e878..0a60d60ed03 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); + if (!ret) + iommu->need_sync = 1; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); - iommu->need_sync = 0; - spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->need_sync) + goto out; + + iommu->need_sync = 0; + ret = __iommu_queue_command(iommu, &cmd); if (ret) @@ -230,8 +235,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) - printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); + if (unlikely(i == EXIT_LOOP_COUNT)) + panic("AMD IOMMU: Completion wait loop failed\n"); + out: spin_unlock_irqrestore(&iommu->lock, flags); @@ -254,8 +260,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -281,8 +285,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -343,7 +345,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) @@ -599,7 +601,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); @@ -762,8 +764,6 @@ static void set_device_domain(struct amd_iommu *iommu, write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); iommu_queue_inv_dev_entry(iommu, devid); - - iommu->need_sync = 1; } /***************************************************************************** @@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev, print_devid(_bdf, 1); } + if (domain_for_device(_bdf) == NULL) + set_device_domain(*iommu, *domain, _bdf); + return 1; } @@ -908,7 +911,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); @@ -920,8 +923,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, @@ -981,7 +984,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); @@ -1031,8 +1035,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1060,8 +1063,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1127,8 +1129,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1173,8 +1174,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1225,8 +1225,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1257,8 +1256,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 30ae2701b3d..c6cc22815d3 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -427,6 +427,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, &entry, sizeof(entry)); + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); return cmd_buf; @@ -1074,7 +1078,8 @@ int __init amd_iommu_init(void) goto free; /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); if (amd_iommu_rlookup_table == NULL) goto free; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f960bbc..16f94879b52 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1315,7 +1315,7 @@ void enable_x2apic(void) } } -void enable_IR_x2apic(void) +void __init enable_IR_x2apic(void) { #ifdef CONFIG_INTR_REMAP int ret; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58b87c..7f05f44b97e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; + if (data->currpstate == HW_PSTATE_INVALID) { + /* read (initial) hw pstate if not yet set */ + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if (i >= data->numps) + data->currpstate = HW_PSTATE_0; + else + data->currpstate = i; + } return 0; } do { @@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; + data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d9..65cfb5d7f77 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -5,6 +5,19 @@ * http://www.gnu.org/licenses/gpl.html */ + +enum pstate { + HW_PSTATE_INVALID = 0xff, + HW_PSTATE_0 = 0, + HW_PSTATE_1 = 1, + HW_PSTATE_2 = 2, + HW_PSTATE_3 = 3, + HW_PSTATE_4 = 4, + HW_PSTATE_5 = 5, + HW_PSTATE_6 = 6, + HW_PSTATE_7 = 7, +}; + struct powernow_k8_data { unsigned int cpu; @@ -23,7 +36,9 @@ struct powernow_k8_data { u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid or pstate */ - u32 currvid, currfid, currpstate; + u32 currvid; + u32 currfid; + enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b031a4ac85..1c838032fd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) */ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { - static cpumask_t mce_cpus = CPU_MASK_NONE; - mce_cpu_quirks(c); if (mce_dont_init || - cpu_test_and_set(smp_processor_id(), mce_cpus) || !mce_available(c)) return; diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d1a121443bd..a2d1176c38e 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -21,8 +21,6 @@ */ -#ifdef CONFIG_X86_DS - #include <asm/ds.h> #include <linux/errno.h> @@ -211,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context); static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context *context; + unsigned long irq; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); context = (task ? task->thread.ds_ctx : this_system_context); if (context) context->count++; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); return context; } @@ -226,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) /* * Same as ds_get_context, but allocates the context and it's DS * structure, if necessary; returns NULL; if out of memory. - * - * pre: requires ds_lock to be held */ static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; + unsigned long irq; if (!context) { - spin_unlock(&ds_lock); - context = kzalloc(sizeof(*context), GFP_KERNEL); - - if (!context) { - spin_lock(&ds_lock); + if (!context) return NULL; - } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - spin_lock(&ds_lock); return NULL; } - spin_lock(&ds_lock); - /* - * Check for race - another CPU could have allocated - * it meanwhile: - */ + spin_lock_irqsave(&ds_lock, irq); + if (*p_context) { kfree(context->ds); kfree(context); - return *p_context; - } - *p_context = context; + context = *p_context; + } else { + *p_context = context; - context->this = p_context; - context->task = task; + context->this = p_context; + context->task = task; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); - - get_tracer(task); + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, + (unsigned long)context->ds); + } + spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@ -288,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) */ static inline void ds_put_context(struct ds_context *context) { + unsigned long irq; + if (!context) return; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); if (--context->count) goto out; @@ -313,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context) kfree(context->ds); kfree(context); out: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); } @@ -384,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, struct ds_context *context; unsigned long buffer, adj; const unsigned long alignment = (1 << 3); + unsigned long irq; int error = 0; if (!ds_cfg.sizeof_ds) @@ -398,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return -EOPNOTSUPP; - spin_lock(&ds_lock); - - error = -ENOMEM; context = ds_alloc_context(task); if (!context) - goto out_unlock; + return -ENOMEM; + + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; if (context->owner[qual] == current) - goto out_unlock; + goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) - goto out_unlock; + goto out_put_tracer; context->owner[qual] = current; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; @@ -465,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size, out_release: context->owner[qual] = NULL; ds_put_context(context); + put_tracer(task); + return error; + + out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); + put_tracer(task); return error; out_unlock: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); return error; } @@ -818,13 +819,21 @@ static const struct ds_configuration ds_cfg_var = { .sizeof_ds = sizeof(long) * 12, .sizeof_field = sizeof(long), .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = sizeof(long) * 10 +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18 +#endif }; static const struct ds_configuration ds_cfg_64 = { .sizeof_ds = 8 * 12, .sizeof_field = 8, .sizeof_rec[ds_bts] = 8 * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = 8 * 10 +#else + .sizeof_rec[ds_pebs] = 8 * 18 +#endif }; static inline void @@ -878,4 +887,3 @@ void ds_free(struct ds_context *context) while (leftovers--) ds_put_context(context); } -#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608d4ca..b0f61f0dcd0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __init init_thread_xstate(void) +void __cpuinit init_thread_xstate(void) { if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index c9513e1ff28..a74887b416c 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -108,94 +108,253 @@ static int __init parse_noapic(char *str) early_param("noapic", parse_noapic); struct irq_pin_list; + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +{ + struct irq_pin_list *pin; + int node; + + node = cpu_to_node(cpu); + + pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); + + return pin; +} + struct irq_cfg { - unsigned int irq; struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +#ifdef CONFIG_SPARSE_IRQ +static struct irq_cfg irq_cfgx[] = { +#else static struct irq_cfg irq_cfgx[NR_IRQS] = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +#endif + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -#define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) +void __init arch_early_irq_init(void) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + int count; + int i; + + cfg = irq_cfgx; + count = ARRAY_SIZE(irq_cfgx); + for (i = 0; i < count; i++) { + desc = irq_to_desc(i); + desc->chip_data = &cfg[i]; + } +} + +#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; + struct irq_cfg *cfg = NULL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + if (desc) + cfg = desc->chip_data; + + return cfg; } -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +static struct irq_cfg *get_one_free_irq_cfg(int cpu) { - return irq_cfg(irq); + struct irq_cfg *cfg; + int node; + + node = cpu_to_node(cpu); + + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); + + return cfg; } -/* - * Rough estimation of how many shared IRQs there are, can be changed - * anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +void arch_init_chip_data(struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ + cfg = desc->chip_data; + if (!cfg) { + desc->chip_data = get_one_free_irq_cfg(cpu); + if (!desc->chip_data) { + printk(KERN_ERR "can not alloc irq_cfg\n"); + BUG_ON(1); + } + } +} -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + +static void +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } -static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; -static struct irq_pin_list *irq_2_pin_ptr; + tail->next = NULL; + cfg->irq_2_pin = head; +} -static void __init irq_2_pin_init(void) +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) { - struct irq_pin_list *pin = irq_2_pin_head; - int i; + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; - for (i = 1; i < PIN_MAP_SIZE; i++) - pin[i-1].next = &pin[i]; + entry = old_cfg->irq_2_pin; - irq_2_pin_ptr = &pin[0]; + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; } -static struct irq_pin_list *get_one_free_irq_2_pin(void) +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) { - struct irq_pin_list *pin = irq_2_pin_ptr; + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; - if (!pin) - panic("can not get more irq_2_pin\n"); + cfg = get_one_free_irq_cfg(cpu); - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } } +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } +} +#endif + +#else +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq < nr_irqs ? irq_cfgx + irq : NULL; +} + +#endif + +#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ +} +#endif + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -237,11 +396,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -static bool io_apic_level_ack_pending(unsigned int irq) +static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@ -323,13 +481,12 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; + u8 vector = cfg->vector; - cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -359,24 +516,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) } } -static int assign_irq_vector(int irq, cpumask_t mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* @@ -384,12 +544,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); - desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + set_ioapic_affinity_irq_desc(desc, mask); +} #endif /* CONFIG_SMP */ /* @@ -397,16 +565,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) +static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) { - struct irq_cfg *cfg; struct irq_pin_list *entry; - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(); + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", + apic, pin); + return; + } cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@ -421,7 +591,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(); + entry->next = get_one_free_irq_2_pin(cpu); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -430,11 +600,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq(unsigned int irq, +static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@ -451,18 +620,16 @@ static void __init replace_pin_at_irq(unsigned int irq, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq(irq, newapic, newpin); + add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); } -static inline void io_apic_modify_irq(unsigned int irq, +static inline void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; - cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@ -475,9 +642,9 @@ static inline void io_apic_modify_irq(unsigned int irq, } } -static void __unmask_IO_APIC_irq(unsigned int irq) +static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@ -492,47 +659,64 @@ void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); } -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ -static void mask_IO_APIC_irq (unsigned int irq) +static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; + BUG_ON(!cfg); + spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); + __mask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq (unsigned int irq) +static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } +static void mask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq_desc(desc); +} +static void unmask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + unmask_IO_APIC_irq_desc(desc); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -809,7 +993,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); */ static int EISA_ELCR(unsigned int irq) { - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1034,7 +1218,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int __assign_irq_vector(int irq, cpumask_t mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1050,16 +1234,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask) static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; - struct irq_cfg *cfg; - cfg = irq_cfg(irq); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - old_vector = cfg->vector; if (old_vector) { cpumask_t tmp; @@ -1113,24 +1294,22 @@ next: return -ENOSPC; } -static int assign_irq_vector(int irq, cpumask_t mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); + err = __assign_irq_vector(irq, cfg, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } -static void __clear_irq_vector(int irq) +static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -1162,9 +1341,13 @@ void __setup_vector_irq(int cpu) /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; + struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_cfg(irq, cfg) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + cfg = desc->chip_data; if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; @@ -1215,11 +1398,8 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(int irq, unsigned long trigger) +static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@ -1311,7 +1491,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@ -1321,10 +1501,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, if (!IO_APIC_IRQ(irq)) return; - cfg = irq_cfg(irq); + cfg = desc->chip_data; mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) + if (assign_irq_vector(irq, cfg, mask)) return; cpus_and(mask, cfg->domain, mask); @@ -1341,12 +1521,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); return; } - ioapic_register_intr(irq, trigger); - if (irq < 16) + ioapic_register_intr(irq, desc, trigger); + if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); @@ -1356,6 +1536,9 @@ static void __init setup_IO_APIC_irqs(void) { int apic, pin, idx, irq; int notcon = 0; + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1387,9 +1570,15 @@ static void __init setup_IO_APIC_irqs(void) if (multi_timer_check(apic, irq)) continue; #endif - add_pin_to_irq(irq, apic, pin); + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; + } + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, + setup_IO_APIC_irq(apic, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -1448,6 +1637,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@ -1537,8 +1727,13 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(irq, cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; + for_each_irq_desc(irq, desc) { + struct irq_pin_list *entry; + + if (!desc) + continue; + cfg = desc->chip_data; + entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@ -2022,14 +2217,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; + struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - __unmask_IO_APIC_irq(irq); + cfg = irq_cfg(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@ -2092,35 +2289,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; - struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + irq = desc->irq; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2142,14 +2341,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) desc->affinity = mask; } -static int migrate_irq_remapped_level(int irq) +static int migrate_irq_remapped_level_desc(struct irq_desc *desc) { int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); - if (io_apic_level_ack_pending(irq)) { + if (io_apic_level_ack_pending(cfg)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@ -2161,14 +2360,15 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); + return ret; } @@ -2178,6 +2378,9 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { + if (!desc) + continue; + if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2198,18 +2401,22 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { - struct irq_desc *desc = irq_to_desc(irq); - if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; desc->pending_mask = mask; - migrate_irq_remapped_level(irq); + migrate_irq_remapped_level_desc(desc); return; } - migrate_ioapic_irq(irq, mask); + migrate_ioapic_irq_desc(desc, mask); +} +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + set_ir_ioapic_affinity_irq_desc(desc, mask); } #endif @@ -2229,6 +2436,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; + if (irq == -1) + continue; + desc = irq_to_desc(irq); if (!desc) continue; @@ -2250,19 +2460,40 @@ unlock: irq_exit(); } -static void irq_complete_move(unsigned int irq) +static void irq_complete_move(struct irq_desc **descp) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = *descp; + struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain has not changed, but affinity did */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } +#endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); @@ -2270,8 +2501,9 @@ static void irq_complete_move(unsigned int irq) } } #else -static inline void irq_complete_move(unsigned int irq) {} +static inline void irq_complete_move(struct irq_desc **descp) {} #endif + #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@ -2282,11 +2514,14 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } + #endif static void ack_apic_edge(unsigned int irq) { - irq_complete_move(irq); + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); move_native_irq(irq); ack_APIC_irq(); } @@ -2295,18 +2530,21 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); + #ifdef CONFIG_X86_32 unsigned long v; int i; #endif + struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); } #endif @@ -2330,7 +2568,8 @@ static void ack_apic_level(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = irq_cfg(irq)->vector; + cfg = desc->chip_data; + i = cfg->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@ -2369,17 +2608,18 @@ static void ack_apic_level(unsigned int irq) * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(irq)) + cfg = desc->chip_data; + if (!io_apic_level_ack_pending(cfg)) move_masked_irq(irq); - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } #endif @@ -2430,20 +2670,22 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(irq, cfg) { - if (IO_APIC_IRQ(irq) && !cfg->vector) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + + cfg = desc->chip_data; + if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < 16) + if (irq < NR_IRQS_LEGACY) make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); + else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; - } } } } @@ -2468,7 +2710,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq (unsigned int irq) +static void ack_lapic_irq(unsigned int irq) { ack_APIC_irq(); } @@ -2480,11 +2722,8 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq) +static void lapic_register_intr(int irq, struct irq_desc *desc) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@ -2588,7 +2827,9 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_desc *desc = irq_to_desc(0); + struct irq_cfg *cfg = desc->chip_data; + int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@ -2603,7 +2844,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); + assign_irq_vector(0, cfg, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2654,10 +2895,10 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); + add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2683,9 +2924,9 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2717,7 +2958,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0); + lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2902,22 +3143,26 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new; - - irq_want = nr_irqs - 1; + struct irq_cfg *cfg_new = NULL; + int cpu = boot_cpu_id; + struct irq_desc *desc_new = NULL; irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { + for (new = irq_want; new < NR_IRQS; new++) { if (platform_legacy_irq(new)) continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) + + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) + } + cfg_new = desc_new->chip_data; + + if (cfg_new->vector != 0) + continue; + if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) irq = new; break; } @@ -2925,15 +3170,21 @@ unsigned int create_irq_nr(unsigned int irq_want) if (irq > 0) { dynamic_irq_init(irq); + /* restore it, in case dynamic_irq_init clear it */ + if (desc_new) + desc_new->chip_data = cfg_new; } return irq; } +static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { + unsigned int irq_want; int irq; - irq = create_irq_nr(nr_irqs - 1); + irq_want = nr_irqs_gsi; + irq = create_irq_nr(irq_want); if (irq == 0) irq = -1; @@ -2944,14 +3195,22 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc; + /* store it, in case dynamic_irq_cleanup clear it */ + desc = irq_to_desc(irq); + cfg = desc->chip_data; dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ + if (desc) + desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } @@ -2966,12 +3225,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms unsigned dest; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3029,35 +3288,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - read_msi_msg(irq, &msg); + read_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); + write_msi_msg_desc(desc, &msg); desc->affinity = mask; } - #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is @@ -3065,11 +3324,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -3078,10 +3337,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3105,9 +3366,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif #endif /* CONFIG_SMP */ @@ -3166,7 +3427,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } #endif -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { int ret; struct msi_msg msg; @@ -3175,7 +3436,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) if (ret < 0) return ret; - set_irq_msi(irq, desc); + set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@ -3195,26 +3456,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) return 0; } -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = build_irq_for_pci_dev(dev) + 0x100; - + irq_want = nr_irqs_gsi; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@ -3228,7 +3476,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) goto error; no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@ -3246,7 +3494,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; int ret, sub_handle; - struct msi_desc *desc; + struct msi_desc *msidesc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@ -3254,10 +3502,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif - irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq_want = nr_irqs_gsi; sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); + irq_want++; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@ -3289,7 +3538,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; sub_handle++; @@ -3310,20 +3559,22 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3335,9 +3586,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@ -3371,8 +3622,8 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_SMP static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; - struct irq_desc *desc; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -3381,10 +3632,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3396,9 +3649,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@ -3453,26 +3706,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif static struct irq_chip ht_irq_chip = { @@ -3492,13 +3747,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int err; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3544,7 +3799,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; - err = assign_irq_vector(irq, *eligible_cpu); + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3553,8 +3810,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, irq_name); spin_unlock_irqrestore(&vector_lock, flags); - cfg = irq_cfg(irq); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3606,29 +3861,16 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries; } -int __init probe_nr_irqs(void) +void __init probe_nr_irqs_gsi(void) { int idx; int nr = 0; -#ifndef CONFIG_XEN - int nr_min = 32; -#else - int nr_min = NR_IRQS; -#endif for (idx = 0; idx < nr_ioapics; idx++) nr += io_apic_get_redir_entries(idx) + 1; - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < nr_min) - nr = nr_min; - if (WARN_ON(nr > NR_IRQS)) - nr = NR_IRQS; - - return nr; + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; } /* -------------------------------------------------------------------------- @@ -3727,19 +3969,31 @@ int __init io_apic_get_version(int ioapic) int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; + if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); + } - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); return 0; } @@ -3775,7 +4029,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; struct irq_cfg *cfg; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -3791,17 +4047,31 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - cfg = irq_cfg(irq); - if (!cfg->vector) - setup_IO_APIC_irq(ioapic, pin, irq, + desc = irq_to_desc(irq); + cfg = desc->chip_data; + if (!cfg->vector) { + setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_trigger(irq_entry), irq_polarity(irq_entry)); + continue; + + } + + /* + * Honour affinities which have been set in early boot + */ + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = TARGET_CPUS; + #ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq_desc(desc, mask); else - set_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + set_ioapic_affinity_irq_desc(desc, mask); } } @@ -3850,7 +4120,6 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f64..3f1d9d18df6 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v) } desc = irq_to_desc(i); + if (!desc) + return 0; + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de..119fc9c8ff7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map) for_each_irq_desc(irq, desc) { cpumask_t mask; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a..900009c7059 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map) int break_affinity = 0; int set_affinity = 1; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e8..6a92f47c52e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -68,8 +68,7 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ + for (i = 0; i < NR_IRQS_LEGACY; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff023539128..40c1e62ec78 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,8 +142,7 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ + for (i = 0; i < NR_IRQS_LEGACY; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1c9cc431ea4..e169ae9b6a6 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void __devinit kvm_setup_secondary_clock(void) +static void __cpuinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 82fb2809ce3..c4b5b24e021 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = { .name = "microcode", }; -static void microcode_fini_cpu(int cpu) +static void __microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); microcode_ops->microcode_fini_cpu(cpu); uci->valid = 0; +} + +static void microcode_fini_cpu(int cpu) +{ + mutex_lock(µcode_mutex); + __microcode_fini_cpu(cpu); mutex_unlock(µcode_mutex); } @@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu) * to this cpu (a bit of paranoia): */ if (microcode_ops->collect_cpu_info(cpu, &nsig)) { - microcode_fini_cpu(cpu); + __microcode_fini_cpu(cpu); + printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n", + cpu); return -1; } - if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { - microcode_fini_cpu(cpu); + if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) { + __microcode_fini_cpu(cpu); + printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n", + cpu); /* Should we look for a new ucode here? */ return 1; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 622dc4a2178..a8e62792d17 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); + unsigned long flags; unsigned int val[2]; memset(csig, 0, sizeof(*csig)); @@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) csig->pf = 1 << ((val[1] >> 18) & 7); } + /* serialize access to the physical write to MSR 0x79 */ + spin_lock_irqsave(µcode_update_lock, flags); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); + spin_unlock_irqrestore(µcode_update_lock, flags); + pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", csig->sig, csig->pf, csig->rev); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1dba0..0f4c1fd5a1f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early) printk(KERN_INFO "Using ACPI for processor (LAPIC) " "configuration information\n"); + if (!mpf) + return; + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1d..95777b0faa7 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include <asm/paravirt.h> -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d78f3..d28bbdc35e4 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) ++p; if (*p == '\0') break; - bridge = simple_strtol(p, &endp, 0); + bridge = simple_strtoul(p, &endp, 0); if (p == endp) break; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a42b02b4df6..a35eaa379ff 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); iommu_area_free(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } @@ -743,10 +745,8 @@ void __init gart_iommu_init(void) unsigned long scratch; long i; - if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { - printk(KERN_INFO "PCI-GART: No AMD GART found.\n"); + if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) return; - } #ifndef CONFIG_AGP_AMD64 no_agp = 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9d5674f7b6c..5e028e1926e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + /* VMI may relocate the fixmap; do this before touching ioremap area */ + vmi_init(); + early_cpu_init(); early_ioremap_init(); @@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be before kernel pagetables are setup - * or fixmap area is touched. - */ - vmi_init(); -#endif + /* Must be before kernel pagetables are setup */ + vmi_activate(); /* after early param, so could get panic from serial */ reserve_early_setup_data(); @@ -1082,7 +1080,7 @@ void __init setup_arch(char **cmdline_p) ioapic_init_mappings(); /* need to wait for io_apic is mapped */ - nr_irqs = probe_nr_irqs(); + probe_nr_irqs_gsi(); kvm_guest_init(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7b109339731..f71f96fc9e6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -294,9 +294,7 @@ static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ -#ifdef CONFIG_VMI vmi_bringup(); -#endif cpu_init(); preempt_disable(); smp_callin(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 8b6c393ab9f..22fd6577156 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -960,8 +960,6 @@ static inline int __init activate_vmi(void) void __init vmi_init(void) { - unsigned long flags; - if (!vmi_rom) probe_vmi_rom(); else @@ -973,13 +971,21 @@ void __init vmi_init(void) reserve_top_address(-vmi_rom->virtual_top); - local_irq_save(flags); - activate_vmi(); - #ifdef CONFIG_X86_IO_APIC /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif +} + +void vmi_activate(void) +{ + unsigned long flags; + + if (!vmi_rom) + return; + + local_irq_save(flags); + activate_vmi(); local_irq_restore(flags & X86_EFLAGS_IF); } diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb75e82..15c3e699918 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __init xsave_cntxt_init(void) +void __ref xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1983d9477c..410ddbc1aa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) } rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } kvm_mmu_flush_tlb(vcpu); - kvm_unlink_unsync_page(vcpu->kvm, sp); return 0; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 613ec9aa674..84eee43bbe7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); kvm_release_pfn_clean(sw->pfn); sw->sptep = NULL; return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d06b4dc0e2e..a4018b01e1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 022cd41ea9b..202864ad49a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/pii"; break; case 6 ... 8: + case 10 ... 11: *cpu_type = "i386/piii"; break; case 9: + case 13: *cpu_type = "i386/p6_mobile"; break; - case 10 ... 13: - *cpu_type = "i386/p6"; - break; case 14: *cpu_type = "i386/core"; break; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 3f1b81a83e2..e9f80c744cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) int i; if (!reset_value) { - reset_value = kmalloc(sizeof(unsigned) * num_counters, + reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; @@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); @@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 3c27a809393..2051dc96b8e 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config - * have 4096 bytes. Even if the device is capable, that doesn't mean we can - * access it. Maybe we don't have a way to generate extended config space - * accesses. So check it + * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have + * 4096 bytes configuration space for each function of their processor + * configuration space. */ -static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) { dev->cfg_size = pci_cfg_space_size_ext(dev); } - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); /* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 688936044dc..636ef4caa52 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) * For 64-bit, we must skip the Xen hole in the middle of the address * space, just after the big x86-64 virtual hole. */ -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - pgd_t *pgd = mm->pgd; int flush = 0; unsigned hole_low, hole_high; unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; @@ -753,6 +752,14 @@ out: return flush; } +static int xen_pgd_walk(struct mm_struct *mm, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) +{ + return __xen_pgd_walk(mm, mm->pgd, func, limit); +} + /* If we're using split pte locks, then take the page's lock and return a pointer to it. Otherwise return NULL. */ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) @@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { + if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { /* re-enable interrupts for flushing */ xen_mc_issue(0); @@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) PT_PMD); #endif - xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); + __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); xen_mc_issue(0); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d77da613b1d..acd9b6705e0 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu) alternatives_smp_switch(0); } -static void xen_play_dead(void) +static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ { play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d7422dc2a55..9e1afae8461 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_setup_vcpu_info_placement(void); +void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); |