diff options
Diffstat (limited to 'arch/x86/kernel/apic/io_apic.c')
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 292 |
1 files changed, 142 insertions, 150 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 00e6071cefc..da99ffcdfde 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -389,6 +389,8 @@ struct io_apic { unsigned int index; unsigned int unused[3]; unsigned int data; + unsigned int unused2[11]; + unsigned int eoi; }; static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) @@ -397,6 +399,12 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } +static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(vector, &io_apic->eoi); +} + static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -546,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; -#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -588,10 +592,12 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(desc->affinity, cfg->domain, mask); + /* check that before desc->addinity get updated */ set_extra_move_desc(desc, mask); - return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); } static void @@ -849,9 +855,9 @@ __setup("pirq=", ioapic_pirq_setup); static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; /* - * Saves and masks all the unmasked IO-APIC RTE's + * Saves all the IO-APIC RTE's */ -int save_mask_IO_APIC_setup(void) +int save_IO_APIC_setup(void) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -876,16 +882,9 @@ int save_mask_IO_APIC_setup(void) } for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + early_ioapic_entries[apic][pin] = ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } return 0; @@ -898,6 +897,25 @@ nomem: return -ENOMEM; } +void mask_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + if (!early_ioapic_entries[apic]) + break; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin]; + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + } +} + void restore_IO_APIC_setup(void) { int apic, pin; @@ -1411,9 +1429,7 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip; -#endif #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1452,7 +1468,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t else desc->status &= ~IRQ_LEVEL; -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { desc->status |= IRQ_MOVE_PCNTXT; if (trigger) @@ -1464,7 +1479,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); return; } -#endif + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, @@ -1478,14 +1493,13 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t int setup_ioapic_entry(int apic_id, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, - int polarity, int vector) + int polarity, int vector, int pin) { /* * add it to the IO-APIC irq-routing table: */ memset(entry,0,sizeof(*entry)); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; @@ -1504,7 +1518,14 @@ int setup_ioapic_entry(int apic_id, int irq, irte.present = 1; irte.dst_mode = apic->irq_dest_mode; - irte.trigger_mode = trigger; + /* + * Trigger mode in the IRTE will always be edge, and the + * actual level or edge trigger will be setup in the IO-APIC + * RTE. This will help simplify level triggered irq migration. + * For more details, see the comments above explainig IO-APIC + * irq migration in the presence of interrupt-remapping. + */ + irte.trigger_mode = 0; irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1515,18 +1536,21 @@ int setup_ioapic_entry(int apic_id, int irq, ir_entry->zero = 0; ir_entry->format = 1; ir_entry->index = (index & 0x7fff); - } else -#endif - { + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + ir_entry->vector = pin; + } else { entry->delivery_mode = apic->irq_delivery_mode; entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; + entry->vector = vector; } entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; entry->polarity = polarity; - entry->vector = vector; /* Mask level triggered irqs. * Use IRQ_DELAYED_DISABLE for edge triggered irqs. @@ -1561,7 +1585,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + dest, trigger, polarity, cfg->vector, pin)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic_id].apicid, pin); __clear_irq_vector(irq, cfg); @@ -1642,10 +1666,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, { struct IO_APIC_route_entry entry; -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; -#endif memset(&entry, 0, sizeof(entry)); @@ -2040,8 +2062,13 @@ void disable_IO_APIC(void) * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. + * + * With interrupt-remapping, for now we will use virtual wire A mode, + * as virtual wire B is little complex (need to configure both + * IOAPIC RTE aswell as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1) { + if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -2061,7 +2088,10 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } - disconnect_bsp_APIC(ioapic_i8259.pin != -1); + /* + * Use virtual wire A mode when interrupt remapping is enabled. + */ + disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); } #ifdef CONFIG_X86_32 @@ -2303,37 +2333,24 @@ static int ioapic_retrigger_irq(unsigned int irq) #ifdef CONFIG_SMP #ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); /* * Migrate the IO-APIC irq in the presence of intr-remapping. * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. + * For both level and edge triggered, irq migration is a simple atomic + * update(of vector and cpu destination) of IRTE and flush the hardware cache. * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. + * For level triggered, we eliminate the io-apic RTE modification (with the + * updated vector information), by using a virtual vector (io-apic pin number). + * Real vector that is used for interrupting cpu will be coming from + * the interrupt-remapping table entry. */ static void migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; - int modify_ioapic_rte; unsigned int dest; - unsigned long flags; unsigned int irq; if (!cpumask_intersects(mask, cpu_online_mask)) @@ -2351,13 +2368,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -2372,73 +2382,12 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) cpumask_copy(desc->affinity, mask); } -static int migrate_irq_remapped_level_desc(struct irq_desc *desc) -{ - int ret = -1; - struct irq_cfg *cfg = desc->chip_data; - - mask_IO_APIC_irq_desc(desc); - - if (io_apic_level_ack_pending(cfg)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq_desc(desc); - - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - /* * Migrates the IRQ destination in the process context. */ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(desc->pending_mask, mask); - migrate_irq_remapped_level_desc(desc); - return; - } - migrate_ioapic_irq_desc(desc, mask); } static void set_ir_ioapic_affinity_irq(unsigned int irq, @@ -2448,6 +2397,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, set_ir_ioapic_affinity_irq_desc(desc, mask); } +#else +static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) +{ +} #endif asmlinkage void smp_irq_move_cleanup_interrupt(void) @@ -2461,6 +2415,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irq; + unsigned int irr; struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; @@ -2480,6 +2435,18 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + /* + * Check if the vector that needs to be cleanedup is + * registered at the cpu's IRR. If so, then this is not + * the best time to clean it up. Lets clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to myself. + */ + if (irr & (1 << (vector % 32))) { + apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + goto unlock; + } __get_cpu_var(vector_irq)[vector] = -1; cfg->move_cleanup_count--; unlock: @@ -2529,9 +2496,44 @@ static inline void irq_complete_move(struct irq_desc **descp) {} #endif #ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + + entry = cfg->irq_2_pin; + for (;;) { + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_eoi(apic, pin); + entry = entry->next; + } +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ack_x2apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); ack_x2APIC_irq(); + eoi_ioapic_irq(desc); } static void ack_x2apic_edge(unsigned int irq) @@ -2662,20 +2664,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip __read_mostly = { .name = "IR-IO-APIC", .startup = startup_ioapic_irq, .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, +#ifdef CONFIG_INTR_REMAP .ack = ack_x2apic_edge, .eoi = ack_x2apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, #endif +#endif .retrigger = ioapic_retrigger_irq, }; -#endif static inline void init_IO_APIC_traps(void) { @@ -2901,10 +2903,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); -#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2940,10 +2940,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) @@ -3237,9 +3235,7 @@ void destroy_irq(unsigned int irq) if (desc) desc->chip_data = cfg; -#ifdef CONFIG_INTR_REMAP free_irte(irq); -#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); @@ -3265,7 +3261,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irte irte; int ir_index; @@ -3291,10 +3286,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms MSI_ADDR_IR_SHV | MSI_ADDR_IR_INDEX1(ir_index) | MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { - msg->address_hi = MSI_ADDR_BASE_HI; + } else { + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | ((apic->irq_dest_mode == 0) ? @@ -3394,15 +3392,16 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP static struct irq_chip msi_ir_chip = { .name = "IR-PCI-MSI", .unmask = unmask_msi_irq, .mask = mask_msi_irq, +#ifdef CONFIG_INTR_REMAP .ack = ack_x2apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, #endif +#endif .retrigger = ioapic_retrigger_irq, }; @@ -3432,7 +3431,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } return index; } -#endif static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { @@ -3446,7 +3444,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irq_desc *desc = irq_to_desc(irq); /* @@ -3455,7 +3452,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); } else -#endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); @@ -3469,11 +3465,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret, sub_handle; struct msi_desc *msidesc; unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; + struct intel_iommu *iommu = NULL; int index = 0; -#endif irq_want = nr_irqs_gsi; sub_handle = 0; @@ -3482,7 +3475,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; -#ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3510,7 +3502,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) set_irte_irq(irq, iommu, index, sub_handle); } no_ir: -#endif ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; @@ -3528,7 +3519,7 @@ void arch_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#ifdef CONFIG_DMAR +#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { @@ -3609,7 +3600,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) #endif /* CONFIG_SMP */ -struct irq_chip hpet_msi_type = { +static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .unmask = hpet_msi_unmask, .mask = hpet_msi_mask, @@ -4045,11 +4036,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) set_ir_ioapic_affinity_irq_desc(desc, mask); else -#endif set_ioapic_affinity_irq_desc(desc, mask); } @@ -4142,9 +4131,12 @@ static int __init ioapic_insert_resources(void) struct resource *r = ioapic_resources; if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; + if (nr_ioapics > 0) { + printk(KERN_ERR + "IO APIC resources couldn't be allocated.\n"); + return -1; + } + return 0; } for (i = 0; i < nr_ioapics; i++) { |