diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/Kconfig | 11 | ||||
-rw-r--r-- | arch/powerpc/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/ppc_ksyms.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/rtas_pci.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/sys_ppc32.c | 24 | ||||
-rw-r--r-- | arch/powerpc/kernel/systbl.S | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/maple/setup.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 489 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_cache.c | 316 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_driver.c | 376 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_event.c | 39 |
16 files changed, 854 insertions, 425 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 28004f002ec..935d9657151 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -275,6 +275,7 @@ config PPC_PSERIES select PPC_I8259 select PPC_RTAS select RTAS_ERROR_LOGGING + select PPC_UDBG_16550 default y config PPC_CHRP @@ -284,6 +285,7 @@ config PPC_CHRP select PPC_INDIRECT_PCI select PPC_RTAS select PPC_MPC106 + select PPC_UDBG_16550 default y config PPC_PMAC @@ -306,6 +308,7 @@ config PPC_PREP depends on PPC_MULTIPLATFORM && PPC32 && BROKEN select PPC_I8259 select PPC_INDIRECT_PCI + select PPC_UDBG_16550 default y config PPC_MAPLE @@ -314,6 +317,7 @@ config PPC_MAPLE select U3_DART select MPIC_BROKEN_U3 select GENERIC_TBSYNC + select PPC_UDBG_16550 default n help This option enables support for the Maple 970FX Evaluation Board. @@ -324,6 +328,7 @@ config PPC_CELL depends on PPC_MULTIPLATFORM && PPC64 select PPC_RTAS select MMIO_NVRAM + select PPC_UDBG_16550 config PPC_OF def_bool y @@ -370,6 +375,10 @@ config MPIC_BROKEN_U3 depends on PPC_MAPLE default y +config PPC_UDBG_16550 + bool + default n + config CELL_IIC depends on PPC_CELL bool @@ -403,7 +412,7 @@ config PPC_MPC106 config GENERIC_TBSYNC bool - default y if CONFIG_PPC32 && CONFIG_SMP + default y if PPC32 && SMP default n source "drivers/cpufreq/Kconfig" diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 17ed5018288..144e284d21d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,7 +54,7 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_SERIAL_8250) += legacy_serial.o udbg_16550.o +obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o module-$(CONFIG_PPC64) += module_64.o obj-$(CONFIG_MODULES) += $(module-y) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index fc60a773af7..ba21a6c4f46 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -381,7 +381,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); - dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/ + dev->cfg_size = pci_cfg_space_size(dev); sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index b2758148a0d..16d9a904f3c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -244,7 +244,6 @@ EXPORT_SYMBOL(set_context); extern long mol_trampoline; EXPORT_SYMBOL(mol_trampoline); /* For MOL */ EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */ #ifdef CONFIG_SMP extern int mmu_hash_lock; EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 45b8109951f..5579f655991 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -72,7 +72,7 @@ static int of_device_available(struct device_node * dn) return 0; } -static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e5d285adb49..db72a92943b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -299,9 +299,7 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.init_early) ppc_md.init_early(); -#ifdef CONFIG_SERIAL_8250 find_legacy_serial_ports(); -#endif finish_device_tree(); smp_setup_cpu_maps(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 81567e93126..c4b76961d6d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -472,9 +472,7 @@ void __init setup_system(void) * hash table management for us, thus ioremap works. We do that early * so that further code can be debugged */ -#ifdef CONFIG_SERIAL_8250 find_legacy_serial_ports(); -#endif /* * "Finish" the device-tree, that is do the actual parsing of diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 0ee44be4ab7..475249dc235 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -552,30 +552,6 @@ asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec return ret; } -asmlinkage int compat_sys_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) -{ - return sys_pciconfig_read((unsigned long) bus, - (unsigned long) dfn, - (unsigned long) off, - (unsigned long) len, - compat_ptr(ubuf)); -} - -asmlinkage int compat_sys_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) -{ - return sys_pciconfig_write((unsigned long) bus, - (unsigned long) dfn, - (unsigned long) off, - (unsigned long) len, - compat_ptr(ubuf)); -} - -asmlinkage int compat_sys_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn) -{ - return sys_pciconfig_iobase(which, in_bus, in_devfn); -} - - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index dd2ab85e351..68013179a50 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -239,9 +239,9 @@ SYS32ONLY(ftruncate64) SYSX(sys_ni_syscall,sys_stat64,sys_stat64) SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64) SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64) -COMPAT_SYS(pciconfig_read) -COMPAT_SYS(pciconfig_write) -COMPAT_SYS(pciconfig_iobase) +SYSCALL(pciconfig_read) +SYSCALL(pciconfig_write) +SYSCALL(pciconfig_iobase) SYSCALL(ni_syscall) SYSCALL(getdents64) SYSCALL(pivot_root) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 2ffca63602c..7b278d83739 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -174,7 +174,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, pa = addr & PAGE_MASK; size = PAGE_ALIGN(addr + size) - pa; - if (size == 0) + if ((size == 0) || (pa == 0)) return NULL; if (mem_init_done) { diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index dd73e38bfb7..a1cb4d23672 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -71,9 +71,6 @@ #define DBG(fmt...) #endif -extern void generic_find_legacy_serial_ports(u64 *physport, - unsigned int *default_speed); - static void maple_restart(char *cmd) { unsigned int maple_nvram_base; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 6accdd15550..61616d14407 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_event.o +obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 7fbfd16d72b..17cea7f2afd 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -76,15 +76,14 @@ */ #define EEH_MAX_FAILS 100000 -/* Misc forward declaraions */ -static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn); - /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; static int ibm_read_slot_reset_state; static int ibm_read_slot_reset_state2; static int ibm_slot_error_detail; +static int ibm_get_config_addr_info; +static int ibm_configure_bridge; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); @@ -98,308 +97,23 @@ static DEFINE_SPINLOCK(slot_errbuf_lock); static int eeh_error_buf_size; /* System monitoring statistics */ -static DEFINE_PER_CPU(unsigned long, no_device); -static DEFINE_PER_CPU(unsigned long, no_dn); -static DEFINE_PER_CPU(unsigned long, no_cfg_addr); -static DEFINE_PER_CPU(unsigned long, ignored_check); -static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); -static DEFINE_PER_CPU(unsigned long, false_positives); -static DEFINE_PER_CPU(unsigned long, ignored_failures); -static DEFINE_PER_CPU(unsigned long, slot_resets); - -/** - * The pci address cache subsystem. This subsystem places - * PCI device address resources into a red-black tree, sorted - * according to the address range, so that given only an i/o - * address, the corresponding PCI device can be **quickly** - * found. It is safe to perform an address lookup in an interrupt - * context; this ability is an important feature. - * - * Currently, the only customer of this code is the EEH subsystem; - * thus, this code has been somewhat tailored to suit EEH better. - * In particular, the cache does *not* hold the addresses of devices - * for which EEH is not enabled. - * - * (Implementation Note: The RB tree seems to be better/faster - * than any hash algo I could think of for this problem, even - * with the penalty of slow pointer chases for d-cache misses). - */ -struct pci_io_addr_range -{ - struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; - struct pci_dev *pcidev; - unsigned int flags; -}; - -static struct pci_io_addr_cache -{ - struct rb_root rb_root; - spinlock_t piar_lock; -} pci_io_addr_cache_root; - -static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) -{ - struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; - - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (addr < piar->addr_lo) { - n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->pcidev; - } - } - } - - return NULL; -} - -/** - * pci_get_device_by_addr - Get device, given only address - * @addr: mmio (PIO) phys address or i/o port number - * - * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset - * from zero (that is, they do *not* have pci_io_addr added in). - * It is safe to call this function within an interrupt. - */ -static struct pci_dev *pci_get_device_by_addr(unsigned long addr) -{ - struct pci_dev *dev; - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_get_device_by_addr(addr); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; -} - -#ifdef DEBUG -/* - * Handy-dandy debug print routine, does nothing more - * than print out the contents of our addr cache. - */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) -{ - struct rb_node *n; - int cnt = 0; - - n = rb_first(&cache->rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", - (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); - cnt++; - n = rb_next(n); - } -} -#endif - -/* Insert address range into the rb tree. */ -static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) -{ - struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct pci_io_addr_range *piar; - - /* Walk tree, find a place to insert into tree */ - while (*p) { - parent = *p; - piar = rb_entry(parent, struct pci_io_addr_range, rb_node); - if (ahi < piar->addr_lo) { - p = &parent->rb_left; - } else if (alo > piar->addr_hi) { - p = &parent->rb_right; - } else { - if (dev != piar->pcidev || - alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); - } - return piar; - } - } - piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); - if (!piar) - return NULL; +static unsigned long no_device; +static unsigned long no_dn; +static unsigned long no_cfg_addr; +static unsigned long ignored_check; +static unsigned long total_mmio_ffs; +static unsigned long false_positives; +static unsigned long ignored_failures; +static unsigned long slot_resets; - piar->addr_lo = alo; - piar->addr_hi = ahi; - piar->pcidev = dev; - piar->flags = flags; - -#ifdef DEBUG - printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name (dev)); -#endif - - rb_link_node(&piar->rb_node, parent, p); - rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); - - return piar; -} - -static void __pci_addr_cache_insert_device(struct pci_dev *dev) -{ - struct device_node *dn; - struct pci_dn *pdn; - int i; - int inserted = 0; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); - return; - } - - /* Skip any devices for which EEH is not enabled. */ - pdn = PCI_DN(dn); - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { -#ifdef DEBUG - printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", - pci_name(dev), pdn->node->full_name); -#endif - return; - } - - /* The cache holds a reference to the device... */ - pci_dev_get(dev); - - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); - - /* We are interested only bus addresses, not dma or other stuff */ - if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if (start == 0 || ~start == 0 || end == 0 || ~end == 0) - continue; - pci_addr_cache_insert(dev, start, end, flags); - inserted = 1; - } - - /* If there was nothing to add, the cache has no reference... */ - if (!inserted) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_insert_device - Add a device to the address cache - * @dev: PCI device whose I/O addresses we are interested in. - * - * In order to support the fast lookup of devices based on addresses, - * we maintain a cache of devices that can be quickly searched. - * This routine adds a device to that cache. - */ -static void pci_addr_cache_insert_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) -{ - struct rb_node *n; - int removed = 0; - -restart: - n = rb_first(&pci_io_addr_cache_root.rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (piar->pcidev == dev) { - rb_erase(n, &pci_io_addr_cache_root.rb_root); - removed = 1; - kfree(piar); - goto restart; - } - n = rb_next(n); - } - - /* The cache no longer holds its reference to this device... */ - if (removed) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_remove_device - remove pci device from addr cache - * @dev: device to remove - * - * Remove a device from the addr-cache tree. - * This is potentially expensive, since it will walk - * the tree multiple times (once per resource). - * But so what; device removal doesn't need to be that fast. - */ -static void pci_addr_cache_remove_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -/** - * pci_addr_cache_build - Build a cache of I/O addresses - * - * Build a cache of pci i/o addresses. This cache will be used to - * find the pci device that corresponds to a given address. - * This routine scans all pci busses to build the cache. - * Must be run late in boot process, after the pci controllers - * have been scaned for devices (after all device resources are known). - */ -void __init pci_addr_cache_build(void) -{ - struct device_node *dn; - struct pci_dev *dev = NULL; - - if (!eeh_subsystem_enabled) - return; - - spin_lock_init(&pci_io_addr_cache_root.piar_lock); - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* Ignore PCI bridges ( XXX why ??) */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { - continue; - } - pci_addr_cache_insert_device(dev); - - /* Save the BAR's; firmware doesn't restore these after EEH reset */ - dn = pci_device_to_OF_node(dev); - eeh_save_bars(dev, PCI_DN(dn)); - } - -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. */ - pci_addr_cache_print(&pci_io_addr_cache_root); -#endif -} +#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) /* --------------------------------------------------------------- */ -/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ +/* Below lies the EEH event infrastructure */ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) { + int config_addr; unsigned long flags; int rc; @@ -407,8 +121,13 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + rc = rtas_call(ibm_slot_error_detail, - 8, 1, NULL, pdn->eeh_config_addr, + 8, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), NULL, 0, virt_to_phys(slot_errbuf), @@ -428,6 +147,7 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) { int token, outputs; + int config_addr; if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { token = ibm_read_slot_reset_state2; @@ -438,7 +158,12 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) outputs = 3; } - return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr, + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + return rtas_call(token, 3, outputs, rets, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); } @@ -462,7 +187,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) /** * Return the "partitionable endpoint" (pe) under which this device lies */ -static struct device_node * find_device_pe(struct device_node *dn) +struct device_node * find_device_pe(struct device_node *dn) { while ((dn->parent) && PCI_DN(dn->parent) && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { @@ -485,6 +210,11 @@ static void __eeh_mark_slot (struct device_node *dn, int mode_flag) if (PCI_DN(dn)) { PCI_DN(dn)->eeh_mode |= mode_flag; + /* Mark the pci device driver too */ + struct pci_dev *dev = PCI_DN(dn)->pcidev; + if (dev && dev->driver) + dev->error_state = pci_channel_io_frozen; + if (dn->child) __eeh_mark_slot (dn->child, mode_flag); } @@ -495,6 +225,11 @@ static void __eeh_mark_slot (struct device_node *dn, int mode_flag) void eeh_mark_slot (struct device_node *dn, int mode_flag) { dn = find_device_pe (dn); + + /* Back up one, since config addrs might be shared */ + if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) + dn = dn->parent; + PCI_DN(dn)->eeh_mode |= mode_flag; __eeh_mark_slot (dn->child, mode_flag); } @@ -516,7 +251,13 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) { unsigned long flags; spin_lock_irqsave(&confirm_error_lock, flags); + dn = find_device_pe (dn); + + /* Back up one, since config addrs might be shared */ + if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) + dn = dn->parent; + PCI_DN(dn)->eeh_mode &= ~mode_flag; PCI_DN(dn)->eeh_check_count = 0; __eeh_clear_slot (dn->child, mode_flag); @@ -544,15 +285,16 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) int rets[3]; unsigned long flags; struct pci_dn *pdn; + enum pci_channel_state state; int rc = 0; - __get_cpu_var(total_mmio_ffs)++; + total_mmio_ffs++; if (!eeh_subsystem_enabled) return 0; if (!dn) { - __get_cpu_var(no_dn)++; + no_dn++; return 0; } pdn = PCI_DN(dn); @@ -560,7 +302,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) /* Access to IO BARs might get this far and still not want checking. */ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || pdn->eeh_mode & EEH_MODE_NOCHECK) { - __get_cpu_var(ignored_check)++; + ignored_check++; #ifdef DEBUG printk ("EEH:ignored check (%x) for %s %s\n", pdn->eeh_mode, pci_name (dev), dn->full_name); @@ -568,8 +310,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) return 0; } - if (!pdn->eeh_config_addr) { - __get_cpu_var(no_cfg_addr)++; + if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) { + no_cfg_addr++; return 0; } @@ -611,7 +353,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) if (ret != 0) { printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", ret, dn->full_name); - __get_cpu_var(false_positives)++; + false_positives++; rc = 0; goto dn_unlock; } @@ -620,14 +362,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) if (rets[1] != 1) { printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", ret, dn->full_name); - __get_cpu_var(false_positives)++; + false_positives++; rc = 0; goto dn_unlock; } /* If not the kind of error we know about, punt. */ if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { - __get_cpu_var(false_positives)++; + false_positives++; rc = 0; goto dn_unlock; } @@ -635,12 +377,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) /* Note that config-io to empty slots may fail; * we recognize empty because they don't have children. */ if ((rets[0] == 5) && (dn->child == NULL)) { - __get_cpu_var(false_positives)++; + false_positives++; rc = 0; goto dn_unlock; } - __get_cpu_var(slot_resets)++; + slot_resets++; /* Avoid repeated reports of this failure, including problems * with other functions on this device, and functions under @@ -648,8 +390,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) eeh_mark_slot (dn, EEH_MODE_ISOLATED); spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event (dn, dev, rets[0], rets[2]); - + state = pci_channel_io_normal; + if ((rets[0] == 2) || (rets[0] == 4)) + state = pci_channel_io_frozen; + if (rets[0] == 5) + state = pci_channel_io_perm_failure; + eeh_send_failure_event (dn, dev, state, rets[2]); + /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ @@ -685,7 +432,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon addr = eeh_token_to_phys((unsigned long __force) token); dev = pci_get_device_by_addr(addr); if (!dev) { - __get_cpu_var(no_device)++; + no_device++; return val; } @@ -716,11 +463,16 @@ eeh_slot_availability(struct pci_dn *pdn) if (rc) return rc; if (rets[1] == 0) return -1; /* EEH is not supported */ - if (rets[0] == 0) return 0; /* Oll Korrect */ + if (rets[0] == 0) return 0; /* Oll Korrect */ if (rets[0] == 5) { if (rets[2] == 0) return -1; /* permanently unavailable */ return rets[2]; /* number of millisecs to wait */ } + if (rets[0] == 1) + return 250; + + printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n", + rc, rets[0], rets[1], rets[2]); return -1; } @@ -737,6 +489,7 @@ eeh_slot_availability(struct pci_dn *pdn) static void rtas_pci_slot_reset(struct pci_dn *pdn, int state) { + int config_addr; int rc; BUG_ON (pdn==NULL); @@ -747,8 +500,13 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) return; } + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + rc = rtas_call(ibm_set_slot_reset,4,1, NULL, - pdn->eeh_config_addr, + config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); @@ -761,9 +519,11 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) /** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second * dn -- device node to be reset. + * + * Return 0 if success, else a non-zero value. */ -void +int rtas_set_slot_reset(struct pci_dn *pdn) { int i, rc; @@ -793,10 +553,21 @@ rtas_set_slot_reset(struct pci_dn *pdn) * ready to be used; if not, wait for recovery. */ for (i=0; i<10; i++) { rc = eeh_slot_availability (pdn); - if (rc <= 0) break; + if (rc < 0) + printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name); + if (rc == 0) + return 0; + if (rc < 0) + return -1; msleep (rc+100); } + + rc = eeh_slot_availability (pdn); + if (rc) + printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name); + + return rc; } /* ------------------------------------------------------- */ @@ -851,7 +622,7 @@ void eeh_restore_bars(struct pci_dn *pdn) if (!pdn) return; - if (! pdn->eeh_is_bridge) + if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) __restore_bars (pdn); dn = pdn->node->child; @@ -869,30 +640,30 @@ void eeh_restore_bars(struct pci_dn *pdn) * PCI devices are added individuallly; but, for the restore, * an entire slot is reset at a time. */ -static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn) +static void eeh_save_bars(struct pci_dn *pdn) { int i; - if (!pdev || !pdn ) + if (!pdn ) return; for (i = 0; i < 16; i++) - pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]); - - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - pdn->eeh_is_bridge = 1; + rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); } void rtas_configure_bridge(struct pci_dn *pdn) { - int token = rtas_token ("ibm,configure-bridge"); + int config_addr; int rc; - if (token == RTAS_UNKNOWN_SERVICE) - return; - rc = rtas_call(token,3,1, NULL, - pdn->eeh_config_addr, + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + rc = rtas_call(ibm_configure_bridge,3,1, NULL, + config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); if (rc) { @@ -927,6 +698,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) int enable; struct pci_dn *pdn = PCI_DN(dn); + pdn->class_code = 0; pdn->eeh_mode = 0; pdn->eeh_check_count = 0; pdn->eeh_freeze_count = 0; @@ -943,6 +715,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) pdn->eeh_mode |= EEH_MODE_NOCHECK; return NULL; } + pdn->class_code = *class_code; /* * Now decide if we are going to "Disable" EEH checking @@ -953,8 +726,10 @@ static void *early_enable_eeh(struct device_node *dn, void *data) * But there are a few cases like display devices that make sense. */ enable = 1; /* i.e. we will do checking */ +#if 0 if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) enable = 0; +#endif if (!enable) pdn->eeh_mode |= EEH_MODE_NOCHECK; @@ -973,8 +748,22 @@ static void *early_enable_eeh(struct device_node *dn, void *data) eeh_subsystem_enabled = 1; pdn->eeh_mode |= EEH_MODE_SUPPORTED; pdn->eeh_config_addr = regs[0]; + + /* If the newer, better, ibm,get-config-addr-info is supported, + * then use that instead. */ + pdn->eeh_pe_config_addr = 0; + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + unsigned int rets[2]; + ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, + pdn->eeh_config_addr, + info->buid_hi, info->buid_lo, + 0); + if (ret == 0) + pdn->eeh_pe_config_addr = rets[0]; + } #ifdef DEBUG - printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); + printk(KERN_DEBUG "EEH: %s: eeh enabled, config=%x pe_config=%x\n", + dn->full_name, pdn->eeh_config_addr, pdn->eeh_pe_config_addr); #endif } else { @@ -993,6 +782,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) dn->full_name); } + eeh_save_bars(pdn); return NULL; } @@ -1026,6 +816,8 @@ void __init eeh_init(void) ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); + ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) return; @@ -1080,12 +872,10 @@ void eeh_add_device_early(struct device_node *dn) if (!dn || !PCI_DN(dn)) return; phb = PCI_DN(dn)->phb; - if (NULL == phb || 0 == phb->buid) { - printk(KERN_WARNING "EEH: Expected buid but found none for %s\n", - dn->full_name); - dump_stack(); + + /* USB Bus children of PCI devices will not have BUID's */ + if (NULL == phb || 0 == phb->buid) return; - } info.buid_hi = BUID_HI(phb->buid); info.buid_lo = BUID_LO(phb->buid); @@ -1127,7 +917,6 @@ void eeh_add_device_late(struct pci_dev *dev) pdn->pcidev = dev; pci_addr_cache_insert_device (dev); - eeh_save_bars(dev, pdn); } EXPORT_SYMBOL_GPL(eeh_add_device_late); @@ -1175,25 +964,9 @@ EXPORT_SYMBOL_GPL(eeh_remove_bus_device); static int proc_eeh_show(struct seq_file *m, void *v) { - unsigned int cpu; - unsigned long ffs = 0, positives = 0, failures = 0; - unsigned long resets = 0; - unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0; - - for_each_cpu(cpu) { - ffs += per_cpu(total_mmio_ffs, cpu); - positives += per_cpu(false_positives, cpu); - failures += per_cpu(ignored_failures, cpu); - resets += per_cpu(slot_resets, cpu); - no_dev += per_cpu(no_device, cpu); - no_dn += per_cpu(no_dn, cpu); - no_cfg += per_cpu(no_cfg_addr, cpu); - no_check += per_cpu(ignored_check, cpu); - } - if (0 == eeh_subsystem_enabled) { seq_printf(m, "EEH Subsystem is globally disabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); + seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs); } else { seq_printf(m, "EEH Subsystem is enabled\n"); seq_printf(m, @@ -1205,8 +978,10 @@ static int proc_eeh_show(struct seq_file *m, void *v) "eeh_false_positives=%ld\n" "eeh_ignored_failures=%ld\n" "eeh_slot_resets=%ld\n", - no_dev, no_dn, no_cfg, no_check, - ffs, positives, failures, resets); + no_device, no_dn, no_cfg_addr, + ignored_check, total_mmio_ffs, + false_positives, ignored_failures, + slot_resets); } return 0; diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c new file mode 100644 index 00000000000..d4a402c5866 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -0,0 +1,316 @@ +/* + * eeh_cache.c + * PCI address cache; allows the lookup of PCI devices based on I/O address + * + * Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +#undef DEBUG + +/** + * The pci address cache subsystem. This subsystem places + * PCI device address resources into a red-black tree, sorted + * according to the address range, so that given only an i/o + * address, the corresponding PCI device can be **quickly** + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. + * + * Currently, the only customer of this code is the EEH subsystem; + * thus, this code has been somewhat tailored to suit EEH better. + * In particular, the cache does *not* hold the addresses of devices + * for which EEH is not enabled. + * + * (Implementation Note: The RB tree seems to be better/faster + * than any hash algo I could think of for this problem, even + * with the penalty of slow pointer chases for d-cache misses). + */ +struct pci_io_addr_range +{ + struct rb_node rb_node; + unsigned long addr_lo; + unsigned long addr_hi; + struct pci_dev *pcidev; + unsigned int flags; +}; + +static struct pci_io_addr_cache +{ + struct rb_root rb_root; + spinlock_t piar_lock; +} pci_io_addr_cache_root; + +static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) +{ + struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; + + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (addr < piar->addr_lo) { + n = n->rb_left; + } else { + if (addr > piar->addr_hi) { + n = n->rb_right; + } else { + pci_dev_get(piar->pcidev); + return piar->pcidev; + } + } + } + + return NULL; +} + +/** + * pci_get_device_by_addr - Get device, given only address + * @addr: mmio (PIO) phys address or i/o port number + * + * Given an mmio phys address, or a port number, find a pci device + * that implements this address. Be sure to pci_dev_put the device + * when finished. I/O port numbers are assumed to be offset + * from zero (that is, they do *not* have pci_io_addr added in). + * It is safe to call this function within an interrupt. + */ +struct pci_dev *pci_get_device_by_addr(unsigned long addr) +{ + struct pci_dev *dev; + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + dev = __pci_get_device_by_addr(addr); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); + return dev; +} + +#ifdef DEBUG +/* + * Handy-dandy debug print routine, does nothing more + * than print out the contents of our addr cache. + */ +static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +{ + struct rb_node *n; + int cnt = 0; + + n = rb_first(&cache->rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", + (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + cnt++; + n = rb_next(n); + } +} +#endif + +/* Insert address range into the rb tree. */ +static struct pci_io_addr_range * +pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, + unsigned long ahi, unsigned int flags) +{ + struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct pci_io_addr_range *piar; + + /* Walk tree, find a place to insert into tree */ + while (*p) { + parent = *p; + piar = rb_entry(parent, struct pci_io_addr_range, rb_node); + if (ahi < piar->addr_lo) { + p = &parent->rb_left; + } else if (alo > piar->addr_hi) { + p = &parent->rb_right; + } else { + if (dev != piar->pcidev || + alo != piar->addr_lo || ahi != piar->addr_hi) { + printk(KERN_WARNING "PIAR: overlapping address range\n"); + } + return piar; + } + } + piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + if (!piar) + return NULL; + + piar->addr_lo = alo; + piar->addr_hi = ahi; + piar->pcidev = dev; + piar->flags = flags; + +#ifdef DEBUG + printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", + alo, ahi, pci_name (dev)); +#endif + + rb_link_node(&piar->rb_node, parent, p); + rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); + + return piar; +} + +static void __pci_addr_cache_insert_device(struct pci_dev *dev) +{ + struct device_node *dn; + struct pci_dn *pdn; + int i; + int inserted = 0; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); + return; + } + + /* Skip any devices for which EEH is not enabled. */ + pdn = PCI_DN(dn); + if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || + pdn->eeh_mode & EEH_MODE_NOCHECK) { +#ifdef DEBUG + printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", + pci_name(dev), pdn->node->full_name); +#endif + return; + } + + /* The cache holds a reference to the device... */ + pci_dev_get(dev); + + /* Walk resources on this device, poke them into the tree */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long start = pci_resource_start(dev,i); + unsigned long end = pci_resource_end(dev,i); + unsigned int flags = pci_resource_flags(dev,i); + + /* We are interested only bus addresses, not dma or other stuff */ + if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if (start == 0 || ~start == 0 || end == 0 || ~end == 0) + continue; + pci_addr_cache_insert(dev, start, end, flags); + inserted = 1; + } + + /* If there was nothing to add, the cache has no reference... */ + if (!inserted) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_insert_device - Add a device to the address cache + * @dev: PCI device whose I/O addresses we are interested in. + * + * In order to support the fast lookup of devices based on addresses, + * we maintain a cache of devices that can be quickly searched. + * This routine adds a device to that cache. + */ +void pci_addr_cache_insert_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_insert_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +{ + struct rb_node *n; + int removed = 0; + +restart: + n = rb_first(&pci_io_addr_cache_root.rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (piar->pcidev == dev) { + rb_erase(n, &pci_io_addr_cache_root.rb_root); + removed = 1; + kfree(piar); + goto restart; + } + n = rb_next(n); + } + + /* The cache no longer holds its reference to this device... */ + if (removed) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_remove_device - remove pci device from addr cache + * @dev: device to remove + * + * Remove a device from the addr-cache tree. + * This is potentially expensive, since it will walk + * the tree multiple times (once per resource). + * But so what; device removal doesn't need to be that fast. + */ +void pci_addr_cache_remove_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_remove_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +/** + * pci_addr_cache_build - Build a cache of I/O addresses + * + * Build a cache of pci i/o addresses. This cache will be used to + * find the pci device that corresponds to a given address. + * This routine scans all pci busses to build the cache. + * Must be run late in boot process, after the pci controllers + * have been scaned for devices (after all device resources are known). + */ +void __init pci_addr_cache_build(void) +{ + struct device_node *dn; + struct pci_dev *dev = NULL; + + spin_lock_init(&pci_io_addr_cache_root.piar_lock); + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + /* Ignore PCI bridges */ + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) + continue; + + pci_addr_cache_insert_device(dev); + + dn = pci_device_to_OF_node(dev); + pci_dev_get (dev); /* matching put is in eeh_remove_device() */ + PCI_DN(dn)->pcidev = dev; + } + +#ifdef DEBUG + /* Verify tree built up above, echo back the list of addrs. */ + pci_addr_cache_print(&pci_io_addr_cache_root); +#endif +} + diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c new file mode 100644 index 00000000000..6373372932b --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -0,0 +1,376 @@ +/* + * PCI Error Recovery Driver for RPA-compliant PPC64 platform. + * Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org> + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to <linas@us.ibm.com> + * + */ +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <asm/rtas.h> + + +static inline const char * pcid_name (struct pci_dev *pdev) +{ + if (pdev->dev.driver) + return pdev->dev.driver->name; + return ""; +} + +#ifdef DEBUG +static void print_device_node_tree (struct pci_dn *pdn, int dent) +{ + int i; + if (!pdn) return; + for (i=0;i<dent; i++) + printk(" "); + printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n", + pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr, + pdn->eeh_pe_config_addr, pdn->node->full_name); + dent += 3; + struct device_node *pc = pdn->node->child; + while (pc) { + print_device_node_tree(PCI_DN(pc), dent); + pc = pc->sibling; + } +} +#endif + +/** + * irq_in_use - return true if this irq is being used + */ +static int irq_in_use(unsigned int irq) +{ + int rc = 0; + unsigned long flags; + struct irq_desc *desc = irq_desc + irq; + + spin_lock_irqsave(&desc->lock, flags); + if (desc->action) + rc = 1; + spin_unlock_irqrestore(&desc->lock, flags); + return rc; +} + +/* ------------------------------------------------------- */ +/** eeh_report_error - report an EEH error to each device, + * collect up and merge the device responses. + */ + +static void eeh_report_error(struct pci_dev *dev, void *userdata) +{ + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_frozen; + + if (!driver) + return; + + if (irq_in_use (dev->irq)) { + struct device_node *dn = pci_device_to_OF_node(dev); + PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev->irq); + } + if (!driver->err_handler) + return; + if (!driver->err_handler->error_detected) + return; + + rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + if (*res == PCI_ERS_RESULT_NEED_RESET) return; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; +} + +/** eeh_report_reset -- tell this device that the pci slot + * has been reset. + */ + +static void eeh_report_reset(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + struct device_node *dn = pci_device_to_OF_node(dev); + + if (!driver) + return; + + if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev->irq); + } + if (!driver->err_handler) + return; + if (!driver->err_handler->slot_reset) + return; + + driver->err_handler->slot_reset(dev); +} + +static void eeh_report_resume(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_normal; + + if (!driver) + return; + if (!driver->err_handler) + return; + if (!driver->err_handler->resume) + return; + + driver->err_handler->resume(dev); +} + +static void eeh_report_failure(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_perm_failure; + + if (!driver) + return; + + if (irq_in_use (dev->irq)) { + struct device_node *dn = pci_device_to_OF_node(dev); + PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev->irq); + } + if (!driver->err_handler) + return; + if (!driver->err_handler->error_detected) + return; + driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); +} + +/* ------------------------------------------------------- */ +/** + * handle_eeh_events -- reset a PCI device after hard lockup. + * + * pSeries systems will isolate a PCI slot if the PCI-Host + * bridge detects address or data parity errors, DMA's + * occuring to wild addresses (which usually happen due to + * bugs in device drivers or in PCI adapter firmware). + * Slot isolations also occur if #SERR, #PERR or other misc + * PCI-related errors are detected. + * + * Recovery process consists of unplugging the device driver + * (which generated hotplug events to userspace), then issuing + * a PCI #RST to the device, then reconfiguring the PCI config + * space for all bridges & devices under this slot, and then + * finally restarting the device drivers (which cause a second + * set of hotplug events to go out to userspace). + */ + +/** + * eeh_reset_device() -- perform actual reset of a pci slot + * Args: bus: pointer to the pci bus structure corresponding + * to the isolated slot. A non-null value will + * cause all devices under the bus to be removed + * and then re-added. + * pe_dn: pointer to a "Partionable Endpoint" device node. + * This is the top-level structure on which pci + * bus resets can be performed. + */ + +static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) +{ + int rc; + if (bus) + pcibios_remove_pci_devices(bus); + + /* Reset the pci controller. (Asserts RST#; resets config space). + * Reconfigure bridges and devices. Don't try to bring the system + * up if the reset failed for some reason. */ + rc = rtas_set_slot_reset(pe_dn); + if (rc) + return rc; + + /* New-style config addrs might be shared across multiple devices, + * Walk over all functions on this device */ + if (pe_dn->eeh_pe_config_addr) { + struct device_node *pe = pe_dn->node; + pe = pe->parent->child; + while (pe) { + struct pci_dn *ppe = PCI_DN(pe); + if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { + rtas_configure_bridge(ppe); + eeh_restore_bars(ppe); + } + pe = pe->sibling; + } + } else { + rtas_configure_bridge(pe_dn); + eeh_restore_bars(pe_dn); + } + + /* Give the system 5 seconds to finish running the user-space + * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, + * this is a hack, but if we don't do this, and try to bring + * the device up before the scripts have taken it down, + * potentially weird things happen. + */ + if (bus) { + ssleep (5); + pcibios_add_pci_devices(bus); + } + + return 0; +} + +/* The longest amount of time to wait for a pci device + * to come back on line, in seconds. + */ +#define MAX_WAIT_FOR_RECOVERY 15 + +void handle_eeh_events (struct eeh_event *event) +{ + struct device_node *frozen_dn; + struct pci_dn *frozen_pdn; + struct pci_bus *frozen_bus; + int rc = 0; + enum pci_ers_result result = PCI_ERS_RESULT_NONE; + + frozen_dn = find_device_pe(event->dn); + frozen_bus = pcibios_find_pci_bus(frozen_dn); + + if (!frozen_dn) { + printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n", + pci_name(event->dev)); + return; + } + + /* There are two different styles for coming up with the PE. + * In the old style, it was the highest EEH-capable device + * which was always an EADS pci bridge. In the new style, + * there might not be any EADS bridges, and even when there are, + * the firmware marks them as "EEH incapable". So another + * two-step is needed to find the pci bus.. */ + if (!frozen_bus) + frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); + + if (!frozen_bus) { + printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", + frozen_dn->full_name); + return; + } + +#if 0 + /* We may get "permanent failure" messages on empty slots. + * These are false alarms. Empty slots have no child dn. */ + if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL)) + return; +#endif + + frozen_pdn = PCI_DN(frozen_dn); + frozen_pdn->eeh_freeze_count++; + + if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) + goto hard_fail; + + /* If the reset state is a '5' and the time to reset is 0 (infinity) + * or is more then 15 seconds, then mark this as a permanent failure. + */ + if ((event->state == pci_channel_io_perm_failure) && + ((event->time_unavail <= 0) || + (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) + goto hard_fail; + + eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); + printk(KERN_WARNING + "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", + frozen_pdn->eeh_freeze_count, + pci_name (frozen_pdn->pcidev), + pcid_name(frozen_pdn->pcidev)); + + /* Walk the various device drivers attached to this slot through + * a reset sequence, giving each an opportunity to do what it needs + * to accomplish the reset. Each child gets a report of the + * status ... if any child can't handle the reset, then the entire + * slot is dlpar removed and added. + */ + pci_walk_bus(frozen_bus, eeh_report_error, &result); + + /* If all device drivers were EEH-unaware, then shut + * down all of the device drivers, and hope they + * go down willingly, without panicing the system. + */ + if (result == PCI_ERS_RESULT_NONE) { + rc = eeh_reset_device(frozen_pdn, frozen_bus); + if (rc) + goto hard_fail; + } + + /* If any device called out for a reset, then reset the slot */ + if (result == PCI_ERS_RESULT_NEED_RESET) { + rc = eeh_reset_device(frozen_pdn, NULL); + if (rc) + goto hard_fail; + pci_walk_bus(frozen_bus, eeh_report_reset, 0); + } + + /* If all devices reported they can proceed, the re-enable PIO */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + /* XXX Not supported; we brute-force reset the device */ + rc = eeh_reset_device(frozen_pdn, NULL); + if (rc) + goto hard_fail; + pci_walk_bus(frozen_bus, eeh_report_reset, 0); + } + + /* Tell all device drivers that they can resume operations */ + pci_walk_bus(frozen_bus, eeh_report_resume, 0); + + return; + +hard_fail: + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + printk(KERN_ERR + "EEH: PCI device %s - %s has failed %d times \n" + "and has been permanently disabled. Please try reseating\n" + "this device or replacing it.\n", + pci_name (frozen_pdn->pcidev), + pcid_name(frozen_pdn->pcidev), + frozen_pdn->eeh_freeze_count); + + eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); + + /* Notify all devices that they're about to go down. */ + pci_walk_bus(frozen_bus, eeh_report_failure, 0); + + /* Shut down the device drivers for good. */ + pcibios_remove_pci_devices(frozen_bus); +} + +/* ---------- end of file ---------- */ diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 92497333c2b..9a9961f2748 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -21,6 +21,7 @@ #include <linux/list.h> #include <linux/pci.h> #include <asm/eeh_event.h> +#include <asm/ppc-pci.h> /** Overview: * EEH error states may be detected within exception handlers; @@ -37,31 +38,6 @@ static void eeh_thread_launcher(void *); DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); /** - * eeh_panic - call panic() for an eeh event that cannot be handled. - * The philosophy of this routine is that it is better to panic and - * halt the OS than it is to risk possible data corruption by - * oblivious device drivers that don't know better. - * - * @dev pci device that had an eeh event - * @reset_state current reset state of the device slot - */ -static void eeh_panic(struct pci_dev *dev, int reset_state) -{ - /* - * Since the panic_on_oops sysctl is used to halt the system - * in light of potential corruption, we can use it here. - */ - if (panic_on_oops) { - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, - pci_name(dev)); - } - else { - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", - reset_state, pci_name(dev)); - } -} - -/** * eeh_event_handler - dispatch EEH events. The detection of a frozen * slot can occur inside an interrupt, where it can be hard to do * anything about it. The goal of this routine is to pull these @@ -82,10 +58,16 @@ static int eeh_event_handler(void * dummy) spin_lock_irqsave(&eeh_eventlist_lock, flags); event = NULL; + + /* Unqueue the event, get ready to process. */ if (!list_empty(&eeh_eventlist)) { event = list_entry(eeh_eventlist.next, struct eeh_event, list); list_del(&event->list); } + + if (event) + eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); if (event == NULL) break; @@ -93,8 +75,11 @@ static int eeh_event_handler(void * dummy) printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", pci_name(event->dev)); - eeh_panic (event->dev, event->state); + handle_eeh_events(event); + + eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); + pci_dev_put(event->dev); kfree(event); } @@ -122,7 +107,7 @@ static void eeh_thread_launcher(void *dummy) */ int eeh_send_failure_event (struct device_node *dn, struct pci_dev *dev, - int state, + enum pci_channel_state state, int time_unavail) { unsigned long flags; |