diff options
Diffstat (limited to 'arch/powerpc/kernel')
32 files changed, 1681 insertions, 2103 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3e779f07f21..42c42ecad00 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,7 +12,8 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o systbl.o idle.o + init_task.o process.o systbl.o idle.o \ + signal.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ @@ -65,9 +66,9 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o module-$(CONFIG_PPC64) += module_64.o obj-$(CONFIG_MODULES) += $(module-y) -pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o +pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o isa-bridge.o pci32-$(CONFIG_PPC32) := pci_32.o -obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y) +obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y) pci-common.o obj-$(CONFIG_PCI_MSI) += msi.o kexec-$(CONFIG_PPC64) := machine_kexec_64.o kexec-$(CONFIG_PPC32) := machine_kexec_32.o diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b2b5d664d32..b1f8000952f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -294,6 +294,21 @@ static struct cpu_spec cpu_specs[] = { .oprofile_mmcra_sipr = MMCRA_SIPR, .platform = "power5", }, + { /* Power5++ */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x003b0300, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .oprofile_cpu_type = "ppc64/power5++", + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_mmcra_sihv = MMCRA_SIHV, + .oprofile_mmcra_sipr = MMCRA_SIPR, + .platform = "power5+", + }, { /* Power5 GS */ .pvr_mask = 0xffff0000, .pvr_value = 0x003b0000, @@ -1178,8 +1193,8 @@ static struct cpu_spec cpu_specs[] = { .platform = "ppc440", }, { /* 440SP Rev. A */ - .pvr_mask = 0xff000fff, - .pvr_value = 0x53000891, + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53200891, .cpu_name = "440SP Rev. A", .cpu_features = CPU_FTRS_44X, .cpu_user_features = COMMON_USER_BOOKE, @@ -1188,9 +1203,19 @@ static struct cpu_spec cpu_specs[] = { .platform = "ppc440", }, { /* 440SPe Rev. A */ - .pvr_mask = 0xff000fff, - .pvr_value = 0x53000890, - .cpu_name = "440SPe Rev. A", + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400890, + .cpu_name = "440SPe Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .icache_bsize = 32, + .dcache_bsize = 32, + .platform = "ppc440", + }, + { /* 440SPe Rev. B */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400891, + .cpu_name = "440SPe Rev. B", .cpu_features = CPU_FTRS_44X, .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index c897203198b..7d73a13450b 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -9,7 +9,6 @@ * rewritten by Paul Mackerras. * Copyright (C) 1996 Paul Mackerras. * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * This file contains the low-level support and setup for the * PowerPC platform, including trap and interrupt dispatch. @@ -32,10 +31,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> -#ifdef CONFIG_APUS -#include <asm/amigappc.h> -#endif - /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -92,11 +87,6 @@ _start: * r4: virtual address of boot_infos_t * r5: 0 * - * APUS - * r3: 'APUS' - * r4: physical address of memory base - * Linux/m68k style BootInfo structure at &_end. - * * PREP * This is jumped to on prep systems right after the kernel is relocated * to its proper place in memory by the boot loader. The expected layout @@ -150,14 +140,6 @@ __start: */ bl early_init -#ifdef CONFIG_APUS -/* On APUS the __va/__pa constants need to be set to the correct - * values before continuing. - */ - mr r4,r30 - bl fix_mem_constants -#endif /* CONFIG_APUS */ - /* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains * the physical address we are running at, returned by early_init() */ @@ -167,7 +149,7 @@ __after_mmu_off: bl flush_tlbs bl initial_bats -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +#if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -183,7 +165,6 @@ __after_mmu_off: #endif /* CONFIG_6xx */ -#ifndef CONFIG_APUS /* * We need to run with _start at physical address 0. * On CHRP, we are loaded at 0x10000 since OF on CHRP uses @@ -196,7 +177,6 @@ __after_mmu_off: addis r4,r3,KERNELBASE@h /* current address of _start */ cmpwi 0,r4,0 /* are we already running at 0? */ bne relocate_kernel -#endif /* CONFIG_APUS */ /* * we now have the 1st 16M of ram mapped with the bats. * prep needs the mmu to be turned on here, but pmac already has it on. @@ -881,85 +861,6 @@ _GLOBAL(copy_and_flush) addi r6,r6,4 blr -#ifdef CONFIG_APUS -/* - * On APUS the physical base address of the kernel is not known at compile - * time, which means the __pa/__va constants used are incorrect. In the - * __init section is recorded the virtual addresses of instructions using - * these constants, so all that has to be done is fix these before - * continuing the kernel boot. - * - * r4 = The physical address of the kernel base. - */ -fix_mem_constants: - mr r10,r4 - addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ - neg r11,r10 /* phys_to_virt constant */ - - lis r12,__vtop_table_begin@h - ori r12,r12,__vtop_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__vtop_table_end@h - ori r13,r13,__vtop_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r10,16,16,31 /* half of vp const in low half */ - stw r15,0(r14) /* of instruction and restore. */ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - sync /* additional sync needed on g4 */ - isync - -/* - * Map the memory where the exception handlers will - * be copied to when hash constants have been patched. - */ -#ifdef CONFIG_APUS_FAST_EXCEPT - lis r8,0xfff0 -#else - lis r8,0 -#endif - ori r8,r8,0x2 /* 128KB, supervisor */ - mtspr SPRN_DBAT3U,r8 - mtspr SPRN_DBAT3L,r8 - - lis r12,__ptov_table_begin@h - ori r12,r12,__ptov_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__ptov_table_end@h - ori r13,r13,__ptov_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ - stw r15,0(r14) /* of instruction and restore. */ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - - sync /* additional sync needed on g4 */ - isync /* No speculative loading until now */ - blr - -/*********************************************************************** - * Please note that on APUS the exception handlers are located at the - * physical address 0xfff0000. For this reason, the exception handlers - * cannot use relative branches to access the code below. - ***********************************************************************/ -#endif /* CONFIG_APUS */ - #ifdef CONFIG_SMP #ifdef CONFIG_GEMINI .globl __secondary_start_gemini @@ -1135,19 +1036,6 @@ start_here: bl __save_cpu_setup bl MMU_init -#ifdef CONFIG_APUS - /* Copy exception code to exception vector base on APUS. */ - lis r4,KERNELBASE@h -#ifdef CONFIG_APUS_FAST_EXCEPT - lis r3,0xfff0 /* Copy to 0xfff00000 */ -#else - lis r3,0 /* Copy to 0x00000000 */ -#endif - li r5,0x4000 /* # bytes of memory to copy */ - li r6,0 - bl copy_and_flush /* copy the first 0x4000 bytes */ -#endif /* CONFIG_APUS */ - /* * Go back to running unmapped so we can load up new values * for SDR1 (hash table pointer) and the segment registers @@ -1324,11 +1212,7 @@ initial_bats: #else ori r8,r8,2 /* R/W access */ #endif /* CONFIG_SMP */ -#ifdef CONFIG_APUS - ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ -#else ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ -#endif /* CONFIG_APUS */ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ @@ -1338,7 +1222,7 @@ initial_bats: blr -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +#ifdef CONFIG_BOOTX_TEXT setup_disp_bat: /* * setup the display bat prepared for us in prom.c @@ -1362,7 +1246,7 @@ setup_disp_bat: 1: mtspr SPRN_IBAT3L,r8 mtspr SPRN_IBAT3U,r11 blr -#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ +#endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_8260 /* Jump into the system reset for the rom. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1111fcec767..8cdd48ea439 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -103,8 +103,8 @@ __secondary_hold_acknowledge: . = 0x60 /* - * The following code is used on pSeries to hold secondary processors - * in a spin loop after they have been freed from OpenFirmware, but + * The following code is used to hold secondary processors + * in a spin loop after they have entered the kernel, but * before the bulk of the kernel has been relocated. This code * is relocated to physical address 0x60 before prom_init is run. * All of it must fit below the first exception vector at 0x100. diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 34ae11494dd..e31aca9208e 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -35,7 +35,7 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) asm volatile("sync"); do { tmp = *port; - asm volatile("eieio"); + eieio(); *tbuf++ = tmp; } while (--count != 0); asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); @@ -66,7 +66,7 @@ void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) asm volatile("sync"); do { tmp = *port; - asm volatile("eieio"); + eieio(); *tbuf++ = tmp; } while (--count != 0); asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); @@ -97,7 +97,7 @@ void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) asm volatile("sync"); do { tmp = *port; - asm volatile("eieio"); + eieio(); *tbuf++ = tmp; } while (--count != 0); asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); @@ -155,21 +155,21 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, __asm__ __volatile__ ("sync" : : : "memory"); while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); - __asm__ __volatile__ ("eieio" : : : "memory"); + eieio(); vsrc++; dest++; n--; } while(n > 4) { *((u32 *)dest) = *((volatile u32 *)vsrc); - __asm__ __volatile__ ("eieio" : : : "memory"); + eieio(); vsrc += 4; dest += 4; n -= 4; } while(n) { *((u8 *)dest) = *((volatile u8 *)vsrc); - __asm__ __volatile__ ("eieio" : : : "memory"); + eieio(); vsrc++; dest++; n--; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c2b84c64db2..2fc87862146 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -7,7 +7,6 @@ * Copyright (C) 1996-2001 Cort Dougan * Adapted for Power Macintosh by Paul Mackerras * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -337,7 +336,8 @@ void do_IRQ(struct pt_regs *regs) void __init init_IRQ(void) { - ppc_md.init_IRQ(); + if (ppc_md.init_IRQ) + ppc_md.init_IRQ(); #ifdef CONFIG_PPC64 irq_ctx_init(); #endif @@ -597,6 +597,49 @@ static void irq_radix_rdunlock(unsigned long flags) local_irq_restore(flags); } +static int irq_setup_virq(struct irq_host *host, unsigned int virq, + irq_hw_number_t hwirq) +{ + /* Clear IRQ_NOREQUEST flag */ + get_irq_desc(virq)->status &= ~IRQ_NOREQUEST; + + /* map it */ + smp_wmb(); + irq_map[virq].hwirq = hwirq; + smp_mb(); + + if (host->ops->map(host, virq, hwirq)) { + pr_debug("irq: -> mapping failed, freeing\n"); + irq_free_virt(virq, 1); + return -1; + } + + return 0; +} + +unsigned int irq_create_direct_mapping(struct irq_host *host) +{ + unsigned int virq; + + if (host == NULL) + host = irq_default_host; + + BUG_ON(host == NULL); + WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP); + + virq = irq_alloc_virt(host, 1, 0); + if (virq == NO_IRQ) { + pr_debug("irq: create_direct virq allocation failed\n"); + return NO_IRQ; + } + + pr_debug("irq: create_direct obtained virq %d\n", virq); + + if (irq_setup_virq(host, virq, virq)) + return NO_IRQ; + + return virq; +} unsigned int irq_create_mapping(struct irq_host *host, irq_hw_number_t hwirq) @@ -645,18 +688,9 @@ unsigned int irq_create_mapping(struct irq_host *host, } pr_debug("irq: -> obtained virq %d\n", virq); - /* Clear IRQ_NOREQUEST flag */ - get_irq_desc(virq)->status &= ~IRQ_NOREQUEST; - - /* map it */ - smp_wmb(); - irq_map[virq].hwirq = hwirq; - smp_mb(); - if (host->ops->map(host, virq, hwirq)) { - pr_debug("irq: -> mapping failed, freeing\n"); - irq_free_virt(virq, 1); + if (irq_setup_virq(host, virq, hwirq)) return NO_IRQ; - } + return virq; } EXPORT_SYMBOL_GPL(irq_create_mapping); diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c new file mode 100644 index 00000000000..f0f49d1be3d --- /dev/null +++ b/arch/powerpc/kernel/isa-bridge.c @@ -0,0 +1,271 @@ +/* + * Routines for tracking a legacy ISA bridge + * + * Copyrigh 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. + * + * Some bits and pieces moved over from pci_64.c + * + * Copyrigh 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/notifier.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> +#include <asm/firmware.h> + +unsigned long isa_io_base; /* NULL if no ISA bus */ +EXPORT_SYMBOL(isa_io_base); + +/* Cached ISA bridge dev. */ +static struct device_node *isa_bridge_devnode; +struct pci_dev *isa_bridge_pcidev; +EXPORT_SYMBOL_GPL(isa_bridge_pcidev); + +#define ISA_SPACE_MASK 0x1 +#define ISA_SPACE_IO 0x1 + +static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys) +{ + /* We should get some saner parsing here and remove these structs */ + struct pci_address { + u32 a_hi; + u32 a_mid; + u32 a_lo; + }; + + struct isa_address { + u32 a_hi; + u32 a_lo; + }; + + struct isa_range { + struct isa_address isa_addr; + struct pci_address pci_addr; + unsigned int size; + }; + + const struct isa_range *range; + unsigned long pci_addr; + unsigned int isa_addr; + unsigned int size; + int rlen = 0; + + range = of_get_property(isa_node, "ranges", &rlen); + if (range == NULL || (rlen < sizeof(struct isa_range))) + goto inval_range; + + /* From "ISA Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 1: an ISA address + * cells 2 - 4: a PCI address + * (size depending on dev->n_addr_cells) + * cell 5: the size of the range + */ + if ((range->isa_addr.a_hi && ISA_SPACE_MASK) != ISA_SPACE_IO) { + range++; + rlen -= sizeof(struct isa_range); + if (rlen < sizeof(struct isa_range)) + goto inval_range; + } + if ((range->isa_addr.a_hi && ISA_SPACE_MASK) != ISA_SPACE_IO) + goto inval_range; + + isa_addr = range->isa_addr.a_lo; + pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | + range->pci_addr.a_lo; + + /* Assume these are both zero. Note: We could fix that and + * do a proper parsing instead ... oh well, that will do for + * now as nobody uses fancy mappings for ISA bridges + */ + if ((pci_addr != 0) || (isa_addr != 0)) { + printk(KERN_ERR "unexpected isa to pci mapping: %s\n", + __FUNCTION__); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range->size); + if (size > 0x10000) + size = 0x10000; + + printk(KERN_ERR "no ISA IO ranges or unexpected isa range," + "mapping 64k\n"); + + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + size, _PAGE_NO_CACHE|_PAGE_GUARDED); + return; + +inval_range: + printk(KERN_ERR "no ISA IO ranges or unexpected isa range," + "mapping 64k\n"); + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED); +} + + +/** + * isa_bridge_find_early - Find and map the ISA IO space early before + * main PCI discovery. This is optionally called by + * the arch code when adding PCI PHBs to get early + * access to ISA IO ports + */ +void __init isa_bridge_find_early(struct pci_controller *hose) +{ + struct device_node *np, *parent = NULL, *tmp; + + /* If we already have an ISA bridge, bail off */ + if (isa_bridge_devnode != NULL) + return; + + /* For each "isa" node in the system. Note : we do a search by + * type and not by name. It might be better to do by name but that's + * what the code used to do and I don't want to break too much at + * once. We can look into changing that separately + */ + for_each_node_by_type(np, "isa") { + /* Look for our hose being a parent */ + for (parent = of_get_parent(np); parent;) { + if (parent == hose->arch_data) { + of_node_put(parent); + break; + } + tmp = parent; + parent = of_get_parent(parent); + of_node_put(tmp); + } + if (parent != NULL) + break; + } + if (np == NULL) + return; + isa_bridge_devnode = np; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(np, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (early) is %s\n", np->full_name); +} + +/** + * isa_bridge_find_late - Find and map the ISA IO space upon discovery of + * a new ISA bridge + */ +static void __devinit isa_bridge_find_late(struct pci_dev *pdev, + struct device_node *devnode) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + + /* Store ISA device node and PCI device */ + isa_bridge_devnode = of_node_get(devnode); + isa_bridge_pcidev = pdev; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (late) is %s on %s\n", + devnode->full_name, pci_name(pdev)); +} + +/** + * isa_bridge_remove - Remove/unmap an ISA bridge + */ +static void isa_bridge_remove(void) +{ + pr_debug("ISA bridge removed !\n"); + + /* Clear the global ISA io base to indicate that we have no more + * ISA bridge. Note that drivers don't quite handle that, though + * we should probably do something about it. But do we ever really + * have ISA bridges being removed on machines using legacy devices ? + */ + isa_io_base = ISA_IO_BASE; + + /* Clear references to the bridge */ + of_node_put(isa_bridge_devnode); + isa_bridge_devnode = NULL; + isa_bridge_pcidev = NULL; + + /* Unmap the ISA area */ + __iounmap_at((void *)ISA_IO_BASE, 0x10000); +} + +/** + * isa_bridge_notify - Get notified of PCI devices addition/removal + */ +static int __devinit isa_bridge_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *devnode = pci_device_to_OF_node(pdev); + + switch(action) { + case BUS_NOTIFY_ADD_DEVICE: + /* Check if we have an early ISA device, without PCI dev */ + if (isa_bridge_devnode && isa_bridge_devnode == devnode && + !isa_bridge_pcidev) { + pr_debug("ISA bridge PCI attached: %s\n", + pci_name(pdev)); + isa_bridge_pcidev = pdev; + } + + /* Check if we have no ISA device, and this happens to be one, + * register it as such if it has an OF device + */ + if (!isa_bridge_devnode && devnode && devnode->type && + !strcmp(devnode->type, "isa")) + isa_bridge_find_late(pdev, devnode); + + return 0; + case BUS_NOTIFY_DEL_DEVICE: + /* Check if this our existing ISA device */ + if (pdev == isa_bridge_pcidev || + (devnode && devnode == isa_bridge_devnode)) + isa_bridge_remove(); + return 0; + } + return 0; +} + +static struct notifier_block isa_bridge_notifier = { + .notifier_call = isa_bridge_notify +}; + +/** + * isa_bridge_init - register to be notified of ISA bridge addition/removal + * + */ +static int __init isa_bridge_init(void) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return 0; + bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); + return 0; +} +arch_initcall(isa_bridge_init); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 98decf8ebff..e708ab7ca9e 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -392,7 +392,7 @@ BEGIN_FTR_SECTION mtspr SPRN_L1CSR0,r3 isync blr -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 @@ -419,7 +419,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) _GLOBAL(__flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 andc r3,r3,r5 subf r4,r3,r4 @@ -514,8 +514,8 @@ _GLOBAL(invalidate_dcache_range) */ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION - blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,19 /* Get page base address */ li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 @@ -543,7 +543,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 519861da042..bbb3ba54c51 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -646,6 +646,19 @@ _GLOBAL(kexec_sequence) /* turn off mmu */ bl real_mode + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 /* start, aka phys mem offset */ + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + /* clear out hardware hash page table and tlb */ ld r5,0(r27) /* deref function descriptor */ mtctr r5 @@ -676,19 +689,6 @@ _GLOBAL(kexec_sequence) * are the boot cpu ????? * other device tree differences (prop sizes, va vs pa, etc)... */ - - /* copy 0x100 bytes starting at start to 0 */ - li r3,0 - mr r4,r30 - li r5,0x100 - li r6,0 - bl .copy_and_flush /* (dest, src, copy limit, start offset) */ -1: /* assume normal blr return */ - - /* release other cpus to the new kernel secondary start at 0x60 */ - mflr r5 - li r6,1 - stw r6,kexec_flag-1b(5) mr r3,r25 # my phys cpu mr r4,r30 # start, aka phys mem offset mtlr 4 diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index d454f61c9c7..9536ed7f247 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -427,14 +427,6 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, /* Process "ranges" property */ pci_process_bridge_OF_ranges(phb, dev->node, 0); - /* Setup IO space. We use the non-dynamic version of that code here, - * which doesn't quite support unplugging. Next kernel release will - * have a better fix for this. - * Note also that we don't do ISA, this will also be fixed with a - * more massive rework. - */ - pci_setup_phb_io(phb, pci_io_base == 0); - /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c new file mode 100644 index 00000000000..faf5ef3e90d --- /dev/null +++ b/arch/powerpc/kernel/pci-common.c @@ -0,0 +1,454 @@ +/* + * Contains common pci routines for ALL ppc platform + * (based on pci_32.c and pci_64.c) + * + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. + * + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * Rework, based on alpha PCI code. + * + * Common pmac/prep/chrp pci routines. -- Cort + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/irq.h> +#include <linux/vmalloc.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/byteorder.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> +#include <asm/firmware.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static DEFINE_SPINLOCK(hose_spinlock); + +/* XXX kill that some day ... */ +int global_phb_number; /* Global phb counter */ + +extern struct list_head hose_list; + +/* + * pci_controller(phb) initialized common variables. + */ +static void __devinit pci_setup_pci_controller(struct pci_controller *hose) +{ + memset(hose, 0, sizeof(struct pci_controller)); + + spin_lock(&hose_spinlock); + hose->global_number = global_phb_number++; + list_add_tail(&hose->list_node, &hose_list); + spin_unlock(&hose_spinlock); +} + +struct pci_controller * pcibios_alloc_controller(struct device_node *dev) +{ + struct pci_controller *phb; + + if (mem_init_done) + phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + else + phb = alloc_bootmem(sizeof (struct pci_controller)); + if (phb == NULL) + return NULL; + pci_setup_pci_controller(phb); + phb->arch_data = dev; + phb->is_dynamic = mem_init_done; +#ifdef CONFIG_PPC64 + if (dev) { + int nid = of_node_to_nid(dev); + + if (nid < 0 || !node_online(nid)) + nid = -1; + + PHB_SET_NODE(phb, nid); + } +#endif + return phb; +} + +void pcibios_free_controller(struct pci_controller *phb) +{ + spin_lock(&hose_spinlock); + list_del(&phb->list_node); + spin_unlock(&hose_spinlock); + + if (phb->is_dynamic) + kfree(phb); +} + +/* + * Return the domain number for this bus. + */ +int pci_domain_nr(struct pci_bus *bus) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return 0; + else { + struct pci_controller *hose = pci_bus_to_host(bus); + + return hose->global_number; + } +} + +EXPORT_SYMBOL(pci_domain_nr); + +#ifdef CONFIG_PPC_OF + +/* This routine is meant to be used early during boot, when the + * PCI bus numbers have not yet been assigned, and you need to + * issue PCI config cycles to an OF device. + * It could also be used to "fix" RTAS config cycles if you want + * to set pci_assign_all_buses to 1 and still use RTAS for PCI + * config cycles. + */ +struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) +{ + if (!have_of) + return NULL; + while(node) { + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + if (hose->arch_data == node) + return hose; + node = node->parent; + } + return NULL; +} + +static ssize_t pci_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); +#endif /* CONFIG_PPC_OF */ + +/* Add sysfs properties */ +void pcibios_add_platform_entries(struct pci_dev *pdev) +{ +#ifdef CONFIG_PPC_OF + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_OF */ +} + +char __init *pcibios_setup(char *str) +{ + return str; +} + +/* + * Reads the interrupt pin to determine if interrupt is use by card. + * If the interrupt is used, then gets the interrupt line from the + * openfirmware and sets it in the pci_dev and pci_config line. + */ +int pci_read_irq_line(struct pci_dev *pci_dev) +{ + struct of_irq oirq; + unsigned int virq; + + DBG("Try to map irq for %s...\n", pci_name(pci_dev)); + +#ifdef DEBUG + memset(&oirq, 0xff, sizeof(oirq)); +#endif + /* Try to get a mapping from the device-tree */ + if (of_irq_map_pci(pci_dev, &oirq)) { + u8 line, pin; + + /* If that fails, lets fallback to what is in the config + * space and map that through the default controller. We + * also set the type to level low since that's what PCI + * interrupts are. If your platform does differently, then + * either provide a proper interrupt tree or don't use this + * function. + */ + if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) + return -1; + if (pin == 0) + return -1; + if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || + line == 0xff) { + return -1; + } + DBG(" -> no map ! Using irq line %d from PCI config\n", line); + + virq = irq_create_mapping(NULL, line); + if (virq != NO_IRQ) + set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); + } else { + DBG(" -> got one, spec %d cells (0x%08x 0x%08x...) on %s\n", + oirq.size, oirq.specifier[0], oirq.specifier[1], + oirq.controller->full_name); + + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); + } + if(virq == NO_IRQ) { + DBG(" -> failed to map !\n"); + return -1; + } + + DBG(" -> mapped to linux irq %d\n", virq); + + pci_dev->irq = virq; + + return 0; +} +EXPORT_SYMBOL(pci_read_irq_line); + +/* + * Platform support for /proc/bus/pci/X/Y mmap()s, + * modelled on the sparc64 implementation by Dave Miller. + * -- paulus. + */ + +/* + * Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. + */ +static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, + resource_size_t *offset, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return NULL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { +#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ + *offset += hose->pci_mem_offset; +#endif + res_bit = IORESOURCE_MEM; + } else { + io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + *offset += hose->io_base_phys - io_offset; + return rp; + } + + return NULL; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, + pgprot_t protection, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long prot = pgprot_val(protection); + + /* Write combine is always 0 on non-memory space mappings. On + * memory space, if the user didn't pass 1, we check for a + * "prefetchable" resource. This is a bit hackish, but we use + * this to workaround the inability of /sysfs to provide a write + * combine bit + */ + if (mmap_state != pci_mmap_mem) + write_combine = 0; + else if (write_combine == 0) { + if (rp->flags & IORESOURCE_PREFETCH) + write_combine = 1; + } + + /* XXX would be nice to have a way to ask for write-through */ + prot |= _PAGE_NO_CACHE; + if (write_combine) + prot &= ~_PAGE_GUARDED; + else + prot |= _PAGE_GUARDED; + + return __pgprot(prot); +} + +/* + * This one is used by /dev/mem and fbdev who have no clue about the + * PCI device, it tries to find the PCI device first and calls the + * above routine + */ +pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long pfn, + unsigned long size, + pgprot_t protection) +{ + struct pci_dev *pdev = NULL; + struct resource *found = NULL; + unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; + int i; + + if (page_is_ram(pfn)) + return __pgprot(prot); + + prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; + + for_each_pci_dev(pdev) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + int flags = rp->flags; + + /* Active and same type? */ + if ((flags & IORESOURCE_MEM) == 0) + continue; + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || + offset > rp->end) + continue; + found = rp; + break; + } + if (found) + break; + } + if (found) { + if (found->flags & IORESOURCE_PREFETCH) + prot &= ~_PAGE_GUARDED; + pci_dev_put(pdev); + } + + DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); + + return __pgprot(prot); +} + + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + resource_size_t offset = vma->vm_pgoff << PAGE_SHIFT; + struct resource *rp; + int ret; + + rp = __pci_mmap_make_offset(dev, &offset, mmap_state); + if (rp == NULL) + return -EINVAL; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, + vma->vm_page_prot, + mmap_state, write_combine); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + resource_size_t *start, resource_size_t *end) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + resource_size_t offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + + /* We pass a fully fixed up address to userland for MMIO instead of + * a BAR value because X is lame and expects to be able to use that + * to pass to /dev/mem ! + * + * That means that we'll have potentially 64 bits values where some + * userland apps only expect 32 (like X itself since it thinks only + * Sparc has 64 bits MMIO) but if we don't do that, we break it on + * 32 bits CHRPs :-( + * + * Hopefully, the sysfs insterface is immune to that gunk. Once X + * has been fixed (and the fix spread enough), we can re-enable the + * 2 lines below and pass down a BAR value to userland. In that case + * we'll also have to re-enable the matching code in + * __pci_mmap_make_offset(). + * + * BenH. + */ +#if 0 + else if (rsrc->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; +#endif + + *start = rsrc->start - offset; + *end = rsrc->end - offset; +} diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 86982112b0d..0adf077f3f3 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -55,8 +55,7 @@ static u8* pci_to_OF_bus_map; */ int pci_assign_all_buses; -struct pci_controller* hose_head; -struct pci_controller** hose_tail = &hose_head; +LIST_HEAD(hose_list); static int pci_bus_count; @@ -573,58 +572,6 @@ pcibios_assign_resources(void) } } - -int -pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx<6; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1<<idx))) - continue; - - r = &dev->resource[idx]; - if (r->flags & IORESOURCE_UNSET) { - printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -static int next_controller_index; - -struct pci_controller * __init -pcibios_alloc_controller(void) -{ - struct pci_controller *hose; - - hose = (struct pci_controller *)alloc_bootmem(sizeof(*hose)); - memset(hose, 0, sizeof(struct pci_controller)); - - *hose_tail = hose; - hose_tail = &hose->next; - - hose->index = next_controller_index++; - - return hose; -} - #ifdef CONFIG_PPC_OF /* * Functions below are used on OpenFirmware machines. @@ -670,7 +617,7 @@ void pcibios_make_OF_bus_map(void) { int i; - struct pci_controller* hose; + struct pci_controller *hose, *tmp; struct property *map_prop; struct device_node *dn; @@ -687,7 +634,7 @@ pcibios_make_OF_bus_map(void) pci_to_OF_bus_map[i] = 0xff; /* For each hose, we begin searching bridges */ - for(hose=hose_head; hose; hose=hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { struct device_node* node; node = (struct device_node *)hose->arch_data; if (!node) @@ -765,7 +712,7 @@ static struct device_node *scan_OF_for_pci_bus(struct pci_bus *bus) /* Are we a root bus ? */ if (bus->self == NULL || bus->parent == NULL) { - struct pci_controller *hose = pci_bus_to_hose(bus->number); + struct pci_controller *hose = pci_bus_to_host(bus); if (hose == NULL) return NULL; return of_node_get(hose->arch_data); @@ -818,27 +765,6 @@ pci_device_to_OF_node(struct pci_dev *dev) } EXPORT_SYMBOL(pci_device_to_OF_node); -/* This routine is meant to be used early during boot, when the - * PCI bus numbers have not yet been assigned, and you need to - * issue PCI config cycles to an OF device. - * It could also be used to "fix" RTAS config cycles if you want - * to set pci_assign_all_buses to 1 and still use RTAS for PCI - * config cycles. - */ -struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) -{ - if (!have_of) - return NULL; - while(node) { - struct pci_controller* hose; - for (hose=hose_head;hose;hose=hose->next) - if (hose->arch_data == node) - return hose; - node=node->parent; - } - return NULL; -} - static int find_OF_pci_device_filter(struct device_node* node, void* data) { @@ -1027,34 +953,12 @@ pci_create_OF_bus_map(void) } } -static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct pci_dev *pdev; - struct device_node *np; - - pdev = to_pci_dev (dev); - np = pci_device_to_OF_node(pdev); - if (np == NULL || np->full_name == NULL) - return 0; - return sprintf(buf, "%s", np->full_name); -} -static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); - #else /* CONFIG_PPC_OF */ void pcibios_make_OF_bus_map(void) { } #endif /* CONFIG_PPC_OF */ -/* Add sysfs properties */ -int pcibios_add_platform_entries(struct pci_dev *pdev) -{ -#ifdef CONFIG_PPC_OF - return device_create_file(&pdev->dev, &dev_attr_devspec); -#endif /* CONFIG_PPC_OF */ -} - - #ifdef CONFIG_PPC_PMAC /* * This set of routines checks for PCI<->PCI bridges that have closed @@ -1269,14 +1173,14 @@ pcibios_fixup_p2p_bridges(void) static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; - int next_busno; + int next_busno = 0; printk(KERN_INFO "PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. */ - for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { if (pci_assign_all_buses) hose->first_busno = next_busno; hose->last_busno = 0xff; @@ -1319,12 +1223,6 @@ pcibios_init(void) subsys_initcall(pcibios_init); -unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, - unsigned long start, unsigned long size) -{ - return start; -} - void __init pcibios_fixup_bus(struct pci_bus *bus) { struct pci_controller *hose = (struct pci_controller *) bus->sysdata; @@ -1342,7 +1240,7 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) if (!res->flags) { if (io_offset) printk(KERN_ERR "I/O resource not set for host" - " bridge %d\n", hose->index); + " bridge %d\n", hose->global_number); res->start = 0; res->end = IO_SPACE_LIMIT; res->flags = IORESOURCE_IO; @@ -1356,7 +1254,7 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) if (i > 0) continue; printk(KERN_ERR "Memory resource not set for " - "host bridge %d\n", hose->index); + "host bridge %d\n", hose->global_number); res->start = hose->pci_mem_offset; res->end = ~0U; res->flags = IORESOURCE_MEM; @@ -1370,7 +1268,7 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) for (i = 0; i < 4; ++i) { if ((res = bus->resource[i]) == NULL) continue; - if (!res->flags) + if (!res->flags || bus->self->transparent) continue; if (io_offset && (res->flags & IORESOURCE_IO)) { res->start += io_offset; @@ -1395,11 +1293,6 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) } } -char __init *pcibios_setup(char *str) -{ - return str; -} - /* the next one is stolen from the alpha port... */ void __init pcibios_update_irq(struct pci_dev *dev, int irq) @@ -1408,64 +1301,6 @@ pcibios_update_irq(struct pci_dev *dev, int irq) /* XXX FIXME - update OF device tree node interrupt property */ } -#ifdef CONFIG_PPC_MERGE -/* XXX This is a copy of the ppc64 version. This is temporary until we start - * merging the 2 PCI layers - */ -/* - * Reads the interrupt pin to determine if interrupt is use by card. - * If the interrupt is used, then gets the interrupt line from the - * openfirmware and sets it in the pci_dev and pci_config line. - */ -int pci_read_irq_line(struct pci_dev *pci_dev) -{ - struct of_irq oirq; - unsigned int virq; - - DBG("Try to map irq for %s...\n", pci_name(pci_dev)); - - /* Try to get a mapping from the device-tree */ - if (of_irq_map_pci(pci_dev, &oirq)) { - u8 line, pin; - - /* If that fails, lets fallback to what is in the config - * space and map that through the default controller. We - * also set the type to level low since that's what PCI - * interrupts are. If your platform does differently, then - * either provide a proper interrupt tree or don't use this - * function. - */ - if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; - if (pin == 0) - return -1; - if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || - line == 0xff) { - return -1; - } - DBG(" -> no map ! Using irq line %d from PCI config\n", line); - - virq = irq_create_mapping(NULL, line); - if (virq != NO_IRQ) - set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); - } else { - DBG(" -> got one, spec %d cells (0x%08x...) on %s\n", - oirq.size, oirq.specifier[0], oirq.controller->full_name); - - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); - } - if(virq == NO_IRQ) { - DBG(" -> failed to map !\n"); - return -1; - } - pci_dev->irq = virq; - - return 0; -} -EXPORT_SYMBOL(pci_read_irq_line); -#endif /* CONFIG_PPC_MERGE */ - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; @@ -1497,281 +1332,17 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } -struct pci_controller* +static struct pci_controller* pci_bus_to_hose(int bus) { - struct pci_controller* hose = hose_head; + struct pci_controller *hose, *tmp; - for (; hose; hose = hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (bus >= hose->first_busno && bus <= hose->last_busno) return hose; return NULL; } -void __iomem * -pci_bus_io_base(unsigned int bus) -{ - struct pci_controller *hose; - - hose = pci_bus_to_hose(bus); - if (!hose) - return NULL; - return hose->io_base_virt; -} - -unsigned long -pci_bus_io_base_phys(unsigned int bus) -{ - struct pci_controller *hose; - - hose = pci_bus_to_hose(bus); - if (!hose) - return 0; - return hose->io_base_phys; -} - -unsigned long -pci_bus_mem_base_phys(unsigned int bus) -{ - struct pci_controller *hose; - - hose = pci_bus_to_hose(bus); - if (!hose) - return 0; - return hose->pci_mem_offset; -} - -unsigned long -pci_resource_to_bus(struct pci_dev *pdev, struct resource *res) -{ - /* Hack alert again ! See comments in chrp_pci.c - */ - struct pci_controller* hose = - (struct pci_controller *)pdev->sysdata; - if (hose && res->flags & IORESOURCE_MEM) - return res->start - hose->pci_mem_offset; - /* We may want to do something with IOs here... */ - return res->start; -} - - -static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, - resource_size_t *offset, - enum pci_mmap_state mmap_state) -{ - struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); - unsigned long io_offset = 0; - int i, res_bit; - - if (hose == 0) - return NULL; /* should never happen */ - - /* If memory, add on the PCI bridge address offset */ - if (mmap_state == pci_mmap_mem) { -#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ - *offset += hose->pci_mem_offset; -#endif - res_bit = IORESOURCE_MEM; - } else { - io_offset = hose->io_base_virt - (void __iomem *)_IO_BASE; - *offset += io_offset; - res_bit = IORESOURCE_IO; - } - - /* - * Check that the offset requested corresponds to one of the - * resources of the device. - */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &dev->resource[i]; - int flags = rp->flags; - - /* treat ROM as memory (should be already) */ - if (i == PCI_ROM_RESOURCE) - flags |= IORESOURCE_MEM; - - /* Active and same type? */ - if ((flags & res_bit) == 0) - continue; - - /* In the range of this resource? */ - if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) - continue; - - /* found it! construct the final physical address */ - if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; - return rp; - } - - return NULL; -} - -/* - * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci - * device mapping. - */ -static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, - pgprot_t protection, - enum pci_mmap_state mmap_state, - int write_combine) -{ - unsigned long prot = pgprot_val(protection); - - /* Write combine is always 0 on non-memory space mappings. On - * memory space, if the user didn't pass 1, we check for a - * "prefetchable" resource. This is a bit hackish, but we use - * this to workaround the inability of /sysfs to provide a write - * combine bit - */ - if (mmap_state != pci_mmap_mem) - write_combine = 0; - else if (write_combine == 0) { - if (rp->flags & IORESOURCE_PREFETCH) - write_combine = 1; - } - - /* XXX would be nice to have a way to ask for write-through */ - prot |= _PAGE_NO_CACHE; - if (write_combine) - prot &= ~_PAGE_GUARDED; - else - prot |= _PAGE_GUARDED; - - return __pgprot(prot); -} - -/* - * This one is used by /dev/mem and fbdev who have no clue about the - * PCI device, it tries to find the PCI device first and calls the - * above routine - */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, - unsigned long size, - pgprot_t protection) -{ - struct pci_dev *pdev = NULL; - struct resource *found = NULL; - unsigned long prot = pgprot_val(protection); - unsigned long offset = pfn << PAGE_SHIFT; - int i; - - if (page_is_ram(pfn)) - return __pgprot(prot); - - prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; - - for_each_pci_dev(pdev) { - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &pdev->resource[i]; - int flags = rp->flags; - - /* Active and same type? */ - if ((flags & IORESOURCE_MEM) == 0) - continue; - /* In the range of this resource? */ - if (offset < (rp->start & PAGE_MASK) || - offset > rp->end) - continue; - found = rp; - break; - } - if (found) - break; - } - if (found) { - if (found->flags & IORESOURCE_PREFETCH) - prot &= ~_PAGE_GUARDED; - pci_dev_put(pdev); - } - - DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); - - return __pgprot(prot); -} - - -/* - * Perform the actual remap of the pages for a PCI device mapping, as - * appropriate for this architecture. The region in the process to map - * is described by vm_start and vm_end members of VMA, the base physical - * address is found in vm_pgoff. - * The pci device structure is provided so that architectures may make mapping - * decisions on a per-device or per-bus basis. - * - * Returns a negative error code on failure, zero on success. - */ -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, - int write_combine) -{ - resource_size_t offset = vma->vm_pgoff << PAGE_SHIFT; - struct resource *rp; - int ret; - - rp = __pci_mmap_make_offset(dev, &offset, mmap_state); - if (rp == NULL) - return -EINVAL; - - vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, - vma->vm_page_prot, - mmap_state, write_combine); - - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot); - - return ret; -} - -/* Obsolete functions. Should be removed once the symbios driver - * is fixed - */ -unsigned long -phys_to_bus(unsigned long pa) -{ - struct pci_controller *hose; - int i; - - for (hose = hose_head; hose; hose = hose->next) { - for (i = 0; i < 3; ++i) { - if (pa >= hose->mem_resources[i].start - && pa <= hose->mem_resources[i].end) { - /* - * XXX the hose->pci_mem_offset really - * only applies to mem_resources[0]. - * We need a way to store an offset for - * the others. -- paulus - */ - if (i == 0) - pa -= hose->pci_mem_offset; - return pa; - } - } - } - /* hmmm, didn't find it */ - return 0; -} - -unsigned long -pci_phys_to_bus(unsigned long pa, int busnr) -{ - struct pci_controller* hose = pci_bus_to_hose(busnr); - if (!hose) - return pa; - return pa - hose->pci_mem_offset; -} - -unsigned long -pci_bus_to_phys(unsigned int ba, int busnr) -{ - struct pci_controller* hose = pci_bus_to_hose(busnr); - if (!hose) - return ba; - return ba + hose->pci_mem_offset; -} - /* Provide information on locations of various I/O regions in physical * memory. Do this on a per-card basis so that we choose the right * root bridge. @@ -1814,62 +1385,11 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) return result; } -void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end) -{ - struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); - resource_size_t offset = 0; - - if (hose == NULL) - return; - - if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - - /* We pass a fully fixed up address to userland for MMIO instead of - * a BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem ! - * - * That means that we'll have potentially 64 bits values where some - * userland apps only expect 32 (like X itself since it thinks only - * Sparc has 64 bits MMIO) but if we don't do that, we break it on - * 32 bits CHRPs :-( - * - * Hopefully, the sysfs insterface is immune to that gunk. Once X - * has been fixed (and the fix spread enough), we can re-enable the - * 2 lines below and pass down a BAR value to userland. In that case - * we'll also have to re-enable the matching code in - * __pci_mmap_make_offset(). - * - * BenH. - */ -#if 0 - else if (rsrc->flags & IORESOURCE_MEM) - offset = hose->pci_mem_offset; -#endif - - *start = rsrc->start - offset; - *end = rsrc->end - offset; -} - -void __init pci_init_resource(struct resource *res, resource_size_t start, - resource_size_t end, int flags, char *name) -{ - res->start = start; - res->end = end; - res->flags = flags; - res->name = name; - res->parent = NULL; - res->sibling = NULL; - res->child = NULL; -} - unsigned long pci_address_to_pio(phys_addr_t address) { - struct pci_controller* hose = hose_head; + struct pci_controller *hose, *tmp; - for (; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { unsigned int size = hose->io_resource.end - hose->io_resource.start + 1; if (address >= hose->io_base_phys && diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index e3009a43ac5..a97e23ac197 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -22,6 +22,7 @@ #include <linux/list.h> #include <linux/syscalls.h> #include <linux/irq.h> +#include <linux/vmalloc.h> #include <asm/processor.h> #include <asm/io.h> @@ -41,35 +42,23 @@ unsigned long pci_probe_only = 1; int pci_assign_all_buses = 0; -static int pci_initial_scan_done; static void fixup_resource(struct resource *res, struct pci_dev *dev); static void do_bus_setup(struct pci_bus *bus); -static void phbs_remap_io(void); /* pci_io_base -- the base address from which io bars are offsets. * This is the lowest I/O base address (so bar values are always positive), * and it *must* be the start of ISA space if an ISA bus exists because - * ISA drivers use hard coded offsets. If no ISA bus exists a dummy - * page is mapped and isa_io_limit prevents access to it. + * ISA drivers use hard coded offsets. If no ISA bus exists nothing + * is mapped on the first 64K of IO space */ -unsigned long isa_io_base; /* NULL if no ISA bus */ -EXPORT_SYMBOL(isa_io_base); -unsigned long pci_io_base; +unsigned long pci_io_base = ISA_IO_BASE; EXPORT_SYMBOL(pci_io_base); -void iSeries_pcibios_init(void); - LIST_HEAD(hose_list); static struct dma_mapping_ops *pci_dma_ops; -int global_phb_number; /* Global phb counter */ - -/* Cached ISA bridge dev. */ -struct pci_dev *ppc64_isabridge_dev = NULL; -EXPORT_SYMBOL_GPL(ppc64_isabridge_dev); - void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) { pci_dma_ops = dma_ops; @@ -100,7 +89,7 @@ void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region return; if (res->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; + offset = (unsigned long)hose->io_base_virt - _IO_BASE; if (res->flags & IORESOURCE_MEM) offset = hose->pci_mem_offset; @@ -119,7 +108,7 @@ void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, return; if (res->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; + offset = (unsigned long)hose->io_base_virt - _IO_BASE; if (res->flags & IORESOURCE_MEM) offset = hose->pci_mem_offset; @@ -156,7 +145,7 @@ void pcibios_align_resource(void *data, struct resource *res, if (res->flags & IORESOURCE_IO) { unsigned long offset = (unsigned long)hose->io_base_virt - - pci_io_base; + _IO_BASE; /* Make sure we start at our min on all hoses */ if (start - offset < PCIBIOS_MIN_IO) start = PCIBIOS_MIN_IO + offset; @@ -180,55 +169,6 @@ void pcibios_align_resource(void *data, struct resource *res, res->start = start; } -static DEFINE_SPINLOCK(hose_spinlock); - -/* - * pci_controller(phb) initialized common variables. - */ -static void __devinit pci_setup_pci_controller(struct pci_controller *hose) -{ - memset(hose, 0, sizeof(struct pci_controller)); - - spin_lock(&hose_spinlock); - hose->global_number = global_phb_number++; - list_add_tail(&hose->list_node, &hose_list); - spin_unlock(&hose_spinlock); -} - -struct pci_controller * pcibios_alloc_controller(struct device_node *dev) -{ - struct pci_controller *phb; - - if (mem_init_done) - phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL); - else - phb = alloc_bootmem(sizeof (struct pci_controller)); - if (phb == NULL) - return NULL; - pci_setup_pci_controller(phb); - phb->arch_data = dev; - phb->is_dynamic = mem_init_done; - if (dev) { - int nid = of_node_to_nid(dev); - - if (nid < 0 || !node_online(nid)) - nid = -1; - - PHB_SET_NODE(phb, nid); - } - return phb; -} - -void pcibios_free_controller(struct pci_controller *phb) -{ - spin_lock(&hose_spinlock); - list_del(&phb->list_node); - spin_unlock(&hose_spinlock); - - if (phb->is_dynamic) - kfree(phb); -} - void __devinit pcibios_claim_one_bus(struct pci_bus *b) { struct pci_dev *dev; @@ -291,7 +231,6 @@ static unsigned int pci_parse_of_flags(u32 addr0) return flags; } -#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1]) static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) { @@ -310,8 +249,8 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) flags = pci_parse_of_flags(addrs[0]); if (!flags) continue; - base = GET_64BIT(addrs, 1); - size = GET_64BIT(addrs, 3); + base = of_read_number(&addrs[1], 2); + size = of_read_number(&addrs[3], 2); if (!size) continue; i = addrs[0] & 0xff; @@ -479,7 +418,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node, i = 1; for (; len >= 32; len -= 32, ranges += 8) { flags = pci_parse_of_flags(ranges[0]); - size = GET_64BIT(ranges, 6); + size = of_read_number(&ranges[6], 2); if (flags == 0 || size == 0) continue; if (flags & IORESOURCE_IO) { @@ -498,7 +437,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node, res = bus->resource[i]; ++i; } - res->start = GET_64BIT(ranges, 1); + res->start = of_read_number(&ranges[1], 2); res->end = res->start + size - 1; res->flags = flags; fixup_resource(res, dev); @@ -537,10 +476,16 @@ void __devinit scan_phb(struct pci_controller *hose) bus->secondary = hose->first_busno; hose->bus = bus; + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + pcibios_map_io_space(bus); + bus->resource[0] = res = &hose->io_resource; - if (res->flags && request_resource(&ioport_resource, res)) + if (res->flags && request_resource(&ioport_resource, res)) { printk(KERN_ERR "Failed to request PCI IO region " "on PCI domain %04x\n", hose->global_number); + DBG("res->start = 0x%016lx, res->end = 0x%016lx\n", + res->start, res->end); + } for (i = 0; i < 3; ++i) { res = &hose->mem_resources[i]; @@ -598,17 +543,6 @@ static int __init pcibios_init(void) if (ppc_md.pcibios_fixup) ppc_md.pcibios_fixup(); - /* Cache the location of the ISA bridge (if we have one) */ - ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - if (ppc64_isabridge_dev != NULL) - printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); - - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - /* map in PCI I/O space */ - phbs_remap_io(); - - pci_initial_scan_done = 1; - printk(KERN_DEBUG "PCI: Probing PCI hardware done\n"); return 0; @@ -616,11 +550,6 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); -char __init *pcibios_setup(char *str) -{ - return str; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, oldcmd; @@ -651,22 +580,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } -/* - * Return the domain number for this bus. - */ -int pci_domain_nr(struct pci_bus *bus) -{ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return 0; - else { - struct pci_controller *hose = pci_bus_to_host(bus); - - return hose->global_number; - } -} - -EXPORT_SYMBOL(pci_domain_nr); - /* Decide whether to display the domain number in /proc */ int pci_proc_domain(struct pci_bus *bus) { @@ -678,281 +591,6 @@ int pci_proc_domain(struct pci_bus *bus) } } -/* - * Platform support for /proc/bus/pci/X/Y mmap()s, - * modelled on the sparc64 implementation by Dave Miller. - * -- paulus. - */ - -/* - * Adjust vm_pgoff of VMA such that it is the physical page offset - * corresponding to the 32-bit pci bus offset for DEV requested by the user. - * - * Basically, the user finds the base address for his device which he wishes - * to mmap. They read the 32-bit value from the config space base register, - * add whatever PAGE_SIZE multiple offset they wish, and feed this into the - * offset parameter of mmap on /proc/bus/pci/XXX for that device. - * - * Returns negative error code on failure, zero on success. - */ -static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, - resource_size_t *offset, - enum pci_mmap_state mmap_state) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - unsigned long io_offset = 0; - int i, res_bit; - - if (hose == 0) - return NULL; /* should never happen */ - - /* If memory, add on the PCI bridge address offset */ - if (mmap_state == pci_mmap_mem) { -#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ - *offset += hose->pci_mem_offset; -#endif - res_bit = IORESOURCE_MEM; - } else { - io_offset = (unsigned long)hose->io_base_virt - pci_io_base; - *offset += io_offset; - res_bit = IORESOURCE_IO; - } - - /* - * Check that the offset requested corresponds to one of the - * resources of the device. - */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &dev->resource[i]; - int flags = rp->flags; - - /* treat ROM as memory (should be already) */ - if (i == PCI_ROM_RESOURCE) - flags |= IORESOURCE_MEM; - - /* Active and same type? */ - if ((flags & res_bit) == 0) - continue; - - /* In the range of this resource? */ - if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) - continue; - - /* found it! construct the final physical address */ - if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; - return rp; - } - - return NULL; -} - -/* - * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci - * device mapping. - */ -static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, - pgprot_t protection, - enum pci_mmap_state mmap_state, - int write_combine) -{ - unsigned long prot = pgprot_val(protection); - - /* Write combine is always 0 on non-memory space mappings. On - * memory space, if the user didn't pass 1, we check for a - * "prefetchable" resource. This is a bit hackish, but we use - * this to workaround the inability of /sysfs to provide a write - * combine bit - */ - if (mmap_state != pci_mmap_mem) - write_combine = 0; - else if (write_combine == 0) { - if (rp->flags & IORESOURCE_PREFETCH) - write_combine = 1; - } - - /* XXX would be nice to have a way to ask for write-through */ - prot |= _PAGE_NO_CACHE; - if (write_combine) - prot &= ~_PAGE_GUARDED; - else - prot |= _PAGE_GUARDED; - - return __pgprot(prot); -} - -/* - * This one is used by /dev/mem and fbdev who have no clue about the - * PCI device, it tries to find the PCI device first and calls the - * above routine - */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, - unsigned long size, - pgprot_t protection) -{ - struct pci_dev *pdev = NULL; - struct resource *found = NULL; - unsigned long prot = pgprot_val(protection); - unsigned long offset = pfn << PAGE_SHIFT; - int i; - - if (page_is_ram(pfn)) - return __pgprot(prot); - - prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; - - for_each_pci_dev(pdev) { - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &pdev->resource[i]; - int flags = rp->flags; - - /* Active and same type? */ - if ((flags & IORESOURCE_MEM) == 0) - continue; - /* In the range of this resource? */ - if (offset < (rp->start & PAGE_MASK) || - offset > rp->end) - continue; - found = rp; - break; - } - if (found) - break; - } - if (found) { - if (found->flags & IORESOURCE_PREFETCH) - prot &= ~_PAGE_GUARDED; - pci_dev_put(pdev); - } - - DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); - - return __pgprot(prot); -} - - -/* - * Perform the actual remap of the pages for a PCI device mapping, as - * appropriate for this architecture. The region in the process to map - * is described by vm_start and vm_end members of VMA, the base physical - * address is found in vm_pgoff. - * The pci device structure is provided so that architectures may make mapping - * decisions on a per-device or per-bus basis. - * - * Returns a negative error code on failure, zero on success. - */ -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - resource_size_t offset = vma->vm_pgoff << PAGE_SHIFT; - struct resource *rp; - int ret; - - rp = __pci_mmap_make_offset(dev, &offset, mmap_state); - if (rp == NULL) - return -EINVAL; - - vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, - vma->vm_page_prot, - mmap_state, write_combine); - - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot); - - return ret; -} - -static ssize_t pci_show_devspec(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct pci_dev *pdev; - struct device_node *np; - - pdev = to_pci_dev (dev); - np = pci_device_to_OF_node(pdev); - if (np == NULL || np->full_name == NULL) - return 0; - return sprintf(buf, "%s", np->full_name); -} -static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); - -int pcibios_add_platform_entries(struct pci_dev *pdev) -{ - return device_create_file(&pdev->dev, &dev_attr_devspec); -} - -#define ISA_SPACE_MASK 0x1 -#define ISA_SPACE_IO 0x1 - -static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, - unsigned long phb_io_base_phys, - void __iomem * phb_io_base_virt) -{ - /* Remove these asap */ - - struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; - }; - - struct isa_address { - u32 a_hi; - u32 a_lo; - }; - - struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; - }; - - const struct isa_range *range; - unsigned long pci_addr; - unsigned int isa_addr; - unsigned int size; - int rlen = 0; - - range = of_get_property(isa_node, "ranges", &rlen); - if (range == NULL || (rlen < sizeof(struct isa_range))) { - printk(KERN_ERR "no ISA ranges or unexpected isa range size," - "mapping 64k\n"); - __ioremap_explicit(phb_io_base_phys, - (unsigned long)phb_io_base_virt, - 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED); - return; - } - - /* From "ISA Binding to 1275" - * The ranges property is laid out as an array of elements, - * each of which comprises: - * cells 0 - 1: an ISA address - * cells 2 - 4: a PCI address - * (size depending on dev->n_addr_cells) - * cell 5: the size of the range - */ - if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { - isa_addr = range->isa_addr.a_lo; - pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | - range->pci_addr.a_lo; - - /* Assume these are both zero */ - if ((pci_addr != 0) || (isa_addr != 0)) { - printk(KERN_ERR "unexpected isa to pci mapping: %s\n", - __FUNCTION__); - return; - } - - size = PAGE_ALIGN(range->size); - - __ioremap_explicit(phb_io_base_phys, - (unsigned long) phb_io_base_virt, - size, _PAGE_NO_CACHE | _PAGE_GUARDED); - } -} - void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int prim) { @@ -1047,155 +685,122 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, } } -void __devinit pci_setup_phb_io(struct pci_controller *hose, int primary) +#ifdef CONFIG_HOTPLUG + +int pcibios_unmap_io_space(struct pci_bus *bus) { - unsigned long size = hose->pci_io_size; - unsigned long io_virt_offset; - struct resource *res; - struct device_node *isa_dn; + struct pci_controller *hose; - if (size == 0) - return; + WARN_ON(bus == NULL); - hose->io_base_virt = reserve_phb_iospace(size); - DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", - hose->global_number, hose->io_base_phys, - (unsigned long) hose->io_base_virt); - - if (primary) { - pci_io_base = (unsigned long)hose->io_base_virt; - isa_dn = of_find_node_by_type(NULL, "isa"); - if (isa_dn) { - isa_io_base = pci_io_base; - pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, - hose->io_base_virt); - of_node_put(isa_dn); - } - } + /* If this is not a PHB, we only flush the hash table over + * the area mapped by this bridge. We don't play with the PTE + * mappings since we might have to deal with sub-page alignemnts + * so flushing the hash table is the only sane way to make sure + * that no hash entries are covering that removed bridge area + * while still allowing other busses overlapping those pages + */ + if (bus->self) { + struct resource *res = bus->resource[0]; - io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; - res = &hose->io_resource; - res->start += io_virt_offset; - res->end += io_virt_offset; + DBG("IO unmapping for PCI-PCI bridge %s\n", + pci_name(bus->self)); - /* If this is called after the initial PCI scan, then we need to - * proceed to IO mappings now - */ - if (pci_initial_scan_done) - __ioremap_explicit(hose->io_base_phys, - (unsigned long)hose->io_base_virt, - hose->pci_io_size, - _PAGE_NO_CACHE | _PAGE_GUARDED); -} + __flush_hash_table_range(&init_mm, res->start + _IO_BASE, + res->end - res->start + 1); + return 0; + } -void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, - int primary) -{ - unsigned long size = hose->pci_io_size; - unsigned long io_virt_offset; - struct resource *res; + /* Get the host bridge */ + hose = pci_bus_to_host(bus); - if (size == 0) - return; + /* Check if we have IOs allocated */ + if (hose->io_base_alloc == 0) + return 0; - hose->io_base_virt = __ioremap(hose->io_base_phys, size, - _PAGE_NO_CACHE | _PAGE_GUARDED); - DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", - hose->global_number, hose->io_base_phys, - (unsigned long) hose->io_base_virt); + DBG("IO unmapping for PHB %s\n", + ((struct device_node *)hose->arch_data)->full_name); + DBG(" alloc=0x%p\n", hose->io_base_alloc); - if (primary) - pci_io_base = (unsigned long)hose->io_base_virt; + /* This is a PHB, we fully unmap the IO area */ + vunmap(hose->io_base_alloc); - io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; - res = &hose->io_resource; - res->start += io_virt_offset; - res->end += io_virt_offset; + return 0; } +EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); +#endif /* CONFIG_HOTPLUG */ -static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, - unsigned long *start_virt, unsigned long *size) +int __devinit pcibios_map_io_space(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct resource *res; - - if (bus->self) - res = bus->resource[0]; - else - /* Root Bus */ - res = &hose->io_resource; - - if (res->end == 0 && res->start == 0) - return 1; + struct vm_struct *area; + unsigned long phys_page; + unsigned long size_page; + unsigned long io_virt_offset; + struct pci_controller *hose; - *start_virt = pci_io_base + res->start; - *start_phys = *start_virt + hose->io_base_phys - - (unsigned long) hose->io_base_virt; + WARN_ON(bus == NULL); - if (res->end > res->start) - *size = res->end - res->start + 1; - else { - printk("%s(): unexpected region 0x%lx->0x%lx\n", - __FUNCTION__, res->start, res->end); - return 1; + /* If this not a PHB, nothing to do, page tables still exist and + * thus HPTEs will be faulted in when needed + */ + if (bus->self) { + DBG("IO mapping for PCI-PCI bridge %s\n", + pci_name(bus->self)); + DBG(" virt=0x%016lx...0x%016lx\n", + bus->resource[0]->start + _IO_BASE, + bus->resource[0]->end + _IO_BASE); + return 0; } - return 0; -} - -int unmap_bus_range(struct pci_bus *bus) -{ - unsigned long start_phys; - unsigned long start_virt; - unsigned long size; + /* Get the host bridge */ + hose = pci_bus_to_host(bus); + phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); + size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); - if (!bus) { - printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); - return 1; - } - - if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) - return 1; - if (__iounmap_explicit((void __iomem *) start_virt, size)) - return 1; - - return 0; -} -EXPORT_SYMBOL(unmap_bus_range); + /* Make sure IO area address is clear */ + hose->io_base_alloc = NULL; -int remap_bus_range(struct pci_bus *bus) -{ - unsigned long start_phys; - unsigned long start_virt; - unsigned long size; + /* If there's no IO to map on that bus, get away too */ + if (hose->pci_io_size == 0 || hose->io_base_phys == 0) + return 0; - if (!bus) { - printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); - return 1; - } - - - if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) - return 1; - if (start_phys == 0) - return 1; - printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); - if (__ioremap_explicit(start_phys, start_virt, size, - _PAGE_NO_CACHE | _PAGE_GUARDED)) - return 1; + /* Let's allocate some IO space for that guy. We don't pass + * VM_IOREMAP because we don't care about alignment tricks that + * the core does in that case. Maybe we should due to stupid card + * with incomplete address decoding but I'd rather not deal with + * those outside of the reserved 64K legacy region. + */ + area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); + if (area == NULL) + return -ENOMEM; + hose->io_base_alloc = area->addr; + hose->io_base_virt = (void __iomem *)(area->addr + + hose->io_base_phys - phys_page); + + DBG("IO mapping for PHB %s\n", + ((struct device_node *)hose->arch_data)->full_name); + DBG(" phys=0x%016lx, virt=0x%p (alloc=0x%p)\n", + hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); + DBG(" size=0x%016lx (alloc=0x%016lx)\n", + hose->pci_io_size, size_page); + + /* Establish the mapping */ + if (__ioremap_at(phys_page, area->addr, size_page, + _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL) + return -ENOMEM; + + /* Fixup hose IO resource */ + io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + hose->io_resource.start += io_virt_offset; + hose->io_resource.end += io_virt_offset; + + DBG(" hose->io_resource=0x%016lx...0x%016lx\n", + hose->io_resource.start, hose->io_resource.end); return 0; } -EXPORT_SYMBOL(remap_bus_range); - -static void phbs_remap_io(void) -{ - struct pci_controller *hose, *tmp; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - remap_bus_range(hose->bus); -} +EXPORT_SYMBOL_GPL(pcibios_map_io_space); static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) { @@ -1203,8 +808,7 @@ static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) unsigned long offset; if (res->flags & IORESOURCE_IO) { - offset = (unsigned long)hose->io_base_virt - pci_io_base; - + offset = (unsigned long)hose->io_base_virt - _IO_BASE; res->start += offset; res->end += offset; } else if (res->flags & IORESOURCE_MEM) { @@ -1219,9 +823,20 @@ void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, /* Update device resources. */ int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) - if (dev->resource[i].flags) - fixup_resource(&dev->resource[i], dev); + DBG("%s: Fixup resources:\n", pci_name(dev)); + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + if (!res->flags) + continue; + + DBG(" 0x%02x < %08lx:0x%016lx...0x%016lx\n", + i, res->flags, res->start, res->end); + + fixup_resource(res, dev); + + DBG(" > %08lx:0x%016lx...0x%016lx\n", + res->flags, res->start, res->end); + } } EXPORT_SYMBOL(pcibios_fixup_device_resources); @@ -1291,119 +906,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); -/* - * Reads the interrupt pin to determine if interrupt is use by card. - * If the interrupt is used, then gets the interrupt line from the - * openfirmware and sets it in the pci_dev and pci_config line. - */ -int pci_read_irq_line(struct pci_dev *pci_dev) -{ - struct of_irq oirq; - unsigned int virq; - - DBG("Try to map irq for %s...\n", pci_name(pci_dev)); - -#ifdef DEBUG - memset(&oirq, 0xff, sizeof(oirq)); -#endif - /* Try to get a mapping from the device-tree */ - if (of_irq_map_pci(pci_dev, &oirq)) { - u8 line, pin; - - /* If that fails, lets fallback to what is in the config - * space and map that through the default controller. We - * also set the type to level low since that's what PCI - * interrupts are. If your platform does differently, then - * either provide a proper interrupt tree or don't use this - * function. - */ - if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; - if (pin == 0) - return -1; - if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || - line == 0xff) { - return -1; - } - DBG(" -> no map ! Using irq line %d from PCI config\n", line); - - virq = irq_create_mapping(NULL, line); - if (virq != NO_IRQ) - set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); - } else { - DBG(" -> got one, spec %d cells (0x%08x 0x%08x...) on %s\n", - oirq.size, oirq.specifier[0], oirq.specifier[1], - oirq.controller->full_name); - - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); - } - if(virq == NO_IRQ) { - DBG(" -> failed to map !\n"); - return -1; - } - - DBG(" -> mapped to linux irq %d\n", virq); - - pci_dev->irq = virq; - - return 0; -} -EXPORT_SYMBOL(pci_read_irq_line); - -void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - resource_size_t offset = 0; - - if (hose == NULL) - return; - - if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; - - /* We pass a fully fixed up address to userland for MMIO instead of - * a BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem ! - * - * That means that we'll have potentially 64 bits values where some - * userland apps only expect 32 (like X itself since it thinks only - * Sparc has 64 bits MMIO) but if we don't do that, we break it on - * 32 bits CHRPs :-( - * - * Hopefully, the sysfs insterface is immune to that gunk. Once X - * has been fixed (and the fix spread enough), we can re-enable the - * 2 lines below and pass down a BAR value to userland. In that case - * we'll also have to re-enable the matching code in - * __pci_mmap_make_offset(). - * - * BenH. - */ -#if 0 - else if (rsrc->flags & IORESOURCE_MEM) - offset = hose->pci_mem_offset; -#endif - - *start = rsrc->start - offset; - *end = rsrc->end - offset; -} - -struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) -{ - if (!have_of) - return NULL; - while(node) { - struct pci_controller *hose, *tmp; - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - if (hose->arch_data == node) - return hose; - node = node->parent; - } - return NULL; -} - unsigned long pci_address_to_pio(phys_addr_t address) { struct pci_controller *hose, *tmp; @@ -1412,7 +914,7 @@ unsigned long pci_address_to_pio(phys_addr_t address) if (address >= hose->io_base_phys && address < (hose->io_base_phys + hose->pci_io_size)) { unsigned long base = - (unsigned long)hose->io_base_virt - pci_io_base; + (unsigned long)hose->io_base_virt - _IO_BASE; return base + (address - hose->io_base_phys); } } diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index c96fa9bd35a..a20f1951a5c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -67,7 +67,6 @@ EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); -EXPORT_SYMBOL(do_signal); EXPORT_SYMBOL(transfer_to_handler); EXPORT_SYMBOL(do_IRQ); EXPORT_SYMBOL(machine_check_exception); @@ -106,10 +105,6 @@ EXPORT_SYMBOL(isa_mem_base); EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); -EXPORT_SYMBOL(pci_bus_io_base); -EXPORT_SYMBOL(pci_bus_io_base_phys); -EXPORT_SYMBOL(pci_bus_mem_base_phys); -EXPORT_SYMBOL(pci_bus_to_hose); #endif /* CONFIG_PCI */ EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6e2f03566b0..84f000a45e3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -219,22 +219,26 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC_MERGE /* XXX for now */ int set_dabr(unsigned long dabr) { +#ifdef CONFIG_PPC_MERGE /* XXX for now */ if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr); +#endif + /* XXX should we have a CPU_FTR_HAS_DABR ? */ +#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) mtspr(SPRN_DABR, dabr); +#endif return 0; } -#endif #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); -static DEFINE_PER_CPU(unsigned long, current_dabr); #endif +static DEFINE_PER_CPU(unsigned long, current_dabr); + struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -299,12 +303,10 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC64 /* for now */ if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { set_dabr(new->thread.dabr); __get_cpu_var(current_dabr) = new->thread.dabr; } -#endif /* CONFIG_PPC64 */ new_thread = &new->thread; old_thread = ¤t->thread; @@ -473,12 +475,10 @@ void flush_thread(void) discard_lazy_cpu_state(); -#ifdef CONFIG_PPC64 /* for now */ if (current->thread.dabr) { current->thread.dabr = 0; set_dabr(0); } -#endif } void diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index af42ddab3ab..37ff99bd98b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -52,6 +52,7 @@ #include <asm/pSeries_reconfig.h> #include <asm/pci-bridge.h> #include <asm/kexec.h> +#include <asm/system.h> #ifdef DEBUG #define DBG(fmt...) printk(KERN_ERR fmt) @@ -1005,7 +1006,7 @@ static void __init early_reserve_mem(void) void __init early_init_devtree(void *params) { - DBG(" -> early_init_devtree()\n"); + DBG(" -> early_init_devtree(%p)\n", params); /* Setup flat device-tree pointer */ initial_boot_params = params; @@ -1055,8 +1056,6 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } -#undef printk - int of_n_addr_cells(struct device_node* np) { const int *ip; @@ -1375,8 +1374,17 @@ static void of_node_release(struct kref *kref) struct device_node *node = kref_to_device_node(kref); struct property *prop = node->properties; - if (!OF_IS_DYNAMIC(node)) + /* We should never be releasing nodes that haven't been detached. */ + if (!of_node_check_flag(node, OF_DETACHED)) { + printk("WARNING: Bad of_node_put() on %s\n", node->full_name); + dump_stack(); + kref_init(&node->kref); + return; + } + + if (!of_node_check_flag(node, OF_DYNAMIC)) return; + while (prop) { struct property *next = prop->next; kfree(prop->name); @@ -1432,6 +1440,8 @@ void of_detach_node(const struct device_node *np) write_lock(&devtree_lock); parent = np->parent; + if (!parent) + goto out_unlock; if (allnodes == np) allnodes = np->allnext; @@ -1455,6 +1465,9 @@ void of_detach_node(const struct device_node *np) prevsib->sibling = np->sibling; } + of_node_set_flag(np, OF_DETACHED); + +out_unlock: write_unlock(&devtree_lock); } @@ -1716,22 +1729,18 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) } EXPORT_SYMBOL(of_get_cpu_node); -#ifdef DEBUG +#if defined(CONFIG_DEBUG_FS) && defined(DEBUG) static struct debugfs_blob_wrapper flat_dt_blob; static int __init export_flat_device_tree(void) { struct dentry *d; - d = debugfs_create_dir("powerpc", NULL); - if (!d) - return 1; - flat_dt_blob.data = initial_boot_params; flat_dt_blob.size = initial_boot_params->totalsize; d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, - d, &flat_dt_blob); + powerpc_debugfs_root, &flat_dt_blob); if (!d) return 1; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d6047c44103..a1d582e3862 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -635,6 +635,7 @@ static void __init early_cmdline_parse(void) /* ibm,dynamic-reconfiguration-memory property supported */ #define OV5_DRCONF_MEMORY 0x20 #define OV5_LARGE_PAGES 0x10 /* large pages supported */ +#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */ /* PCIe/MSI support. Without MSI full PCIe is not supported */ #ifdef CONFIG_PCI_MSI #define OV5_MSI 0x01 /* PCIe/MSI support */ @@ -685,7 +686,8 @@ static unsigned char ibm_architecture_vec[] = { /* option vector 5: PAPR/OF options */ 3 - 2, /* length */ 0, /* don't ignore, don't halt */ - OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_MSI, + OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | + OV5_DONATE_DEDICATE_CPU | OV5_MSI, }; /* Old method - ELF header with PT_NOTE sections */ diff --git a/arch/powerpc/kernel/ptrace-common.h b/arch/powerpc/kernel/ptrace-common.h deleted file mode 100644 index 8797ae737a7..00000000000 --- a/arch/powerpc/kernel/ptrace-common.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2002 Stephen Rothwell, IBM Coproration - * Extracted from ptrace.c and ptrace32.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. - */ - -#ifndef _PPC64_PTRACE_COMMON_H -#define _PPC64_PTRACE_COMMON_H - -#include <asm/system.h> - -/* - * Set of msr bits that gdb can change on behalf of a process. - */ -#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) - -/* - * Get contents of register REGNO in task TASK. - */ -static inline unsigned long get_reg(struct task_struct *task, int regno) -{ - unsigned long tmp = 0; - - /* - * Put the correct FP bits in, they might be wrong as a result - * of our lazy FP restore. - */ - if (regno == PT_MSR) { - tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; - tmp |= task->thread.fpexc_mode; - } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { - tmp = ((unsigned long *)task->thread.regs)[regno]; - } - - return tmp; -} - -/* - * Write contents of register REGNO in task TASK. - */ -static inline int put_reg(struct task_struct *task, int regno, - unsigned long data) -{ - if (regno < PT_SOFTE) { - if (regno == PT_MSR) - data = (data & MSR_DEBUGCHANGE) - | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); - ((unsigned long *)task->thread.regs)[regno] = data; - return 0; - } - return -EIO; -} - -static inline void set_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - if (regs != NULL) - regs->msr |= MSR_SE; - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -static inline void clear_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - if (regs != NULL) - regs->msr &= ~MSR_SE; - clear_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -#ifdef CONFIG_ALTIVEC -/* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. - * The transfer totals 34 quadword. Quadwords 0-31 contain the - * corresponding vector registers. Quadword 32 contains the vscr as the - * last word (offset 12) within that quadword. Quadword 33 contains the - * vrsave as the first word (offset 0) within the quadword. - * - * This definition of the VMX state is compatible with the current PPC32 - * ptrace interface. This allows signal handling and ptrace to use the - * same structures. This also simplifies the implementation of a bi-arch - * (combined (32- and 64-bit) gdb. - */ - -/* - * Get contents of AltiVec register state in task TASK - */ -static inline int get_vrregs(unsigned long __user *data, - struct task_struct *task) -{ - unsigned long regsize; - - /* copy AltiVec registers VR[0] .. VR[31] */ - regsize = 32 * sizeof(vector128); - if (copy_to_user(data, task->thread.vr, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VSCR */ - regsize = 1 * sizeof(vector128); - if (copy_to_user(data, &task->thread.vscr, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VRSAVE */ - if (put_user(task->thread.vrsave, (u32 __user *)data)) - return -EFAULT; - - return 0; -} - -/* - * Write contents of AltiVec register state into task TASK. - */ -static inline int set_vrregs(struct task_struct *task, - unsigned long __user *data) -{ - unsigned long regsize; - - /* copy AltiVec registers VR[0] .. VR[31] */ - regsize = 32 * sizeof(vector128); - if (copy_from_user(task->thread.vr, data, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VSCR */ - regsize = 1 * sizeof(vector128); - if (copy_from_user(&task->thread.vscr, data, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VRSAVE */ - if (get_user(task->thread.vrsave, (u32 __user *)data)) - return -EFAULT; - - return 0; -} -#endif - -static inline int ptrace_set_debugreg(struct task_struct *task, - unsigned long addr, unsigned long data) -{ - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - return -EINVAL; - - /* The bottom 3 bits are flags */ - if ((data & ~0x7UL) >= TASK_SIZE) - return -EIO; - - /* Ensure translation is on */ - if (data && !(data & DABR_TRANSLATION)) - return -EIO; - - task->thread.dabr = data; - return 0; -} - -#endif /* _PPC64_PTRACE_COMMON_H */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index bf76562167c..0fb53950da4 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -35,11 +35,11 @@ #include <asm/pgtable.h> #include <asm/system.h> -#ifdef CONFIG_PPC64 -#include "ptrace-common.h" -#endif +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ -#ifdef CONFIG_PPC32 /* * Set of msr bits that gdb can change on behalf of a process. */ @@ -48,65 +48,117 @@ #else #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) #endif -#endif /* CONFIG_PPC32 */ /* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. + * Max register writeable via put_reg */ - #ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + /* * Get contents of register REGNO in task TASK. */ -static inline unsigned long get_reg(struct task_struct *task, int regno) +unsigned long ptrace_get_reg(struct task_struct *task, int regno) { - if (regno < sizeof(struct pt_regs) / sizeof(unsigned long) - && task->thread.regs != NULL) + unsigned long tmp = 0; + + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) { + tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; + return tmp | task->thread.fpexc_mode; + } + + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) return ((unsigned long *)task->thread.regs)[regno]; - return (0); + + return -EIO; } /* * Write contents of register REGNO in task TASK. */ -static inline int put_reg(struct task_struct *task, int regno, - unsigned long data) +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) { - if (regno <= PT_MQ && task->thread.regs != NULL) { + if (task->thread.regs == NULL) + return -EIO; + + if (regno <= PT_MAX_PUT_REG || regno == PT_TRAP) { if (regno == PT_MSR) data = (data & MSR_DEBUGCHANGE) | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); + /* We prevent mucking around with the reserved area of trap + * which are used internally by the kernel + */ + if (regno == PT_TRAP) + data &= 0xfff0; ((unsigned long *)task->thread.regs)[regno] = data; return 0; } return -EIO; } + +static int get_fpregs(void __user *data, struct task_struct *task, + int has_fpscr) +{ + unsigned int count = has_fpscr ? 33 : 32; + + if (copy_to_user(data, task->thread.fpr, count * sizeof(double))) + return -EFAULT; + return 0; +} + +static int set_fpregs(void __user *data, struct task_struct *task, + int has_fpscr) +{ + unsigned int count = has_fpscr ? 33 : 32; + + if (copy_from_user(task->thread.fpr, data, count * sizeof(double))) + return -EFAULT; + return 0; +} + + #ifdef CONFIG_ALTIVEC /* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +/* * Get contents of AltiVec register state in task TASK */ -static inline int get_vrregs(unsigned long __user *data, struct task_struct *task) +static int get_vrregs(unsigned long __user *data, struct task_struct *task) { - int i, j; - - if (!access_ok(VERIFY_WRITE, data, 133 * sizeof(unsigned long))) - return -EFAULT; + unsigned long regsize; /* copy AltiVec registers VR[0] .. VR[31] */ - for (i = 0; i < 32; i++) - for (j = 0; j < 4; j++, data++) - if (__put_user(task->thread.vr[i].u[j], data)) - return -EFAULT; + regsize = 32 * sizeof(vector128); + if (copy_to_user(data, task->thread.vr, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VSCR */ - for (i = 0; i < 4; i++, data++) - if (__put_user(task->thread.vscr.u[i], data)) - return -EFAULT; + regsize = 1 * sizeof(vector128); + if (copy_to_user(data, &task->thread.vscr, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); - /* copy VRSAVE */ - if (__put_user(task->thread.vrsave, data)) + /* copy VRSAVE */ + if (put_user(task->thread.vrsave, (u32 __user *)data)) return -EFAULT; return 0; @@ -115,31 +167,29 @@ static inline int get_vrregs(unsigned long __user *data, struct task_struct *tas /* * Write contents of AltiVec register state into task TASK. */ -static inline int set_vrregs(struct task_struct *task, unsigned long __user *data) +static int set_vrregs(struct task_struct *task, unsigned long __user *data) { - int i, j; - - if (!access_ok(VERIFY_READ, data, 133 * sizeof(unsigned long))) - return -EFAULT; + unsigned long regsize; /* copy AltiVec registers VR[0] .. VR[31] */ - for (i = 0; i < 32; i++) - for (j = 0; j < 4; j++, data++) - if (__get_user(task->thread.vr[i].u[j], data)) - return -EFAULT; + regsize = 32 * sizeof(vector128); + if (copy_from_user(task->thread.vr, data, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VSCR */ - for (i = 0; i < 4; i++, data++) - if (__get_user(task->thread.vscr.u[i], data)) - return -EFAULT; + regsize = 1 * sizeof(vector128); + if (copy_from_user(&task->thread.vscr, data, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VRSAVE */ - if (__get_user(task->thread.vrsave, data)) + if (get_user(task->thread.vrsave, (u32 __user *)data)) return -EFAULT; return 0; } -#endif +#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_SPE @@ -156,7 +206,7 @@ static inline int set_vrregs(struct task_struct *task, unsigned long __user *dat /* * Get contents of SPE register state in task TASK. */ -static inline int get_evrregs(unsigned long *data, struct task_struct *task) +static int get_evrregs(unsigned long *data, struct task_struct *task) { int i; @@ -182,7 +232,7 @@ static inline int get_evrregs(unsigned long *data, struct task_struct *task) /* * Write contents of SPE register state into task TASK. */ -static inline int set_evrregs(struct task_struct *task, unsigned long *data) +static int set_evrregs(struct task_struct *task, unsigned long *data) { int i; @@ -205,8 +255,8 @@ static inline int set_evrregs(struct task_struct *task, unsigned long *data) } #endif /* CONFIG_SPE */ -static inline void -set_single_step(struct task_struct *task) + +static void set_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -221,8 +271,7 @@ set_single_step(struct task_struct *task) set_tsk_thread_flag(task, TIF_SINGLESTEP); } -static inline void -clear_single_step(struct task_struct *task) +static void clear_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -236,7 +285,25 @@ clear_single_step(struct task_struct *task) } clear_tsk_thread_flag(task, TIF_SINGLESTEP); } -#endif /* CONFIG_PPC32 */ + +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, + unsigned long data) +{ + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* Ensure translation is on */ + if (data && !(data & DABR_TRANSLATION)) + return -EIO; + + task->thread.dabr = data; + return 0; +} /* * Called by kernel/ptrace.c when detaching.. @@ -249,6 +316,62 @@ void ptrace_disable(struct task_struct *child) clear_single_step(child); } +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long arch_ptrace_old(struct task_struct *child, long request, long addr, + long data) +{ + int ret = -EPERM; + + switch(request) { + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ + flush_fp_to_thread(child); + ret = get_fpregs((void __user *)addr, child, 0); + break; + } + + case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ + flush_fp_to_thread(child); + ret = set_fpregs((void __user *)addr, child, 0); + break; + } + + } + return ret; +} + long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret = -EPERM; @@ -284,11 +407,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #endif break; -#ifdef CONFIG_PPC32 CHECK_FULL_REGS(child->thread.regs); -#endif if (index < PT_FPR0) { - tmp = get_reg(child, (int) index); + tmp = ptrace_get_reg(child, (int) index); } else { flush_fp_to_thread(child); tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; @@ -323,13 +444,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #endif break; -#ifdef CONFIG_PPC32 CHECK_FULL_REGS(child->thread.regs); -#endif - if (index == PT_ORIG_R3) - break; if (index < PT_FPR0) { - ret = put_reg(child, index, data); + ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; @@ -384,7 +501,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } -#ifdef CONFIG_PPC64 case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ @@ -398,73 +514,61 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); break; -#endif case PTRACE_DETACH: ret = ptrace_detach(child, data); break; - case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; +#ifdef CONFIG_PPC64 + case PTRACE_GETREGS64: +#endif + case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ + int ui; + if (!access_ok(VERIFY_WRITE, (void __user *)data, + sizeof(struct pt_regs))) { + ret = -EIO; + break; + } + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret |= __put_user(ptrace_get_reg(child, ui), + (unsigned long __user *) data); + data += sizeof(long); } break; } - case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); +#ifdef CONFIG_PPC64 + case PTRACE_SETREGS64: +#endif + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int ui; + if (!access_ok(VERIFY_READ, (void __user *)data, + sizeof(struct pt_regs))) { + ret = -EIO; + break; + } + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret = __get_user(tmp, (unsigned long __user *) data); if (ret) break; - reg++; - tmp++; + ptrace_put_reg(child, ui, tmp); + data += sizeof(long); } break; } - case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - + case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */ flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; - } + ret = get_fpregs((void __user *)data, child, 1); break; } - case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - + case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */ flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; - } + ret = set_fpregs((void __user *)data, child, 1); break; } @@ -499,11 +603,18 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif + /* Old reverse args ptrace callss */ + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ + case PPC_PTRACE_SETFPREGS: /* Get FPRs 0 - 31. */ + ret = arch_ptrace_old(child, request, addr, data); + break; + default: ret = ptrace_request(child, request, addr, data); break; } - return ret; } diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 9b9a230349b..9e6baeac0fb 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -33,13 +33,55 @@ #include <asm/pgtable.h> #include <asm/system.h> -#include "ptrace-common.h" - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. */ +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long compat_ptrace_old(struct task_struct *child, long request, + long addr, long data) +{ + int ret = -EPERM; + + switch(request) { + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + } + return ret; +} + long compat_sys_ptrace(int request, int pid, unsigned long addr, unsigned long data) { @@ -123,7 +165,7 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, break; if (index < PT_FPR0) { - tmp = get_reg(child, index); + tmp = ptrace_get_reg(child, index); } else { flush_fp_to_thread(child); /* @@ -162,7 +204,9 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, else part = 0; /* want the 1st half of the register (left-most). */ - /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */ + /* Validate the input - check to see if address is on the wrong boundary + * or beyond the end of the user area + */ if ((addr & 3) || numReg > PT_FPSCR) break; @@ -170,7 +214,7 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, flush_fp_to_thread(child); tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; } else { /* register within PT_REGS struct */ - tmp = get_reg(child, numReg); + tmp = ptrace_get_reg(child, numReg); } reg32bits = ((u32*)&tmp)[part]; ret = put_user(reg32bits, (u32 __user *)data); @@ -226,10 +270,8 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, if ((addr & 3) || (index > PT_FPSCR32)) break; - if (index == PT_ORIG_R3) - break; if (index < PT_FPR0) { - ret = put_reg(child, index, data); + ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); /* @@ -258,70 +300,25 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, /* Determine which register the user wants */ index = (u64)addr >> 2; numReg = index / 2; + /* * Validate the input - check to see if address is on the * wrong boundary or beyond the end of the user area */ if ((addr & 3) || (numReg > PT_FPSCR)) break; - /* Insure it is a register we let them change */ - if ((numReg == PT_ORIG_R3) - || ((numReg > PT_CCR) && (numReg < PT_FPR0))) - break; - if (numReg >= PT_FPR0) { + if (numReg < PT_FPR0) { + unsigned long freg = ptrace_get_reg(child, numReg); + if (index % 2) + freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); + else + freg = (freg & 0xfffffffful) | (data << 32); + ret = ptrace_put_reg(child, numReg, freg); + } else { flush_fp_to_thread(child); + ((unsigned int *)child->thread.regs)[index] = data; + ret = 0; } - if (numReg == PT_MSR) - data = (data & MSR_DEBUGCHANGE) - | (child->thread.regs->msr & ~MSR_DEBUGCHANGE); - ((u32*)child->thread.regs)[index] = data; - ret = 0; - break; - } - - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - ret = 0; - break; - } - - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: { - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - break; - } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_single_step(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; break; } @@ -334,95 +331,67 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, break; } - case PTRACE_SET_DEBUGREG: - ret = ptrace_set_debugreg(child, addr, data); - break; - - case PTRACE_DETACH: - ret = ptrace_detach(child, data); + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); break; - case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; + case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ + int ui; + if (!access_ok(VERIFY_WRITE, (void __user *)data, + PT_REGS_COUNT * sizeof(int))) { + ret = -EIO; + break; } - break; - } - - case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret |= __put_user(ptrace_get_reg(child, ui), + (unsigned int __user *) data); + data += sizeof(int); } break; } - case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int ui; + if (!access_ok(VERIFY_READ, (void __user *)data, + PT_REGS_COUNT * sizeof(int))) { + ret = -EIO; + break; } - break; - } - - case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret = __get_user(tmp, (unsigned int __user *) data); if (ret) break; - reg++; - tmp++; + ptrace_put_reg(child, ui, tmp); + data += sizeof(int); } break; } - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned int __user *) data); - break; - -#ifdef CONFIG_ALTIVEC + case PTRACE_GETFPREGS: + case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: - /* Get the child altivec register state. */ - flush_altivec_to_thread(child); - ret = get_vrregs((unsigned long __user *)data, child); + case PTRACE_SETVRREGS: + case PTRACE_GETREGS64: + case PTRACE_SETREGS64: + case PPC_PTRACE_GETFPREGS: + case PPC_PTRACE_SETFPREGS: + case PTRACE_KILL: + case PTRACE_SINGLESTEP: + case PTRACE_DETACH: + case PTRACE_SET_DEBUGREG: + case PTRACE_SYSCALL: + case PTRACE_CONT: + ret = arch_ptrace(child, request, addr, data); break; - case PTRACE_SETVRREGS: - /* Set the child altivec register state. */ - flush_altivec_to_thread(child); - ret = set_vrregs(child, (unsigned long __user *)data); + /* Old reverse args ptrace callss */ + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + ret = compat_ptrace_old(child, request, addr, data); break; -#endif default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index f2286822be0..a5de6211b97 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -278,10 +278,8 @@ void __init find_and_init_phbs(void) { struct device_node *node; struct pci_controller *phb; - unsigned int index; struct device_node *root = of_find_node_by_path("/"); - index = 0; for (node = of_get_next_child(root, NULL); node != NULL; node = of_get_next_child(root, node)) { @@ -295,8 +293,7 @@ void __init find_and_init_phbs(void) continue; rtas_setup_phb(phb); pci_process_bridge_OF_ranges(phb, node, 0); - pci_setup_phb_io(phb, index == 0); - index++; + isa_bridge_find_early(phb); } of_node_put(root); @@ -335,7 +332,7 @@ int pcibios_remove_root_bus(struct pci_controller *phb) return 1; } - rc = unmap_bus_range(b); + rc = pcibios_unmap_io_space(b); if (rc) { printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", __FUNCTION__, b->name); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ed07a198f8d..4924c48cb1f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -32,6 +32,7 @@ #include <linux/unistd.h> #include <linux/serial.h> #include <linux/serial_8250.h> +#include <linux/debugfs.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/processor.h> @@ -486,6 +487,14 @@ int check_legacy_ioport(unsigned long base_port) switch(base_port) { case I8042_DATA_REG: + if (!(np = of_find_compatible_node(NULL, NULL, "pnpPNP,303"))) + np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03"); + if (np) { + parent = of_get_parent(np); + of_node_put(np); + np = parent; + break; + } np = of_find_node_by_type(NULL, "8042"); break; case FDC_BASE: /* FDC1 */ @@ -571,3 +580,15 @@ static int __init check_cache_coherency(void) late_initcall(check_cache_coherency); #endif /* CONFIG_CHECK_CACHE_COHERENCY */ + +#ifdef CONFIG_DEBUG_FS +struct dentry *powerpc_debugfs_root; + +static int powerpc_debugfs_init(void) +{ + powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); + + return powerpc_debugfs_root == NULL; +} +arch_initcall(powerpc_debugfs_init); +#endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 35f8f443c14..7ec6ba56d83 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -262,13 +262,11 @@ void __init setup_arch(char **cmdline_p) * Systems with OF can look in the properties on the cpu node(s) * for a possibly more accurate value. */ - if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { - dcache_bsize = cur_cpu_spec->dcache_bsize; - icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - } else - ucache_bsize = dcache_bsize = icache_bsize - = cur_cpu_spec->dcache_bsize; + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) + ucache_bsize = icache_bsize = dcache_bsize; /* reboot on panic */ panic_timeout = 180; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6018178708a..bc43bba05cf 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -350,13 +350,11 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); - /* Apply the CPUs-specific and firmware specific fixups to kernel - * text (nop out sections not relevant to this CPU or this firmware) + /* Apply CPUs-specific fixups to kernel text (nop out sections + * not relevant to this CPU) */ do_feature_fixups(cur_cpu_spec->cpu_features, &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(powerpc_firmware_features, - &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); /* * Unflatten the device-tree passed by prom_init or kexec @@ -394,6 +392,12 @@ void __init setup_system(void) if (ppc_md.init_early) ppc_md.init_early(); + /* Apply firmware specific fixups to kernel text (nop out + * sections not relevant to this firmware) + */ + do_feature_fixups(powerpc_firmware_features, + &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); + /* * We can discover serial ports now since the above did setup the * hash table management for us, thus ioremap works. We do that early diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c new file mode 100644 index 00000000000..c434d6c4e4e --- /dev/null +++ b/arch/powerpc/kernel/signal.c @@ -0,0 +1,180 @@ +/* + * Common signal handling code for both 32 and 64 bits + * + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Extracted from signal_32.c and signal_64.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include <linux/ptrace.h> +#include <linux/signal.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +#include "signal.h" + +/* + * Allocate space for the signal frame + */ +void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) +{ + unsigned long oldsp, newsp; + + /* Default to using normal stack */ + oldsp = regs->gpr[1]; + + /* Check for alt stack */ + if ((ka->sa.sa_flags & SA_ONSTACK) && + current->sas_ss_size && !on_sig_stack(oldsp)) + oldsp = (current->sas_ss_sp + current->sas_ss_size); + + /* Get aligned frame */ + newsp = (oldsp - frame_size) & ~0xFUL; + + /* Check access */ + if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp)) + return NULL; + + return (void __user *)newsp; +} + + +/* + * Restore the user process's signal mask + */ +void restore_sigmask(sigset_t *set) +{ + sigdelsetmask(set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = *set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + +static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, + int has_handler) +{ + unsigned long ret = regs->gpr[3]; + int restart = 1; + + /* syscall ? */ + if (TRAP(regs) != 0x0C00) + return; + + /* error signalled ? */ + if (!(regs->ccr & 0x10000000)) + return; + + switch (ret) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + /* ERESTARTNOHAND means that the syscall should only be + * restarted if there was no handler for the signal, and since + * we only get here if there is a handler, we dont restart. + */ + restart = !has_handler; + break; + case ERESTARTSYS: + /* ERESTARTSYS means to restart the syscall if there is no + * handler or the handler was registered with SA_RESTART + */ + restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0; + break; + case ERESTARTNOINTR: + /* ERESTARTNOINTR means that the syscall should be + * called again after the signal handler returns. + */ + break; + default: + return; + } + if (restart) { + if (ret == ERESTART_RESTARTBLOCK) + regs->gpr[0] = __NR_restart_syscall; + else + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; + regs->result = 0; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } +} + +int do_signal(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + int ret; + int is32 = is_32bit_task(); + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else if (!oldset) + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* Is there any syscall restart business here ? */ + check_syscall_restart(regs, &ka, signr > 0); + + if (signr <= 0) { + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + return 0; /* no signals delivered */ + } + + /* + * Reenable the DABR before delivering the signal to + * user space. The DABR will have been cleared if it + * triggered inside the kernel. + */ + if (current->thread.dabr) + set_dabr(current->thread.dabr); + + if (is32) { + if (ka.sa.sa_flags & SA_SIGINFO) + ret = handle_rt_signal32(signr, &ka, &info, oldset, + regs); + else + ret = handle_signal32(signr, &ka, &info, oldset, + regs); + } else { + ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); + } + + if (ret) { + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, + &ka.sa.sa_mask); + if (!(ka.sa.sa_flags & SA_NODEFER)) + sigaddset(¤t->blocked, signr); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + /* + * A signal was successfully delivered; the saved sigmask is in + * its frame, and we can clear the TIF_RESTORE_SIGMASK flag. + */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } + + return ret; +} + +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + unsigned long r5, unsigned long r6, unsigned long r7, + unsigned long r8, struct pt_regs *regs) +{ + return do_sigaltstack(uss, uoss, regs->gpr[1]); +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h new file mode 100644 index 00000000000..77efb3d5465 --- /dev/null +++ b/arch/powerpc/kernel/signal.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Extracted from signal_32.c and signal_64.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#ifndef _POWERPC_ARCH_SIGNAL_H +#define _POWERPC_ARCH_SIGNAL_H + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size); +extern void restore_sigmask(sigset_t *set); + +extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + +extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + + +#ifdef CONFIG_PPC64 + +static inline int is_32bit_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + +extern int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs); + +#else /* CONFIG_PPC64 */ + +static inline int is_32bit_task(void) +{ + return 1; +} + +static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs) +{ + return -EFAULT; +} + +#endif /* !defined(CONFIG_PPC64) */ + +#endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index dd1dca5bfa8..590057e9e98 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -51,12 +51,11 @@ #include <asm/pgtable.h> #endif -#undef DEBUG_SIG +#include "signal.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#undef DEBUG_SIG #ifdef CONFIG_PPC64 -#define do_signal do_signal32 #define sys_sigsuspend compat_sys_sigsuspend #define sys_rt_sigsuspend compat_sys_rt_sigsuspend #define sys_rt_sigreturn compat_sys_rt_sigreturn @@ -231,8 +230,6 @@ static inline int restore_general_regs(struct pt_regs *regs, #endif /* CONFIG_PPC64 */ -int do_signal(sigset_t *oldset, struct pt_regs *regs); - /* * Atomically swap in the new signal mask, and wait for a signal. */ @@ -251,14 +248,6 @@ long sys_sigsuspend(old_sigset_t mask) return -ERESTARTNOHAND; } -#ifdef CONFIG_PPC32 -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, int r5, - int r6, int r7, int r8, struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gpr[1]); -} -#endif - long sys_sigaction(int sig, struct old_sigaction __user *act, struct old_sigaction __user *oact) { @@ -293,14 +282,17 @@ long sys_sigaction(int sig, struct old_sigaction __user *act, /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: - * a sigregs struct + * an ABI gap of 56 words + * an mcontext struct * a sigcontext struct * a gap of __SIGNAL_FRAMESIZE bytes * - * Each of these things must be a multiple of 16 bytes in size. + * Each of these things must be a multiple of 16 bytes in size. The following + * structure represent all of this except the __SIGNAL_FRAMESIZE gap * */ -struct sigregs { +struct sigframe { + struct sigcontext sctx; /* the sigcontext */ struct mcontext mctx; /* all the register values */ /* * Programs using the rs6000/xcoff abi can save up to 19 gp @@ -703,44 +695,22 @@ int compat_sys_sigaltstack(u32 __new, u32 __old, int r5, } #endif /* CONFIG_PPC64 */ - -/* - * Restore the user process's signal mask - */ -#ifdef CONFIG_PPC64 -extern void restore_sigmask(sigset_t *set); -#else /* CONFIG_PPC64 */ -static void restore_sigmask(sigset_t *set) -{ - sigdelsetmask(set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = *set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); -} -#endif - /* * Set up a signal frame for a "real-time" signal handler * (one which gets siginfo). */ -static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, +int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, - struct pt_regs *regs, unsigned long newsp) + struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; - unsigned long origsp = newsp; + unsigned long newsp = 0; /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - newsp -= sizeof(*rt_sf); - rt_sf = (struct rt_sigframe __user *)newsp; - - /* create a stack frame for the caller of the handler */ - newsp -= __SIGNAL_FRAMESIZE + 16; - - if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp)) + rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf)); + if (unlikely(rt_sf == NULL)) goto badframe; /* Put the siginfo & fill in most of the ucontext */ @@ -770,8 +740,12 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; + + /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) &rt_sf->info; @@ -1015,27 +989,18 @@ int sys_debug_setcontext(struct ucontext __user *ctx, /* * OK, we're invoking a handler */ -static int handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs, - unsigned long newsp) +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { struct sigcontext __user *sc; - struct sigregs __user *frame; - unsigned long origsp = newsp; + struct sigframe __user *frame; + unsigned long newsp = 0; /* Set up Signal Frame */ - newsp -= sizeof(struct sigregs); - frame = (struct sigregs __user *) newsp; - - /* Put a sigcontext on the stack */ - newsp -= sizeof(*sc); - sc = (struct sigcontext __user *) newsp; - - /* create a stack frame for the caller of the handler */ - newsp -= __SIGNAL_FRAMESIZE; - - if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp)) + frame = get_sigframe(ka, regs, sizeof(*frame)); + if (unlikely(frame == NULL)) goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; #if _NSIG != 64 #error "Please adjust handle_signal()" @@ -1047,7 +1012,7 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, #else || __put_user(oldset->sig[1], &sc->_unused[3]) #endif - || __put_user(to_user_ptr(frame), &sc->regs) + || __put_user(to_user_ptr(&frame->mctx), &sc->regs) || __put_user(sig, &sc->signal)) goto badframe; @@ -1063,8 +1028,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; + regs->gpr[1] = newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; @@ -1126,106 +1094,3 @@ badframe: force_sig(SIGSEGV, current); return 0; } - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -int do_signal(sigset_t *oldset, struct pt_regs *regs) -{ - siginfo_t info; - struct k_sigaction ka; - unsigned int newsp; - int signr, ret; - -#ifdef CONFIG_PPC32 - if (try_to_freeze()) { - signr = 0; - if (!signal_pending(current)) - goto no_signal; - } -#endif - - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else if (!oldset) - oldset = ¤t->blocked; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); -#ifdef CONFIG_PPC32 -no_signal: -#endif - if (TRAP(regs) == 0x0C00 /* System Call! */ - && regs->ccr & 0x10000000 /* error signalled */ - && ((ret = regs->gpr[3]) == ERESTARTSYS - || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR - || ret == ERESTART_RESTARTBLOCK)) { - - if (signr > 0 - && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK - || (ret == ERESTARTSYS - && !(ka.sa.sa_flags & SA_RESTART)))) { - /* make the system call return an EINTR error */ - regs->result = -EINTR; - regs->gpr[3] = EINTR; - /* note that the cr0.SO bit is already set */ - } else { - regs->nip -= 4; /* Back up & retry system call */ - regs->result = 0; - regs->trap = 0; - if (ret == ERESTART_RESTARTBLOCK) - regs->gpr[0] = __NR_restart_syscall; - else - regs->gpr[3] = regs->orig_gpr3; - } - } - - if (signr == 0) { - /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } - return 0; /* no signals delivered */ - } - - if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size - && !on_sig_stack(regs->gpr[1])) - newsp = current->sas_ss_sp + current->sas_ss_size; - else - newsp = regs->gpr[1]; - newsp &= ~0xfUL; - -#ifdef CONFIG_PPC64 - /* - * Reenable the DABR before delivering the signal to - * user space. The DABR will have been cleared if it - * triggered inside the kernel. - */ - if (current->thread.dabr) - set_dabr(current->thread.dabr); -#endif - - /* Whee! Actually deliver the signal. */ - if (ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal(signr, &ka, &info, oldset, regs, newsp); - else - ret = handle_signal(signr, &ka, &info, oldset, regs, newsp); - - if (ret) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, - &ka.sa.sa_mask); - if (!(ka.sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - /* A signal was successfully delivered; the saved sigmask is in - its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - } - - return ret; -} diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index b27e26852fd..de895e6d8c6 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -34,9 +34,9 @@ #include <asm/syscalls.h> #include <asm/vdso.h> -#define DEBUG_SIG 0 +#include "signal.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define DEBUG_SIG 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) @@ -64,14 +64,6 @@ struct rt_sigframe { char abigap[288]; } __attribute__ ((aligned (16))); -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, - unsigned long r6, unsigned long r7, unsigned long r8, - struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gpr[1]); -} - - /* * Set up the sigcontext for the signal frame. */ @@ -208,25 +200,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, } /* - * Allocate space for the signal frame - */ -static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) -{ - unsigned long newsp; - - /* Default to using normal stack */ - newsp = regs->gpr[1]; - - if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { - if (! on_sig_stack(regs->gpr[1])) - newsp = (current->sas_ss_sp + current->sas_ss_size); - } - - return (void __user *)((newsp - frame_size) & -16ul); -} - -/* * Setup the trampoline code on the stack */ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) @@ -253,19 +226,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) } /* - * Restore the user process's signal mask (also used by signal32.c) - */ -void restore_sigmask(sigset_t *set) -{ - sigdelsetmask(set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = *set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); -} - - -/* * Handle {get,set,swap}_context operations */ int sys_swapcontext(struct ucontext __user *old_ctx, @@ -359,7 +319,7 @@ badframe: return 0; } -static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, +int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { /* Handler is *really* a pointer to the function descriptor for @@ -373,8 +333,7 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, long err = 0; frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + if (unlikely(frame == NULL)) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); @@ -411,7 +370,7 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ - newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); /* Set up "regs" so we "return" to the signal handler. */ @@ -442,134 +401,3 @@ badframe: force_sigsegv(signr, current); return 0; } - - -/* - * OK, we're invoking a handler - */ -static int handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) -{ - int ret; - - /* Set up Signal Frame */ - ret = setup_rt_frame(sig, ka, info, oldset, regs); - - if (ret) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; -} - -static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) -{ - switch ((int)regs->result) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - /* ERESTARTNOHAND means that the syscall should only be - * restarted if there was no handler for the signal, and since - * we only get here if there is a handler, we dont restart. - */ - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - break; - case -ERESTARTSYS: - /* ERESTARTSYS means to restart the syscall if there is no - * handler or the handler was registered with SA_RESTART - */ - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - /* ERESTARTNOINTR means that the syscall should be - * called again after the signal handler returns. - */ - regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; - regs->result = 0; - break; - } -} - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -int do_signal(sigset_t *oldset, struct pt_regs *regs) -{ - siginfo_t info; - int signr; - struct k_sigaction ka; - - /* - * If the current thread is 32 bit - invoke the - * 32 bit signal handling code - */ - if (test_thread_flag(TIF_32BIT)) - return do_signal32(oldset, regs); - - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else if (!oldset) - oldset = ¤t->blocked; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (signr > 0) { - int ret; - - /* Whee! Actually deliver the signal. */ - if (TRAP(regs) == 0x0C00) - syscall_restart(regs, &ka); - - /* - * Reenable the DABR before delivering the signal to - * user space. The DABR will have been cleared if it - * triggered inside the kernel. - */ - if (current->thread.dabr) - set_dabr(current->thread.dabr); - - ret = handle_signal(signr, &ka, &info, oldset, regs); - - /* If a signal was successfully delivered, the saved sigmask is in - its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ - if (ret && test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - - return ret; - } - - if (TRAP(regs) == 0x0C00) { /* System Call! */ - if ((int)regs->result == -ERESTARTNOHAND || - (int)regs->result == -ERESTARTSYS || - (int)regs->result == -ERESTARTNOINTR) { - regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; /* Back up & retry system call */ - regs->result = 0; - } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) { - regs->gpr[0] = __NR_restart_syscall; - regs->nip -= 4; - regs->result = 0; - } - } - /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } - - return 0; -} -EXPORT_SYMBOL(do_signal); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 68991c2d4a1..55d29ed4b7a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -442,12 +442,14 @@ int sysfs_add_device_to_node(struct sys_device *dev, int nid) return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, kobject_name(&dev->kobj)); } +EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); void sysfs_remove_device_from_node(struct sys_device *dev, int nid) { struct node *node = &node_devices[nid]; sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); } +EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); #else static void register_nodes(void) @@ -457,9 +459,6 @@ static void register_nodes(void) #endif -EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); -EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); - /* Only valid if CPU is present. */ static ssize_t show_physical_id(struct sys_device *dev, char *buf) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2c8564d54e4..e5df167f782 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -77,9 +77,8 @@ /* keep track of when we need to update the rtc */ time_t last_rtc_update; #ifdef CONFIG_PPC_ISERIES -unsigned long iSeries_recal_titan = 0; -unsigned long iSeries_recal_tb = 0; -static unsigned long first_settimeofday = 1; +static unsigned long __initdata iSeries_recal_titan; +static signed long __initdata iSeries_recal_tb; #endif /* The decrementer counts down by 128 every 128ns on a 601. */ @@ -113,8 +112,9 @@ u64 ticklen_to_xs; /* 0.64 fraction */ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); -u64 tb_to_ns_scale; -unsigned tb_to_ns_shift; +static u64 tb_to_ns_scale __read_mostly; +static unsigned tb_to_ns_shift __read_mostly; +static unsigned long boot_tb __read_mostly; struct gettimeofday_struct do_gtod; @@ -214,7 +214,6 @@ static void account_process_time(struct pt_regs *regs) run_posix_cpu_timers(current); } -#ifdef CONFIG_PPC_SPLPAR /* * Stuff for accounting stolen time. */ @@ -222,19 +221,28 @@ struct cpu_purr_data { int initialized; /* thread is running */ u64 tb; /* last TB value read */ u64 purr; /* last PURR value read */ - spinlock_t lock; }; +/* + * Each entry in the cpu_purr_data array is manipulated only by its + * "owner" cpu -- usually in the timer interrupt but also occasionally + * in process context for cpu online. As long as cpus do not touch + * each others' cpu_purr_data, disabling local interrupts is + * sufficient to serialize accesses. + */ static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); static void snapshot_tb_and_purr(void *data) { + unsigned long flags; struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); + local_irq_save(flags); p->tb = mftb(); p->purr = mfspr(SPRN_PURR); wmb(); p->initialized = 1; + local_irq_restore(flags); } /* @@ -242,15 +250,14 @@ static void snapshot_tb_and_purr(void *data) */ void snapshot_timebases(void) { - int cpu; - if (!cpu_has_feature(CPU_FTR_PURR)) return; - for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock); on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); } +/* + * Must be called with interrupts disabled. + */ void calculate_steal_time(void) { u64 tb, purr; @@ -262,7 +269,6 @@ void calculate_steal_time(void) pme = &per_cpu(cpu_purr_data, smp_processor_id()); if (!pme->initialized) return; /* this can happen in early boot */ - spin_lock(&pme->lock); tb = mftb(); purr = mfspr(SPRN_PURR); stolen = (tb - pme->tb) - (purr - pme->purr); @@ -270,9 +276,9 @@ void calculate_steal_time(void) account_steal_time(current, stolen); pme->tb = tb; pme->purr = purr; - spin_unlock(&pme->lock); } +#ifdef CONFIG_PPC_SPLPAR /* * Must be called before the cpu is added to the online map when * a cpu is being brought up at runtime. @@ -284,12 +290,12 @@ static void snapshot_purr(void) if (!cpu_has_feature(CPU_FTR_PURR)) return; + local_irq_save(flags); pme = &per_cpu(cpu_purr_data, smp_processor_id()); - spin_lock_irqsave(&pme->lock, flags); pme->tb = mftb(); pme->purr = mfspr(SPRN_PURR); pme->initialized = 1; - spin_unlock_irqrestore(&pme->lock, flags); + local_irq_restore(flags); } #endif /* CONFIG_PPC_SPLPAR */ @@ -550,10 +556,15 @@ EXPORT_SYMBOL(profile_pc); * returned by the service processor for the timebase frequency. */ -static void iSeries_tb_recal(void) +static int __init iSeries_tb_recal(void) { struct div_result divres; unsigned long titan, tb; + + /* Make sure we only run on iSeries */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return -ENODEV; + tb = get_tb(); titan = HvCallXm_loadTod(); if ( iSeries_recal_titan ) { @@ -594,8 +605,18 @@ static void iSeries_tb_recal(void) } iSeries_recal_titan = titan; iSeries_recal_tb = tb; + + return 0; } -#endif +late_initcall(iSeries_tb_recal); + +/* Called from platform early init */ +void __init iSeries_time_init_early(void) +{ + iSeries_recal_tb = get_tb(); + iSeries_recal_titan = HvCallXm_loadTod(); +} +#endif /* CONFIG_PPC_ISERIES */ /* * For iSeries shared processors, we have to let the hypervisor @@ -735,7 +756,7 @@ unsigned long long sched_clock(void) { if (__USE_RTC()) return get_rtc(); - return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift; + return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } int do_settimeofday(struct timespec *tv) @@ -759,12 +780,6 @@ int do_settimeofday(struct timespec *tv) * to the RTC again, or write to the RTC but then they don't call * settimeofday to perform this operation. */ -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES) && first_settimeofday) { - iSeries_tb_recal(); - first_settimeofday = 0; - } -#endif /* Make userspace gettimeofday spin until we're done. */ ++vdso_data->tb_update_count; @@ -960,6 +975,8 @@ void __init time_init(void) } tb_to_ns_scale = scale; tb_to_ns_shift = shift; + /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ + boot_tb = get_tb(); tm = get_boot_time(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 4245579edb4..cef01e4e898 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -670,7 +670,7 @@ static int __init vdso_init(void) /* * Fill up the "systemcfg" stuff for backward compatiblity */ - strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); + strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); vdso_data->version.major = SYSTEMCFG_MAJOR; vdso_data->version.minor = SYSTEMCFG_MINOR; vdso_data->processor = mfspr(SPRN_PVR); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 21c39ff2dc3..ae4acd84143 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -7,6 +7,7 @@ #define PROVIDE32(x) PROVIDE(x) #endif #include <asm-generic/vmlinux.lds.h> +#include <asm/cache.h> ENTRY(_stext) @@ -211,6 +212,11 @@ SECTIONS *(.data.cacheline_aligned) } + . = ALIGN(L1_CACHE_BYTES); + .data.read_mostly : { + *(.data.read_mostly) + } + . = ALIGN(PAGE_SIZE); __data_nosave : { __nosave_begin = .; |