diff options
Diffstat (limited to 'arch/powerpc/platforms/cell')
30 files changed, 7999 insertions, 67 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig new file mode 100644 index 00000000000..3157071e241 --- /dev/null +++ b/arch/powerpc/platforms/cell/Kconfig @@ -0,0 +1,13 @@ +menu "Cell Broadband Engine options" + depends on PPC_CELL + +config SPU_FS + tristate "SPU file system" + default m + depends on PPC_CELL + help + The SPU file system is used to access Synergistic Processing + Units on machines implementing the Broadband Processor + Architecture. + +endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 55e094b96bc..16031b565be 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,2 +1,10 @@ obj-y += interrupt.o iommu.o setup.o spider-pic.o +obj-y += pervasive.o + obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SPU_FS) += spufs/ spu-base.o + +spu-base-y += spu_base.o spu_priv1.o + +builtin-spufs-$(CONFIG_SPU_FS) += spu_syscalls.o +obj-y += $(builtin-spufs-m) diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 7fbe78a9327..63aa52acf44 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -23,6 +23,7 @@ #include <linux/config.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/module.h> #include <linux/percpu.h> #include <linux/types.h> @@ -55,6 +56,7 @@ struct iic_regs { struct iic { struct iic_regs __iomem *regs; + u8 target_id; }; static DEFINE_PER_CPU(struct iic, iic); @@ -172,12 +174,11 @@ int iic_get_irq(struct pt_regs *regs) return irq; } -static struct iic_regs __iomem *find_iic(int cpu) +static int setup_iic(int cpu, struct iic *iic) { struct device_node *np; int nodeid = cpu / 2; unsigned long regs; - struct iic_regs __iomem *iic_regs; for (np = of_find_node_by_type(NULL, "cpu"); np; @@ -188,20 +189,23 @@ static struct iic_regs __iomem *find_iic(int cpu) if (!np) { printk(KERN_WARNING "IIC: CPU %d not found\n", 
cpu); - iic_regs = NULL; - } else { - regs = *(long *)get_property(np, "iic", NULL); - - /* hack until we have decided on the devtree info */ - regs += 0x400; - if (cpu & 1) - regs += 0x20; - - printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); - iic_regs = __ioremap(regs, sizeof(struct iic_regs), - _PAGE_NO_CACHE); + iic->regs = NULL; + iic->target_id = 0xff; + return -ENODEV; } - return iic_regs; + + regs = *(long *)get_property(np, "iic", NULL); + + /* hack until we have decided on the devtree info */ + regs += 0x400; + if (cpu & 1) + regs += 0x20; + + printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); + iic->regs = __ioremap(regs, sizeof(struct iic_regs), + _PAGE_NO_CACHE); + iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe); + return 0; } #ifdef CONFIG_SMP @@ -227,6 +231,12 @@ void iic_cause_IPI(int cpu, int mesg) out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4); } +u8 iic_get_target_id(int cpu) +{ + return per_cpu(iic, cpu).target_id; +} +EXPORT_SYMBOL_GPL(iic_get_target_id); + static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) { smp_message_recv(iic_irq_to_ipi(irq), regs); @@ -276,7 +286,7 @@ void iic_init_IRQ(void) irq_offset = 0; for_each_cpu(cpu) { iic = &per_cpu(iic, cpu); - iic->regs = find_iic(cpu); + setup_iic(cpu, iic); if (iic->regs) out_be64(&iic->regs->prio, 0xff); } diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 37d58e6fd0c..a14bd38791c 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -54,6 +54,7 @@ extern void iic_setup_cpu(void); extern void iic_local_enable(void); extern void iic_local_disable(void); +extern u8 iic_get_target_id(int cpu); extern void spider_init_IRQ(void); extern int spider_get_irq(unsigned long int_pending); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 74f999b4ac9..46e7cb9c3e6 100644 --- 
a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -29,6 +29,8 @@ #include <linux/bootmem.h> #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/compiler.h> #include <asm/sections.h> #include <asm/iommu.h> @@ -40,6 +42,7 @@ #include <asm/abs_addr.h> #include <asm/system.h> #include <asm/ppc-pci.h> +#include <asm/udbg.h> #include "iommu.h" @@ -220,8 +223,6 @@ set_iopt_cache(void __iomem *base, unsigned long index, { unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR; unsigned long __iomem *p = base + IOC_PT_CACHE_REG; - pr_debug("iopt %02lx was v%016lx/t%016lx, store v%016lx/t%016lx\n", - index, get_iopt_cache(base, index, &oldtag), oldtag, val, tag); out_be64(p, val); out_be64(&tags[index], tag); @@ -248,67 +249,176 @@ set_iocmd_config(void __iomem *base) out_be64(p, conf | IOCMD_CONF_TE); } -/* FIXME: get these from the device tree */ -#define ioc_base 0x20000511000ull -#define ioc_mmio_base 0x20000510000ull -#define ioid 0x48a -#define iopt_phys_offset (- 0x20000000) /* We have a 512MB offset from the SB */ -#define io_page_size 0x1000000 - -static unsigned long map_iopt_entry(unsigned long address) +static void enable_mapping(void __iomem *base, void __iomem *mmio_base) { - switch (address >> 20) { - case 0x600: - address = 0x24020000000ull; /* spider i/o */ - break; - default: - address += iopt_phys_offset; - break; - } - - return get_iopt_entry(address, ioid, IOPT_PROT_RW); + set_iocmd_config(base); + set_iost_origin(mmio_base); } -static void iommu_bus_setup_null(struct pci_bus *b) { } static void iommu_dev_setup_null(struct pci_dev *d) { } +static void iommu_bus_setup_null(struct pci_bus *b) { } + +struct cell_iommu { + unsigned long base; + unsigned long mmio_base; + void __iomem *mapped_base; + void __iomem *mapped_mmio_base; +}; + +static struct cell_iommu cell_iommus[NR_CPUS]; /* initialize the iommu to support a simple linear mapping * for each DMA window used by any device. 
For now, we * happen to know that there is only one DMA window in use, * starting at iopt_phys_offset. */ -static void cell_map_iommu(void) +static void cell_do_map_iommu(struct cell_iommu *iommu, + unsigned int ioid, + unsigned long map_start, + unsigned long map_size) { - unsigned long address; - void __iomem *base; + unsigned long io_address, real_address; + void __iomem *ioc_base, *ioc_mmio_base; ioste ioste; unsigned long index; - base = __ioremap(ioc_base, 0x1000, _PAGE_NO_CACHE); - pr_debug("%lx mapped to %p\n", ioc_base, base); - set_iocmd_config(base); - iounmap(base); + /* we pretend the io page table was at a very high address */ + const unsigned long fake_iopt = 0x10000000000ul; + const unsigned long io_page_size = 0x1000000; /* use 16M pages */ + const unsigned long io_segment_size = 0x10000000; /* 256M */ + + ioc_base = iommu->mapped_base; + ioc_mmio_base = iommu->mapped_mmio_base; + + for (real_address = 0, io_address = 0; + io_address <= map_start + map_size; + real_address += io_page_size, io_address += io_page_size) { + ioste = get_iost_entry(fake_iopt, io_address, io_page_size); + if ((real_address % io_segment_size) == 0) /* segment start */ + set_iost_cache(ioc_mmio_base, + io_address >> 28, ioste); + index = get_ioc_hash_1way(ioste, io_address); + pr_debug("addr %08lx, index %02lx, ioste %016lx\n", + io_address, index, ioste.val); + set_iopt_cache(ioc_mmio_base, + get_ioc_hash_1way(ioste, io_address), + get_ioc_tag(ioste, io_address), + get_iopt_entry(real_address-map_start, ioid, IOPT_PROT_RW)); + } +} - base = __ioremap(ioc_mmio_base, 0x1000, _PAGE_NO_CACHE); - pr_debug("%lx mapped to %p\n", ioc_mmio_base, base); +static void iommu_devnode_setup(struct device_node *d) +{ + unsigned int *ioid; + unsigned long *dma_window, map_start, map_size, token; + struct cell_iommu *iommu; - set_iost_origin(base); + ioid = (unsigned int *)get_property(d, "ioid", NULL); + if (!ioid) + pr_debug("No ioid entry found !\n"); - for (address = 0; address < 
0x100000000ul; address += io_page_size) { - ioste = get_iost_entry(0x10000000000ul, address, io_page_size); - if ((address & 0xfffffff) == 0) /* segment start */ - set_iost_cache(base, address >> 28, ioste); - index = get_ioc_hash_1way(ioste, address); - pr_debug("addr %08lx, index %02lx, ioste %016lx\n", - address, index, ioste.val); - set_iopt_cache(base, - get_ioc_hash_1way(ioste, address), - get_ioc_tag(ioste, address), - map_iopt_entry(address)); - } - iounmap(base); + dma_window = (unsigned long *)get_property(d, "ibm,dma-window", NULL); + if (!dma_window) + pr_debug("No ibm,dma-window entry found !\n"); + + map_start = dma_window[1]; + map_size = dma_window[2]; + token = dma_window[0] >> 32; + + iommu = &cell_iommus[token]; + + cell_do_map_iommu(iommu, *ioid, map_start, map_size); +} + +static void iommu_bus_setup(struct pci_bus *b) +{ + struct device_node *d = (struct device_node *)b->sysdata; + iommu_devnode_setup(d); +} + + +static int cell_map_iommu_hardcoded(int num_nodes) +{ + struct cell_iommu *iommu = NULL; + + pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); + + /* node 0 */ + iommu = &cell_iommus[0]; + iommu->mapped_base = __ioremap(0x20000511000, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_mmio_base = __ioremap(0x20000510000, 0x1000, _PAGE_NO_CACHE); + + enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); + + cell_do_map_iommu(iommu, 0x048a, + 0x20000000ul,0x20000000ul); + + if (num_nodes < 2) + return 0; + + /* node 1 */ + iommu = &cell_iommus[1]; + iommu->mapped_base = __ioremap(0x30000511000, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_mmio_base = __ioremap(0x30000510000, 0x1000, _PAGE_NO_CACHE); + + enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); + + cell_do_map_iommu(iommu, 0x048a, + 0x20000000,0x20000000ul); + + return 0; } +static int cell_map_iommu(void) +{ + unsigned int num_nodes = 0, *node_id; + unsigned long *base, *mmio_base; + struct device_node *dn; + struct cell_iommu *iommu = NULL; + + /* 
determine number of nodes (=iommus) */ + pr_debug("%s(%d): determining number of nodes...", __FUNCTION__, __LINE__); + for(dn = of_find_node_by_type(NULL, "cpu"); + dn; + dn = of_find_node_by_type(dn, "cpu")) { + node_id = (unsigned int *)get_property(dn, "node-id", NULL); + + if (num_nodes < *node_id) + num_nodes = *node_id; + } + + num_nodes++; + pr_debug("%i found.\n", num_nodes); + + /* map the iommu registers for each node */ + pr_debug("%s(%d): Looping through nodes\n", __FUNCTION__, __LINE__); + for(dn = of_find_node_by_type(NULL, "cpu"); + dn; + dn = of_find_node_by_type(dn, "cpu")) { + + node_id = (unsigned int *)get_property(dn, "node-id", NULL); + base = (unsigned long *)get_property(dn, "ioc-cache", NULL); + mmio_base = (unsigned long *)get_property(dn, "ioc-translation", NULL); + + if (!base || !mmio_base || !node_id) + return cell_map_iommu_hardcoded(num_nodes); + + iommu = &cell_iommus[*node_id]; + iommu->base = *base; + iommu->mmio_base = *mmio_base; + + iommu->mapped_base = __ioremap(*base, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_mmio_base = __ioremap(*mmio_base, 0x1000, _PAGE_NO_CACHE); + + enable_mapping(iommu->mapped_base, + iommu->mapped_mmio_base); + + /* everything else will be done in iommu_bus_setup */ + } + + return 1; +} + static void *cell_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { @@ -365,11 +475,28 @@ static int cell_dma_supported(struct device *dev, u64 mask) void cell_init_iommu(void) { - cell_map_iommu(); - - /* Direct I/O, IOMMU off */ - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup_null; + int setup_bus = 0; + + if (of_find_node_by_path("/mambo")) { + pr_info("Not using iommu on systemsim\n"); + } else { + + if (!(of_chosen && + get_property(of_chosen, "linux,iommu-off", NULL))) + setup_bus = cell_map_iommu(); + + if (setup_bus) { + pr_debug("%s: IOMMU mapping activated\n", __FUNCTION__); + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + 
ppc_md.iommu_bus_setup = iommu_bus_setup; + } else { + pr_debug("%s: IOMMU mapping activated, " + "no device action necessary\n", __FUNCTION__); + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + } + } pci_dma_ops.alloc_coherent = cell_alloc_coherent; pci_dma_ops.free_coherent = cell_free_coherent; diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c new file mode 100644 index 00000000000..e0e051c675d --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -0,0 +1,229 @@ +/* + * CBE Pervasive Monitor and Debug + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * Michael N. Day (mnday@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/kallsyms.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/pgtable.h> +#include <asm/reg.h> + +#include "pervasive.h" + +static DEFINE_SPINLOCK(cbe_pervasive_lock); +struct cbe_pervasive { + struct pmd_regs __iomem *regs; + unsigned int thread; +}; + +/* can't use per_cpu from setup_arch */ +static struct cbe_pervasive cbe_pervasive[NR_CPUS]; + +static void __init cbe_enable_pause_zero(void) +{ + unsigned long thread_switch_control; + unsigned long temp_register; + struct cbe_pervasive *p; + int thread; + + spin_lock_irq(&cbe_pervasive_lock); + p = &cbe_pervasive[smp_processor_id()]; + + if (!cbe_pervasive->regs) + goto out; + + pr_debug("Power Management: CPU %d\n", smp_processor_id()); + + /* Enable Pause(0) control bit */ + temp_register = in_be64(&p->regs->pm_control); + + out_be64(&p->regs->pm_control, + temp_register|PMD_PAUSE_ZERO_CONTROL); + + /* Enable DEC and EE interrupt request */ + thread_switch_control = mfspr(SPRN_TSC_CELL); + thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; + + switch ((mfspr(SPRN_CTRLF) & CTRL_CT)) { + case CTRL_CT0: + thread_switch_control |= TSC_CELL_DEC_ENABLE_0; + thread = 0; + break; + case CTRL_CT1: + thread_switch_control |= TSC_CELL_DEC_ENABLE_1; + thread = 1; + break; + default: + printk(KERN_WARNING "%s: unknown configuration\n", + __FUNCTION__); + thread = -1; + break; + } + + if (p->thread != thread) + printk(KERN_WARNING "%s: device tree inconsistant, " + "cpu %i: %d/%d\n", __FUNCTION__, + smp_processor_id(), + p->thread, thread); + + mtspr(SPRN_TSC_CELL, thread_switch_control); + +out: + spin_unlock_irq(&cbe_pervasive_lock); +} + +static void cbe_idle(void) +{ + unsigned long ctrl; + + cbe_enable_pause_zero(); + + while (1) { + if (!need_resched()) { + local_irq_disable(); + while 
(!need_resched()) { + /* go into low thread priority */ + HMT_low(); + + /* + * atomically disable thread execution + * and runlatch. + * External and Decrementer exceptions + * are still handled when the thread + * is disabled but now enter in + * cbe_system_reset_exception() + */ + ctrl = mfspr(SPRN_CTRLF); + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); + } + /* restore thread prio */ + HMT_medium(); + local_irq_enable(); + } + + /* + * turn runlatch on again before scheduling the + * process we just woke up + */ + ppc64_runlatch_on(); + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + +static int cbe_system_reset_exception(struct pt_regs *regs) +{ + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEEE: + do_IRQ(regs); + break; + case SRR1_WAKEDEC: + timer_interrupt(regs); + break; + case SRR1_WAKEMT: + /* no action required */ + break; + default: + /* do system reset */ + return 0; + } + /* everything handled */ + return 1; +} + +static int __init cbe_find_pmd_mmio(int cpu, struct cbe_pervasive *p) +{ + struct device_node *node; + unsigned int *int_servers; + char *addr; + unsigned long real_address; + unsigned int size; + + struct pmd_regs __iomem *pmd_mmio_area; + int hardid, thread; + int proplen; + + pmd_mmio_area = NULL; + hardid = get_hard_smp_processor_id(cpu); + for (node = NULL; (node = of_find_node_by_type(node, "cpu"));) { + int_servers = (void *) get_property(node, + "ibm,ppc-interrupt-server#s", &proplen); + if (!int_servers) { + printk(KERN_WARNING "%s misses " + "ibm,ppc-interrupt-server#s property", + node->full_name); + continue; + } + for (thread = 0; thread < proplen / sizeof (int); thread++) { + if (hardid == int_servers[thread]) { + addr = get_property(node, "pervasive", NULL); + goto found; + } + } + } + + printk(KERN_WARNING "%s: CPU %d not found\n", __FUNCTION__, cpu); + return -EINVAL; + +found: + real_address = *(unsigned long*) addr; + addr += sizeof (unsigned long); + size = *(unsigned 
int*) addr; + + pr_debug("pervasive area for CPU %d at %lx, size %x\n", + cpu, real_address, size); + p->regs = __ioremap(real_address, size, _PAGE_NO_CACHE); + p->thread = thread; + return 0; +} + +void __init cell_pervasive_init(void) +{ + struct cbe_pervasive *p; + int cpu; + int ret; + + if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) + return; + + for_each_cpu(cpu) { + p = &cbe_pervasive[cpu]; + ret = cbe_find_pmd_mmio(cpu, p); + if (ret) + return; + } + + ppc_md.idle_loop = cbe_idle; + ppc_md.system_reset_exception = cbe_system_reset_exception; +} diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h new file mode 100644 index 00000000000..da1fb85ca3e --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -0,0 +1,62 @@ +/* + * Cell Pervasive Monitor and Debug interface and HW structures + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * David J. Erb (djerb@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#ifndef PERVASIVE_H +#define PERVASIVE_H + +struct pmd_regs { + u8 pad_0x0000_0x0800[0x0800 - 0x0000]; /* 0x0000 */ + + /* Thermal Sensor Registers */ + u64 ts_ctsr1; /* 0x0800 */ + u64 ts_ctsr2; /* 0x0808 */ + u64 ts_mtsr1; /* 0x0810 */ + u64 ts_mtsr2; /* 0x0818 */ + u64 ts_itr1; /* 0x0820 */ + u64 ts_itr2; /* 0x0828 */ + u64 ts_gitr; /* 0x0830 */ + u64 ts_isr; /* 0x0838 */ + u64 ts_imr; /* 0x0840 */ + u64 tm_cr1; /* 0x0848 */ + u64 tm_cr2; /* 0x0850 */ + u64 tm_simr; /* 0x0858 */ + u64 tm_tpr; /* 0x0860 */ + u64 tm_str1; /* 0x0868 */ + u64 tm_str2; /* 0x0870 */ + u64 tm_tsr; /* 0x0878 */ + + /* Power Management */ + u64 pm_control; /* 0x0880 */ +#define PMD_PAUSE_ZERO_CONTROL 0x10000 + u64 pm_status; /* 0x0888 */ + + /* Time Base Register */ + u64 tbr; /* 0x0890 */ + + u8 pad_0x0898_0x1000 [0x1000 - 0x0898]; /* 0x0898 */ +}; + +void __init cell_pervasive_init(void); + +#endif diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 9a495634d0c..b33a4443f5a 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -33,6 +33,7 @@ #include <asm/mmu.h> #include <asm/processor.h> #include <asm/io.h> +#include <asm/kexec.h> #include <asm/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -48,6 +49,7 @@ #include "interrupt.h" #include "iommu.h" +#include "pervasive.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -55,7 +57,7 @@ #define DBG(fmt...) #endif -void cell_show_cpuinfo(struct seq_file *m) +static void cell_show_cpuinfo(struct seq_file *m) { struct device_node *root; const char *model = ""; @@ -67,6 +69,77 @@ void cell_show_cpuinfo(struct seq_file *m) of_node_put(root); } +#ifdef CONFIG_SPARSEMEM +static int __init find_spu_node_id(struct device_node *spe) +{ + unsigned int *id; +#ifdef CONFIG_NUMA + struct device_node *cpu; + cpu = spe->parent->parent; + id = (unsigned int *)get_property(cpu, "node-id", NULL); +#else + id = NULL; +#endif + return id ? 
*id : 0; +} + +static void __init cell_spuprop_present(struct device_node *spe, + const char *prop, int early) +{ + struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *p; + int proplen; + + unsigned long start_pfn, end_pfn, pfn; + int node_id; + + p = (void*)get_property(spe, prop, &proplen); + WARN_ON(proplen != sizeof (*p)); + + node_id = find_spu_node_id(spe); + + start_pfn = p->address >> PAGE_SHIFT; + end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* We need to call memory_present *before* the call to sparse_init, + but we can initialize the page structs only *after* that call. + Thus, we're being called twice. */ + if (early) + memory_present(node_id, start_pfn, end_pfn); + else { + /* As the pages backing SPU LS and I/O are outside the range + of regular memory, their page structs were not initialized + by free_area_init. Do it here instead. */ + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + struct page *page = pfn_to_page(pfn); + set_page_links(page, ZONE_DMA, node_id, pfn); + set_page_count(page, 1); + reset_page_mapcount(page); + SetPageReserved(page); + INIT_LIST_HEAD(&page->lru); + } + } +} + +static void __init cell_spumem_init(int early) +{ + struct device_node *node; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + cell_spuprop_present(node, "local-store", early); + cell_spuprop_present(node, "problem", early); + cell_spuprop_present(node, "priv1", early); + cell_spuprop_present(node, "priv2", early); + } +} +#else +static void __init cell_spumem_init(int early) +{ +} +#endif + static void cell_progress(char *s, unsigned short hex) { printk("*** %04x : %s\n", hex, s ? 
s : ""); @@ -93,11 +166,14 @@ static void __init cell_setup_arch(void) init_pci_config_tokens(); find_and_init_phbs(); spider_init_IRQ(); + cell_pervasive_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif mmio_nvram_init(); + + cell_spumem_init(0); } /* @@ -113,6 +189,8 @@ static void __init cell_init_early(void) ppc64_interrupt_controller = IC_CELL_PIC; + cell_spumem_init(1); + DBG(" <- cell_init_early()\n"); } @@ -125,6 +203,15 @@ static int __init cell_probe(int platform) return 1; } +/* + * Cell has no legacy IO; anything calling this function has to + * fail or bad things will happen + */ +static int cell_check_legacy_ioport(unsigned int baseport) +{ + return -ENODEV; +} + struct machdep_calls __initdata cell_md = { .probe = cell_probe, .setup_arch = cell_setup_arch, @@ -137,5 +224,11 @@ struct machdep_calls __initdata cell_md = { .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, .calibrate_decr = generic_calibrate_decr, + .check_legacy_ioport = cell_check_legacy_ioport, .progress = cell_progress, +#ifdef CONFIG_KEXEC + .machine_kexec = default_machine_kexec, + .machine_kexec_prepare = default_machine_kexec_prepare, + .machine_crash_shutdown = default_machine_crash_shutdown, +#endif }; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index de96eadf419..bdf6c5fe58c 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -86,7 +86,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) pcpu = get_hard_smp_processor_id(lcpu); /* Fixup atomic count: it exited inside IRQ handler. 
*/ - paca[lcpu].__current->thread_info->preempt_count = 0; + task_thread_info(paca[lcpu].__current)->preempt_count = 0; /* * If the RTAS start-cpu token does not exist then presume the diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c new file mode 100644 index 00000000000..d75ae03df68 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -0,0 +1,711 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/wait.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/semaphore.h> +#include <asm/spu.h> +#include <asm/mmu_context.h> + +#include "interrupt.h" + +static int __spu_trap_invalid_dma(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGBUS, /* info, */ current); + return 0; +} + +static int __spu_trap_dma_align(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGBUS, /* info, */ current); + return 0; +} + +static int __spu_trap_error(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGILL, /* info, */ current); + return 0; +} + +static void spu_restart_dma(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + struct mm_struct *mm = spu->mm; + u64 esid, vsid; + + pr_debug("%s\n", __FUNCTION__); + + if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { + /* SLBs are pre-loaded for context switch, so + * we should never get here! + */ + printk("%s: invalid access during switch!\n", __func__); + return 1; + } + if (!mm || (REGION_ID(ea) != USER_REGION_ID)) { + /* Future: support kernel segments so that drivers + * can use SPUs. 
+ */ + pr_debug("invalid region access at %016lx\n", ea); + return 1; + } + + esid = (ea & ESID_MASK) | SLB_ESID_V; + vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | SLB_VSID_USER; + if (in_hugepage_area(mm->context, ea)) + vsid |= SLB_VSID_L; + + out_be64(&priv2->slb_index_W, spu->slb_replace); + out_be64(&priv2->slb_vsid_RW, vsid); + out_be64(&priv2->slb_esid_RW, esid); + + spu->slb_replace++; + if (spu->slb_replace >= 8) + spu->slb_replace = 0; + + spu_restart_dma(spu); + + return 0; +} + +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) +{ + pr_debug("%s\n", __FUNCTION__); + + /* Handle kernel space hash faults immediately. + User hash faults need to be deferred to process context. */ + if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) + && REGION_ID(ea) != USER_REGION_ID + && hash_page(ea, _PAGE_PRESENT, 0x300) == 0) { + spu_restart_dma(spu); + return 0; + } + + if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { + printk("%s: invalid access during switch!\n", __func__); + return 1; + } + + spu->dar = ea; + spu->dsisr = dsisr; + mb(); + if (spu->stop_callback) + spu->stop_callback(spu); + return 0; +} + +static int __spu_trap_mailbox(struct spu *spu) +{ + if (spu->ibox_callback) + spu->ibox_callback(spu); + + /* atomically disable SPU mailbox interrupts */ + spin_lock(&spu->register_lock); + spu_int_mask_and(spu, 2, ~0x1); + spin_unlock(&spu->register_lock); + return 0; +} + +static int __spu_trap_stop(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + spu->stop_code = in_be32(&spu->problem->spu_status_R); + if (spu->stop_callback) + spu->stop_callback(spu); + return 0; +} + +static int __spu_trap_halt(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + spu->stop_code = in_be32(&spu->problem->spu_status_R); + if (spu->stop_callback) + spu->stop_callback(spu); + return 0; +} + +static int __spu_trap_tag_group(struct spu *spu) +{ + 
pr_debug("%s\n", __FUNCTION__); + /* wake_up(&spu->dma_wq); */ + return 0; +} + +static int __spu_trap_spubox(struct spu *spu) +{ + if (spu->wbox_callback) + spu->wbox_callback(spu); + + /* atomically disable SPU mailbox interrupts */ + spin_lock(&spu->register_lock); + spu_int_mask_and(spu, 2, ~0x10); + spin_unlock(&spu->register_lock); + return 0; +} + +static irqreturn_t +spu_irq_class_0(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + + spu = data; + spu->class_0_pending = 1; + if (spu->stop_callback) + spu->stop_callback(spu); + + return IRQ_HANDLED; +} + +int +spu_irq_class_0_bottom(struct spu *spu) +{ + unsigned long stat, mask; + + spu->class_0_pending = 0; + + mask = spu_int_mask_get(spu, 0); + stat = spu_int_stat_get(spu, 0); + + stat &= mask; + + if (stat & 1) /* invalid MFC DMA */ + __spu_trap_invalid_dma(spu); + + if (stat & 2) /* invalid DMA alignment */ + __spu_trap_dma_align(spu); + + if (stat & 4) /* error on SPU */ + __spu_trap_error(spu); + + spu_int_stat_clear(spu, 0, stat); + + return (stat & 0x7) ? -EIO : 0; +} +EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom); + +static irqreturn_t +spu_irq_class_1(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + unsigned long stat, mask, dar, dsisr; + + spu = data; + + /* atomically read & clear class1 status. */ + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 1); + stat = spu_int_stat_get(spu, 1) & mask; + dar = spu_mfc_dar_get(spu); + dsisr = spu_mfc_dsisr_get(spu); + if (stat & 2) /* mapping fault */ + spu_mfc_dsisr_set(spu, 0ul); + spu_int_stat_clear(spu, 1, stat); + spin_unlock(&spu->register_lock); + + if (stat & 1) /* segment fault */ + __spu_trap_data_seg(spu, dar); + + if (stat & 2) { /* mapping fault */ + __spu_trap_data_map(spu, dar, dsisr); + } + + if (stat & 4) /* ls compare & suspend on get */ + ; + + if (stat & 8) /* ls compare & suspend on put */ + ; + + return stat ? 
IRQ_HANDLED : IRQ_NONE; +} +EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom); + +static irqreturn_t +spu_irq_class_2(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + unsigned long stat; + unsigned long mask; + + spu = data; + stat = spu_int_stat_get(spu, 2); + mask = spu_int_mask_get(spu, 2); + + pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); + + stat &= mask; + + if (stat & 1) /* PPC core mailbox */ + __spu_trap_mailbox(spu); + + if (stat & 2) /* SPU stop-and-signal */ + __spu_trap_stop(spu); + + if (stat & 4) /* SPU halted */ + __spu_trap_halt(spu); + + if (stat & 8) /* DMA tag group complete */ + __spu_trap_tag_group(spu); + + if (stat & 0x10) /* SPU mailbox threshold */ + __spu_trap_spubox(spu); + + spu_int_stat_clear(spu, 2, stat); + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static int +spu_request_irqs(struct spu *spu) +{ + int ret; + int irq_base; + + irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; + + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number); + ret = request_irq(irq_base + spu->isrc, + spu_irq_class_0, 0, spu->irq_c0, spu); + if (ret) + goto out; + + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number); + ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, + spu_irq_class_1, 0, spu->irq_c1, spu); + if (ret) + goto out1; + + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); + ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, + spu_irq_class_2, 0, spu->irq_c2, spu); + if (ret) + goto out2; + goto out; + +out2: + free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); +out1: + free_irq(irq_base + spu->isrc, spu); +out: + return ret; +} + +static void +spu_free_irqs(struct spu *spu) +{ + int irq_base; + + irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; + + free_irq(irq_base + spu->isrc, spu); + free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); + free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu); +} + +static 
LIST_HEAD(spu_list); +static DECLARE_MUTEX(spu_mutex); + +static void spu_init_channels(struct spu *spu) +{ + static const struct { + unsigned channel; + unsigned count; + } zero_list[] = { + { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, }, + { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, }, + }, count_list[] = { + { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, }, + { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, }, + { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, }, + }; + struct spu_priv2 __iomem *priv2; + int i; + + priv2 = spu->priv2; + + /* initialize all channel data to zero */ + for (i = 0; i < ARRAY_SIZE(zero_list); i++) { + int count; + + out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); + for (count = 0; count < zero_list[i].count; count++) + out_be64(&priv2->spu_chnldata_RW, 0); + } + + /* initialize channel counts to meaningful values */ + for (i = 0; i < ARRAY_SIZE(count_list); i++) { + out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel); + out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); + } +} + +struct spu *spu_alloc(void) +{ + struct spu *spu; + + down(&spu_mutex); + if (!list_empty(&spu_list)) { + spu = list_entry(spu_list.next, struct spu, list); + list_del_init(&spu->list); + pr_debug("Got SPU %x %d\n", spu->isrc, spu->number); + } else { + pr_debug("No SPU left\n"); + spu = NULL; + } + up(&spu_mutex); + + if (spu) + spu_init_channels(spu); + + return spu; +} +EXPORT_SYMBOL_GPL(spu_alloc); + +void spu_free(struct spu *spu) +{ + down(&spu_mutex); + list_add_tail(&spu->list, &spu_list); + up(&spu_mutex); +} +EXPORT_SYMBOL_GPL(spu_free); + +static int spu_handle_mm_fault(struct spu *spu) +{ + struct mm_struct *mm = spu->mm; + struct vm_area_struct *vma; + u64 ea, dsisr, is_write; + int ret; + + ea = spu->dar; + dsisr = spu->dsisr; +#if 0 + if (!IS_VALID_EA(ea)) { + return -EFAULT; + } +#endif /* XXX */ + if (mm == NULL) { + return -EFAULT; + } + if (mm->pgd == NULL) { + return -EFAULT; + } + + 
down_read(&mm->mmap_sem); + vma = find_vma(mm, ea); + if (!vma) + goto bad_area; + if (vma->vm_start <= ea) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +#if 0 + if (expand_stack(vma, ea)) + goto bad_area; +#endif /* XXX */ +good_area: + is_write = dsisr & MFC_DSISR_ACCESS_PUT; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (dsisr & MFC_DSISR_ACCESS_DENIED) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + ret = 0; + switch (handle_mm_fault(mm, vma, ea, is_write)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + ret = -EFAULT; + goto bad_area; + case VM_FAULT_OOM: + ret = -ENOMEM; + goto bad_area; + default: + BUG(); + } + up_read(&mm->mmap_sem); + return ret; + +bad_area: + up_read(&mm->mmap_sem); + return -EFAULT; +} + +int spu_irq_class_1_bottom(struct spu *spu) +{ + u64 ea, dsisr, access, error = 0UL; + int ret = 0; + + ea = spu->dar; + dsisr = spu->dsisr; + if (dsisr & MFC_DSISR_PTE_NOT_FOUND) { + access = (_PAGE_PRESENT | _PAGE_USER); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? 
_PAGE_RW : 0UL; + if (hash_page(ea, access, 0x300) != 0) + error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; + } + if ((error & CLASS1_ENABLE_STORAGE_FAULT_INTR) || + (dsisr & MFC_DSISR_ACCESS_DENIED)) { + if ((ret = spu_handle_mm_fault(spu)) != 0) + error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; + else + error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR; + } + spu->dar = 0UL; + spu->dsisr = 0UL; + if (!error) { + spu_restart_dma(spu); + } else { + __spu_trap_invalid_dma(spu); + } + return ret; +} + +void spu_irq_setaffinity(struct spu *spu, int cpu) +{ + u64 target = iic_get_target_id(cpu); + u64 route = target << 48 | target << 32 | target << 16; + spu_int_route_set(spu, route); +} +EXPORT_SYMBOL_GPL(spu_irq_setaffinity); + +static void __iomem * __init map_spe_prop(struct device_node *n, + const char *name) +{ + struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + void *p; + int proplen; + + p = get_property(n, name, &proplen); + if (proplen != sizeof (struct address_prop)) + return NULL; + + prop = p; + + return ioremap(prop->address, prop->len); +} + +static void spu_unmap(struct spu *spu) +{ + iounmap(spu->priv2); + iounmap(spu->priv1); + iounmap(spu->problem); + iounmap((u8 __iomem *)spu->local_store); +} + +static int __init spu_map_device(struct spu *spu, struct device_node *spe) +{ + char *prop; + int ret; + + ret = -ENODEV; + prop = get_property(spe, "isrc", NULL); + if (!prop) + goto out; + spu->isrc = *(unsigned int *)prop; + + spu->name = get_property(spe, "name", NULL); + if (!spu->name) + goto out; + + prop = get_property(spe, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *)map_spe_prop(spe, "local-store"); + if (!spu->local_store) + goto out; + + spu->problem= map_spe_prop(spe, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv1= map_spe_prop(spe, "priv1"); + /* 
priv1 is not available on a hypervisor */ + + spu->priv2= map_spe_prop(spe, "priv2"); + if (!spu->priv2) + goto out_unmap; + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init find_spu_node_id(struct device_node *spe) +{ + unsigned int *id; + struct device_node *cpu; + + cpu = spe->parent->parent; + id = (unsigned int *)get_property(cpu, "node-id", NULL); + + return id ? *id : 0; +} + +static int __init create_spu(struct device_node *spe) +{ + struct spu *spu; + int ret; + static int number; + + ret = -ENOMEM; + spu = kmalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) + goto out; + + ret = spu_map_device(spu, spe); + if (ret) + goto out_free; + + spu->node = find_spu_node_id(spe); + spu->stop_code = 0; + spu->slb_replace = 0; + spu->mm = NULL; + spu->ctx = NULL; + spu->rq = NULL; + spu->pid = 0; + spu->class_0_pending = 0; + spu->flags = 0UL; + spu->dar = 0UL; + spu->dsisr = 0UL; + spin_lock_init(&spu->register_lock); + + spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1)); + spu_mfc_sr1_set(spu, 0x33); + + spu->ibox_callback = NULL; + spu->wbox_callback = NULL; + spu->stop_callback = NULL; + + down(&spu_mutex); + spu->number = number++; + ret = spu_request_irqs(spu); + if (ret) + goto out_unmap; + + list_add(&spu->list, &spu_list); + up(&spu_mutex); + + pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n", + spu->name, spu->isrc, spu->local_store, + spu->problem, spu->priv1, spu->priv2, spu->number); + goto out; + +out_unmap: + up(&spu_mutex); + spu_unmap(spu); +out_free: + kfree(spu); +out: + return ret; +} + +static void destroy_spu(struct spu *spu) +{ + list_del_init(&spu->list); + + spu_free_irqs(spu); + spu_unmap(spu); + kfree(spu); +} + +static void cleanup_spu_base(void) +{ + struct spu *spu, *tmp; + down(&spu_mutex); + list_for_each_entry_safe(spu, tmp, &spu_list, list) + destroy_spu(spu); + up(&spu_mutex); +} +module_exit(cleanup_spu_base); + +static int __init init_spu_base(void) +{ + struct device_node *node; + int ret; 
+ + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = create_spu(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + cleanup_spu_base(); + break; + } + } + /* in some old firmware versions, the spe is called 'spc', so we + look for that as well */ + for (node = of_find_node_by_type(NULL, "spc"); + node; node = of_find_node_by_type(node, "spc")) { + ret = create_spu(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + cleanup_spu_base(); + break; + } + } + return ret; +} +module_init(init_spu_base); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); diff --git a/arch/powerpc/platforms/cell/spu_priv1.c b/arch/powerpc/platforms/cell/spu_priv1.c new file mode 100644 index 00000000000..b2656421c7b --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1.c @@ -0,0 +1,133 @@ +/* + * access to SPU privileged registers + */ +#include <linux/module.h> + +#include <asm/io.h> +#include <asm/spu.h> + +void spu_int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); +} +EXPORT_SYMBOL_GPL(spu_int_mask_and); + +void spu_int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); +} +EXPORT_SYMBOL_GPL(spu_int_mask_or); + +void spu_int_mask_set(struct spu *spu, int class, u64 mask) +{ + out_be64(&spu->priv1->int_mask_RW[class], mask); +} +EXPORT_SYMBOL_GPL(spu_int_mask_set); + +u64 spu_int_mask_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_mask_RW[class]); +} +EXPORT_SYMBOL_GPL(spu_int_mask_get); + +void spu_int_stat_clear(struct spu *spu, int class, u64 stat) +{ + 
out_be64(&spu->priv1->int_stat_RW[class], stat); +} +EXPORT_SYMBOL_GPL(spu_int_stat_clear); + +u64 spu_int_stat_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_stat_RW[class]); +} +EXPORT_SYMBOL_GPL(spu_int_stat_get); + +void spu_int_route_set(struct spu *spu, u64 route) +{ + out_be64(&spu->priv1->int_route_RW, route); +} +EXPORT_SYMBOL_GPL(spu_int_route_set); + +u64 spu_mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dar_RW); +} +EXPORT_SYMBOL_GPL(spu_mfc_dar_get); + +u64 spu_mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dsisr_RW); +} +EXPORT_SYMBOL_GPL(spu_mfc_dsisr_get); + +void spu_mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); +} +EXPORT_SYMBOL_GPL(spu_mfc_dsisr_set); + +void spu_mfc_sdr_set(struct spu *spu, u64 sdr) +{ + out_be64(&spu->priv1->mfc_sdr_RW, sdr); +} +EXPORT_SYMBOL_GPL(spu_mfc_sdr_set); + +void spu_mfc_sr1_set(struct spu *spu, u64 sr1) +{ + out_be64(&spu->priv1->mfc_sr1_RW, sr1); +} +EXPORT_SYMBOL_GPL(spu_mfc_sr1_set); + +u64 spu_mfc_sr1_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_sr1_RW); +} +EXPORT_SYMBOL_GPL(spu_mfc_sr1_get); + +void spu_mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); +} +EXPORT_SYMBOL_GPL(spu_mfc_tclass_id_set); + +u64 spu_mfc_tclass_id_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_tclass_id_RW); +} +EXPORT_SYMBOL_GPL(spu_mfc_tclass_id_get); + +void spu_tlb_invalidate(struct spu *spu) +{ + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); +} +EXPORT_SYMBOL_GPL(spu_tlb_invalidate); + +void spu_resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); +} +EXPORT_SYMBOL_GPL(spu_resource_allocation_groupID_set); + +u64 spu_resource_allocation_groupID_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_groupID_RW); +} 
+EXPORT_SYMBOL_GPL(spu_resource_allocation_groupID_get); + +void spu_resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); +} +EXPORT_SYMBOL_GPL(spu_resource_allocation_enable_set); + +u64 spu_resource_allocation_enable_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_enable_RW); +} +EXPORT_SYMBOL_GPL(spu_resource_allocation_enable_get); diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c new file mode 100644 index 00000000000..261b507a901 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -0,0 +1,88 @@ +/* + * SPU file system -- system call stubs + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/file.h> +#include <linux/module.h> +#include <linux/syscalls.h> + +#include <asm/spu.h> + +struct spufs_calls spufs_calls = { + .owner = NULL, +}; + +/* These stub syscalls are needed to have the actual implementation + * within a loadable module. 
When spufs is built into the kernel, + * this file is not used and the syscalls directly enter the fs code */ + +asmlinkage long sys_spu_create(const char __user *name, + unsigned int flags, mode_t mode) +{ + long ret; + struct module *owner = spufs_calls.owner; + + ret = -ENOSYS; + if (owner && try_module_get(owner)) { + ret = spufs_calls.create_thread(name, flags, mode); + module_put(owner); + } + return ret; +} + +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + long ret; + struct file *filp; + int fput_needed; + struct module *owner = spufs_calls.owner; + + ret = -ENOSYS; + if (owner && try_module_get(owner)) { + ret = -EBADF; + filp = fget_light(fd, &fput_needed); + if (filp) { + ret = spufs_calls.spu_run(filp, unpc, ustatus); + fput_light(filp, fput_needed); + } + module_put(owner); + } + return ret; +} + +int register_spu_syscalls(struct spufs_calls *calls) +{ + if (spufs_calls.owner) + return -EBUSY; + + spufs_calls.create_thread = calls->create_thread; + spufs_calls.spu_run = calls->spu_run; + smp_mb(); + spufs_calls.owner = calls->owner; + return 0; +} +EXPORT_SYMBOL_GPL(register_spu_syscalls); + +void unregister_spu_syscalls(struct spufs_calls *calls) +{ + BUG_ON(spufs_calls.owner != calls->owner); + spufs_calls.owner = NULL; +} +EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile new file mode 100644 index 00000000000..a7cddf40e3d --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -0,0 +1,54 @@ +obj-$(CONFIG_SPU_FS) += spufs.o +spufs-y += inode.o file.o context.o switch.o syscalls.o +spufs-y += sched.o backing_ops.o hw_ops.o run.o + +# Rules to build switch.o with the help of SPU tool chain +SPU_CROSS := spu- +SPU_CC := $(SPU_CROSS)gcc +SPU_AS := $(SPU_CROSS)gcc +SPU_LD := $(SPU_CROSS)ld +SPU_OBJCOPY := $(SPU_CROSS)objcopy +SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -I$(objtree)/include2 +SPU_AFLAGS := -c 
-D__ASSEMBLY__ -I$(srctree)/include -I$(objtree)/include2 +SPU_LDFLAGS := -N -Ttext=0x0 + +$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h + +# Compile SPU files + cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< +quiet_cmd_spu_cc = SPU_CC $@ +$(obj)/spu_%.o: $(src)/spu_%.c + $(call if_changed,spu_cc) + +# Assemble SPU files + cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $< +quiet_cmd_spu_as = SPU_AS $@ +$(obj)/spu_%.o: $(src)/spu_%.S + $(call if_changed,spu_as) + +# Link SPU Executables + cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^ +quiet_cmd_spu_ld = SPU_LD $@ +$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o + $(call if_changed,spu_ld) + +# Copy into binary format + cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@ +quiet_cmd_spu_objcopy = OBJCOPY $@ +$(obj)/spu_%.bin: $(src)/spu_% + $(call if_changed,spu_objcopy) + +# create C code from ELF executable +cmd_hexdump = ( \ + echo "/*" ; \ + echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \ + echo " * Hex-dump auto generated from $*.c." ; \ + echo " * Do not edit!" ; \ + echo " */" ; \ + echo "static unsigned int $*_code[] __page_aligned = {" ; \ + hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \ + echo "};" ; \ + ) > $@ +quiet_cmd_hexdump = HEXDUMP $@ +$(obj)/%_dump.h: $(obj)/%.bin + $(call if_changed,hexdump) diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c new file mode 100644 index 00000000000..a5c489a53c6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -0,0 +1,308 @@ +/* backing_ops.c - query/set operations on saved SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * These register operations allow SPUFS to operate on saved + * SPU contexts rather than hardware. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/poll.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +/* + * Reads/writes to various problem and priv2 registers require + * state changes, i.e. generate SPU events, modify channel + * counts, etc. + */ + +static void gen_spu_event(struct spu_context *ctx, u32 event) +{ + u64 ch0_cnt; + u64 ch0_data; + u64 ch1_data; + + ch0_cnt = ctx->csa.spu_chnlcnt_RW[0]; + ch0_data = ctx->csa.spu_chnldata_RW[0]; + ch1_data = ctx->csa.spu_chnldata_RW[1]; + ctx->csa.spu_chnldata_RW[0] |= event; + if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) { + ctx->csa.spu_chnlcnt_RW[0] = 1; + } +} + +static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data) +{ + u32 mbox_stat; + int ret = 0; + + spin_lock(&ctx->csa.register_lock); + mbox_stat = ctx->csa.prob.mb_stat_R; + if (mbox_stat & 0x0000ff) { + /* Read the first available word. 
+ * Implementation note: the depth + * of pu_mb_R is currently 1. + */ + *data = ctx->csa.prob.pu_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0x0000ff); + ctx->csa.spu_chnlcnt_RW[28] = 1; + gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_mbox_stat_read(struct spu_context *ctx) +{ + return ctx->csa.prob.mb_stat_R; +} + +/* + * Report which of the requested poll @events are ready according to the + * saved mailbox status word. For a requested event that is not ready, + * the matching class 2 interrupt mask bit is set in the saved state + * instead. Returns the accumulated poll mask. + */ +static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + int ret; + u32 stat; + + ret = 0; + spin_lock_irq(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + /* NOTE(review): both branches clear bits in the class 0 status + * while enabling the class 2 mask -- confirm whether the + * class 2 status was intended */ + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + ctx->csa.priv1.int_stat_class0_RW &= ~0x1; + ctx->csa.priv1.int_mask_class2_RW |= 0x1; + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + /* accumulate, so a ready POLLIN found above is kept */ + ret |= POLLOUT | POLLWRNORM; + else { + ctx->csa.priv1.int_stat_class0_RW &= ~0x10; + ctx->csa.priv1.int_mask_class2_RW |= 0x10; + } + } + spin_unlock_irq(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.prob.mb_stat_R & 0xff0000) { + /* Read the first available word. + * Implementation note: the depth + * of puint_mb_R is currently 1. 
+ */ + *data = ctx->csa.priv2.puint_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0xff0000); + ctx->csa.spu_chnlcnt_RW[30] = 1; + gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + ctx->csa.priv1.int_mask_class2_RW |= 0x1UL; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) { + int slot = ctx->csa.spu_chnlcnt_RW[29]; + int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8; + + /* We have space to write wbox_data. + * Implementation note: the depth + * of spu_mb_W is currently 4. + */ + BUG_ON(avail != (4 - slot)); + ctx->csa.spu_mailbox_data[slot] = data; + ctx->csa.spu_chnlcnt_RW[29] = ++slot; + ctx->csa.prob.mb_stat_R = (((4 - slot) & 0xff) << 8); + gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + ctx->csa.priv1.int_mask_class2_RW |= 0x10; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_signal1_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[3]; +} + +static void spu_backing_signal1_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x1) + ctx->csa.spu_chnldata_RW[3] |= data; + else + ctx->csa.spu_chnldata_RW[3] = data; + ctx->csa.spu_chnlcnt_RW[3] = 1; + gen_spu_event(ctx, MFC_SIGNAL_1_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static u32 spu_backing_signal2_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[4]; +} + +static void spu_backing_signal2_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x2) + ctx->csa.spu_chnldata_RW[4] |= data; + else + 
ctx->csa.spu_chnldata_RW[4] = data; + ctx->csa.spu_chnlcnt_RW[4] = 1; + gen_spu_event(ctx, MFC_SIGNAL_2_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 1; + else + tmp &= ~1; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal1_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0); +} + +static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 2; + else + tmp &= ~2; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal2_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0); +} + +static u32 spu_backing_npc_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_npc_RW; +} + +static void spu_backing_npc_write(struct spu_context *ctx, u32 val) +{ + ctx->csa.prob.spu_npc_RW = val; +} + +static u32 spu_backing_status_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_status_R; +} + +static char *spu_backing_get_ls(struct spu_context *ctx) +{ + return ctx->csa.lscsa->ls; +} + +static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock(&ctx->csa.register_lock); + ctx->csa.prob.spu_runcntl_RW = val; + if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; + } else { + ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; + } + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_runcntl_stop(struct spu_context *ctx) +{ + spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); +} + +struct spu_context_ops spu_backing_ops = { + .mbox_read = spu_backing_mbox_read, + .mbox_stat_read = spu_backing_mbox_stat_read, + 
.mbox_stat_poll = spu_backing_mbox_stat_poll, + .ibox_read = spu_backing_ibox_read, + .wbox_write = spu_backing_wbox_write, + .signal1_read = spu_backing_signal1_read, + .signal1_write = spu_backing_signal1_write, + .signal2_read = spu_backing_signal2_read, + .signal2_write = spu_backing_signal2_write, + .signal1_type_set = spu_backing_signal1_type_set, + .signal1_type_get = spu_backing_signal1_type_get, + .signal2_type_set = spu_backing_signal2_type_set, + .signal2_type_get = spu_backing_signal2_type_get, + .npc_read = spu_backing_npc_read, + .npc_write = spu_backing_npc_write, + .status_read = spu_backing_status_read, + .get_ls = spu_backing_get_ls, + .runcntl_write = spu_backing_runcntl_write, + .runcntl_stop = spu_backing_runcntl_stop, +}; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c new file mode 100644 index 00000000000..336f238102f --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -0,0 +1,167 @@ +/* + * SPU file system -- SPU context management + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include "spufs.h" + +struct spu_context *alloc_spu_context(struct address_space *local_store) +{ + struct spu_context *ctx; + ctx = kmalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + goto out; + /* Binding to physical processor deferred + * until spu_activate(). + */ + spu_init_csa(&ctx->csa); + if (!ctx->csa.lscsa) { + goto out_free; + } + spin_lock_init(&ctx->mmio_lock); + kref_init(&ctx->kref); + init_rwsem(&ctx->state_sema); + init_MUTEX(&ctx->run_sema); + init_waitqueue_head(&ctx->ibox_wq); + init_waitqueue_head(&ctx->wbox_wq); + init_waitqueue_head(&ctx->stop_wq); + ctx->ibox_fasync = NULL; + ctx->wbox_fasync = NULL; + ctx->state = SPU_STATE_SAVED; + ctx->local_store = local_store; + ctx->spu = NULL; + ctx->ops = &spu_backing_ops; + ctx->owner = get_task_mm(current); + goto out; +out_free: + kfree(ctx); + ctx = NULL; +out: + return ctx; +} + +void destroy_spu_context(struct kref *kref) +{ + struct spu_context *ctx; + ctx = container_of(kref, struct spu_context, kref); + down_write(&ctx->state_sema); + spu_deactivate(ctx); + ctx->ibox_fasync = NULL; + ctx->wbox_fasync = NULL; + up_write(&ctx->state_sema); + spu_fini_csa(&ctx->csa); + kfree(ctx); +} + +struct spu_context * get_spu_context(struct spu_context *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} + +int put_spu_context(struct spu_context *ctx) +{ + return kref_put(&ctx->kref, &destroy_spu_context); +} + +/* give up the mm reference when the context is about to be destroyed */ +void spu_forget(struct spu_context *ctx) +{ + struct mm_struct *mm; + spu_acquire_saved(ctx); + mm = ctx->owner; + ctx->owner = NULL; + mmput(mm); + spu_release(ctx); +} + +void spu_acquire(struct spu_context *ctx) +{ + down_read(&ctx->state_sema); +} + +void spu_release(struct spu_context *ctx) +{ + up_read(&ctx->state_sema); +} + +void spu_unmap_mappings(struct spu_context *ctx) +{ + 
unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); +} + +int spu_acquire_runnable(struct spu_context *ctx) +{ + int ret = 0; + + down_read(&ctx->state_sema); + if (ctx->state == SPU_STATE_RUNNABLE) { + ctx->spu->prio = current->prio; + return 0; + } + up_read(&ctx->state_sema); + + down_write(&ctx->state_sema); + /* ctx is about to be freed, can't acquire any more */ + if (!ctx->owner) { + ret = -EINVAL; + goto out; + } + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + goto out; + ctx->state = SPU_STATE_RUNNABLE; + } + + downgrade_write(&ctx->state_sema); + /* On success, we return holding the lock */ + + return ret; +out: + /* Release here, to simplify calling code. */ + up_write(&ctx->state_sema); + + return ret; +} + +void spu_acquire_saved(struct spu_context *ctx) +{ + down_read(&ctx->state_sema); + + if (ctx->state == SPU_STATE_SAVED) + return; + + up_read(&ctx->state_sema); + down_write(&ctx->state_sema); + + if (ctx->state == SPU_STATE_RUNNABLE) { + spu_deactivate(ctx); + ctx->state = SPU_STATE_SAVED; + } + + downgrade_write(&ctx->state_sema); +} diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c new file mode 100644 index 00000000000..dfa649c9b95 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -0,0 +1,794 @@ +/* + * SPU file system -- file contents + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/fs.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/ptrace.h> + +#include <asm/io.h> +#include <asm/semaphore.h> +#include <asm/spu.h> +#include <asm/uaccess.h> + +#include "spufs.h" + + +static int +spufs_mem_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + file->f_mapping = i->i_ctx->local_store; + return 0; +} + +static ssize_t +spufs_mem_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + char *local_store; + int ret; + + spu_acquire(ctx); + + local_store = ctx->ops->get_ls(ctx); + ret = simple_read_from_buffer(buffer, size, pos, local_store, LS_SIZE); + + spu_release(ctx); + return ret; +} + +/* + * Copy up to @size bytes from user space into the context's local store + * at offset *pos and advance *pos. Returns the number of bytes written, + * -EINVAL for a negative offset, -EFBIG when nothing can be written + * inside LS_SIZE, or -EFAULT if the user copy fails. + */ +static ssize_t +spufs_mem_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + char *local_store; + int ret; + + /* size is unsigned: validate *pos before clamping, otherwise a + * negative remainder wraps to a huge length and is never caught */ + if (*pos < 0) + return -EINVAL; + if (*pos >= LS_SIZE) + return -EFBIG; + size = min_t(size_t, LS_SIZE - *pos, size); + if (!size) + return -EFBIG; + *pos += size; + + spu_acquire(ctx); + + local_store = ctx->ops->get_ls(ctx); + ret = copy_from_user(local_store + *pos - size, + buffer, size) ? 
-EFAULT : size; + + spu_release(ctx); + return ret; +} + +#ifdef CONFIG_SPARSEMEM +static struct page * +spufs_mem_mmap_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct page *page = NOPAGE_SIGBUS; + + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + offset += vma->vm_pgoff << PAGE_SHIFT; + + spu_acquire(ctx); + + if (ctx->state == SPU_STATE_SAVED) + page = vmalloc_to_page(ctx->csa.lscsa->ls + offset); + else + page = pfn_to_page((ctx->spu->local_store_phys + offset) + >> PAGE_SHIFT); + + spu_release(ctx); + + if (type) + *type = VM_FAULT_MINOR; + + page_cache_get(page); + return page; +} + +static struct vm_operations_struct spufs_mem_mmap_vmops = { + .nopage = spufs_mem_mmap_nopage, +}; + +static int +spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + /* FIXME: */ + vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) + | _PAGE_NO_CACHE); + + vma->vm_ops = &spufs_mem_mmap_vmops; + return 0; +} +#endif + +static struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, +#ifdef CONFIG_SPARSEMEM + .mmap = spufs_mem_mmap, +#endif +}; + +static int +spufs_regs_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + return 0; +} + +static ssize_t +spufs_regs_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + spu_acquire_saved(ctx); + + ret = simple_read_from_buffer(buffer, size, pos, + lscsa->gprs, sizeof lscsa->gprs); + + spu_release(ctx); + return ret; +} + +static ssize_t +spufs_regs_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = 
file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + size = min_t(ssize_t, sizeof lscsa->gprs - *pos, size); + if (size <= 0) + return -EFBIG; + *pos += size; + + spu_acquire_saved(ctx); + + ret = copy_from_user(lscsa->gprs + *pos - size, + buffer, size) ? -EFAULT : size; + + spu_release(ctx); + return ret; +} + +static struct file_operations spufs_regs_fops = { + .open = spufs_regs_open, + .read = spufs_regs_read, + .write = spufs_regs_write, + .llseek = generic_file_llseek, +}; + +static ssize_t +spufs_fpcr_read(struct file *file, char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + spu_acquire_saved(ctx); + + ret = simple_read_from_buffer(buffer, size, pos, + &lscsa->fpcr, sizeof(lscsa->fpcr)); + + spu_release(ctx); + return ret; +} + +static ssize_t +spufs_fpcr_write(struct file *file, const char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + size = min_t(ssize_t, sizeof(lscsa->fpcr) - *pos, size); + if (size <= 0) + return -EFBIG; + *pos += size; + + spu_acquire_saved(ctx); + + ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, + buffer, size) ? 
-EFAULT : size; + + spu_release(ctx); + return ret; +} + +static struct file_operations spufs_fpcr_fops = { + .open = spufs_regs_open, + .read = spufs_fpcr_read, + .write = spufs_fpcr_write, + .llseek = generic_file_llseek, +}; + +/* generic open function for all pipe-like files */ +static int spufs_pipe_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + + return nonseekable_open(inode, file); +} + +static ssize_t spufs_mbox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 mbox_data; + int ret; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + ret = ctx->ops->mbox_read(ctx, &mbox_data); + spu_release(ctx); + + if (!ret) + return -EAGAIN; + + if (copy_to_user(buf, &mbox_data, sizeof mbox_data)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_mbox_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_read, +}; + +static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 mbox_stat; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + + mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; + + spu_release(ctx); + + if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_mbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_stat_read, +}; + +/* low-level ibox access function */ +size_t spu_ibox_read(struct spu_context *ctx, u32 *data) +{ + return ctx->ops->ibox_read(ctx, data); +} + +static int spufs_ibox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->ibox_fasync); +} + +/* interrupt-level ibox callback function. 
*/ +void spufs_ibox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + wake_up_all(&ctx->ibox_wq); + kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); +} + +static ssize_t spufs_ibox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 ibox_data; + ssize_t ret; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + + ret = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_ibox_read(ctx, &ibox_data)) + ret = -EAGAIN; + } else { + ret = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + } + + spu_release(ctx); + + if (ret) + return ret; + + ret = 4; + if (copy_to_user(buf, &ibox_data, sizeof ibox_data)) + ret = -EFAULT; + + return ret; +} + +static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + unsigned int mask; + + poll_wait(file, &ctx->ibox_wq, wait); + + spu_acquire(ctx); + mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); + spu_release(ctx); + + return mask; +} + +static struct file_operations spufs_ibox_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_read, + .poll = spufs_ibox_poll, + .fasync = spufs_ibox_fasync, +}; + +static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 ibox_stat; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_ibox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_stat_read, +}; + +/* low-level mailbox write */ +size_t spu_wbox_write(struct spu_context *ctx, u32 data) +{ + return ctx->ops->wbox_write(ctx, data); +} + +static int spufs_wbox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = 
file->private_data; + int ret; + + ret = fasync_helper(fd, file, on, &ctx->wbox_fasync); + + return ret; +} + +/* interrupt-level wbox callback function. */ +void spufs_wbox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + wake_up_all(&ctx->wbox_wq); + kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); +} + +static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 wbox_data; + int ret; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&wbox_data, buf, sizeof wbox_data)) + return -EFAULT; + + spu_acquire(ctx); + + ret = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_wbox_write(ctx, wbox_data)) + ret = -EAGAIN; + } else { + ret = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + } + + spu_release(ctx); + + return ret ? ret : sizeof wbox_data; +} + +static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + unsigned int mask; + + poll_wait(file, &ctx->wbox_wq, wait); + + spu_acquire(ctx); + mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); + spu_release(ctx); + + return mask; +} + +static struct file_operations spufs_wbox_fops = { + .open = spufs_pipe_open, + .write = spufs_wbox_write, + .poll = spufs_wbox_poll, + .fasync = spufs_wbox_fasync, +}; + +static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 wbox_stat; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_wbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_wbox_stat_read, +}; + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t 
*pos) +{ + struct spu_context *ctx = file->private_data; + u32 data; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + data = ctx->ops->signal1_read(ctx); + spu_release(ctx); + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + + return 4; +} + +static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + spu_acquire(ctx); + ctx->ops->signal1_write(ctx, data); + spu_release(ctx); + + return 4; +} + +static struct file_operations spufs_signal1_fops = { + .open = spufs_pipe_open, + .read = spufs_signal1_read, + .write = spufs_signal1_write, +}; + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + spu_acquire(ctx); + data = ctx->ops->signal2_read(ctx); + spu_release(ctx); + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + + return 4; +} + +static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + spu_acquire(ctx); + ctx->ops->signal2_write(ctx, data); + spu_release(ctx); + + return 4; +} + +static struct file_operations spufs_signal2_fops = { + .open = spufs_pipe_open, + .read = spufs_signal2_read, + .write = spufs_signal2_write, +}; + +static void spufs_signal1_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + + spu_acquire(ctx); + ctx->ops->signal1_type_set(ctx, val); + spu_release(ctx); +} + +static u64 spufs_signal1_type_get(void *data) +{ + struct spu_context *ctx = data; + u64 ret; + + spu_acquire(ctx); + ret = ctx->ops->signal1_type_get(ctx); + 
spu_release(ctx); + + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu"); + +static void spufs_signal2_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + + spu_acquire(ctx); + ctx->ops->signal2_type_set(ctx, val); + spu_release(ctx); +} + +static u64 spufs_signal2_type_get(void *data) +{ + struct spu_context *ctx = data; + u64 ret; + + spu_acquire(ctx); + ret = ctx->ops->signal2_type_get(ctx); + spu_release(ctx); + + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu"); + +static void spufs_npc_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + spu_acquire(ctx); + ctx->ops->npc_write(ctx, val); + spu_release(ctx); +} + +static u64 spufs_npc_get(void *data) +{ + struct spu_context *ctx = data; + u64 ret; + spu_acquire(ctx); + ret = ctx->ops->npc_read(ctx); + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "%llx\n") + +static void spufs_decr_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + spu_acquire_saved(ctx); + lscsa->decr.slot[0] = (u32) val; + spu_release(ctx); +} + +static u64 spufs_decr_get(void *data) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + u64 ret; + spu_acquire_saved(ctx); + ret = lscsa->decr.slot[0]; + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "%llx\n") + +static void spufs_decr_status_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + spu_acquire_saved(ctx); + lscsa->decr_status.slot[0] = (u32) val; + spu_release(ctx); +} + +static u64 spufs_decr_status_get(void *data) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + u64 ret; + spu_acquire_saved(ctx); + ret = 
lscsa->decr_status.slot[0]; + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "%llx\n") + +static void spufs_spu_tag_mask_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + spu_acquire_saved(ctx); + lscsa->tag_mask.slot[0] = (u32) val; + spu_release(ctx); +} + +static u64 spufs_spu_tag_mask_get(void *data) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + u64 ret; + spu_acquire_saved(ctx); + ret = lscsa->tag_mask.slot[0]; + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_spu_tag_mask_ops, spufs_spu_tag_mask_get, + spufs_spu_tag_mask_set, "%llx\n") + +static void spufs_event_mask_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + spu_acquire_saved(ctx); + lscsa->event_mask.slot[0] = (u32) val; + spu_release(ctx); +} + +static u64 spufs_event_mask_get(void *data) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + u64 ret; + spu_acquire_saved(ctx); + ret = lscsa->event_mask.slot[0]; + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "%llx\n") + +static void spufs_srr0_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + spu_acquire_saved(ctx); + lscsa->srr0.slot[0] = (u32) val; + spu_release(ctx); +} + +static u64 spufs_srr0_get(void *data) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + u64 ret; + spu_acquire_saved(ctx); + ret = lscsa->srr0.slot[0]; + spu_release(ctx); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "%llx\n") + +struct tree_descr spufs_dir_contents[] = { + { "mem", &spufs_mem_fops, 0666, }, + { "regs", &spufs_regs_fops, 0666, }, + { "mbox", &spufs_mbox_fops, 0444, }, 
+ { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "fpcr", &spufs_fpcr_fops, 0666, }, + { "decr", &spufs_decr_ops, 0666, }, + { "decr_status", &spufs_decr_status_ops, 0666, }, + { "spu_tag_mask", &spufs_spu_tag_mask_ops, 0666, }, + { "event_mask", &spufs_event_mask_ops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, + {}, +}; diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c new file mode 100644 index 00000000000..5445719bff7 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -0,0 +1,255 @@ +/* hw_ops.c - query/set operations on active SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/poll.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + u32 mbox_stat; + int ret = 0; + + spin_lock_irq(&spu->register_lock); + mbox_stat = in_be32(&prob->mb_stat_R); + if (mbox_stat & 0x0000ff) { + *data = in_be32(&prob->pu_mb_R); + ret = 4; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static u32 spu_hw_mbox_stat_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->mb_stat_R); +} + +static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + struct spu *spu = ctx->spu; + int ret = 0; + u32 stat; + + spin_lock_irq(&spu->register_lock); + stat = in_be32(&spu->problem->mb_stat_R); + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + spu_int_stat_clear(spu, 2, 0x1); + spu_int_mask_or(spu, 2, 0x1); + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + ret = POLLOUT | POLLWRNORM; + else { + spu_int_stat_clear(spu, 2, 0x10); + spu_int_mask_or(spu, 2, 0x10); + } + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + int 
ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0xff0000) { + /* read the first available word */ + *data = in_be64(&priv2->puint_mb_R); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + spu_int_mask_or(spu, 2, 0x1); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0x00ff00) { + /* we have space to write wbox_data to */ + out_be32(&prob->spu_mb_W, data); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + spu_int_mask_or(spu, 2, 0x10); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static u32 spu_hw_signal1_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->signal_notify1); +} + +static void spu_hw_signal1_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify1, data); +} + +static u32 spu_hw_signal2_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->signal_notify1); +} + +static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify2, data); +} + +static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 1; + else + tmp &= ~1; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal1_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0); +} + +static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct 
spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 2; + else + tmp &= ~2; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal2_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0); +} + +static u32 spu_hw_npc_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_npc_RW); +} + +static void spu_hw_npc_write(struct spu_context *ctx, u32 val) +{ + out_be32(&ctx->spu->problem->spu_npc_RW, val); +} + +static u32 spu_hw_status_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_status_R); +} + +static char *spu_hw_get_ls(struct spu_context *ctx) +{ + return ctx->spu->local_store; +} + +static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) +{ + eieio(); + out_be32(&ctx->spu->problem->spu_runcntl_RW, val); +} + +static void spu_hw_runcntl_stop(struct spu_context *ctx) +{ + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); + while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + spin_unlock_irq(&ctx->spu->register_lock); +} + +struct spu_context_ops spu_hw_ops = { + .mbox_read = spu_hw_mbox_read, + .mbox_stat_read = spu_hw_mbox_stat_read, + .mbox_stat_poll = spu_hw_mbox_stat_poll, + .ibox_read = spu_hw_ibox_read, + .wbox_write = spu_hw_wbox_write, + .signal1_read = spu_hw_signal1_read, + .signal1_write = spu_hw_signal1_write, + .signal2_read = spu_hw_signal2_read, + .signal2_write = spu_hw_signal2_write, + .signal1_type_set = spu_hw_signal1_type_set, + .signal1_type_get = spu_hw_signal1_type_get, + .signal2_type_set = spu_hw_signal2_type_set, + .signal2_type_get = spu_hw_signal2_type_get, + .npc_read = spu_hw_npc_read, + .npc_write = spu_hw_npc_write, + .status_read = spu_hw_status_read, + .get_ls = spu_hw_get_ls, + .runcntl_write = 
spu_hw_runcntl_write, + .runcntl_stop = spu_hw_runcntl_stop, +}; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c new file mode 100644 index 00000000000..b3962c3a034 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -0,0 +1,486 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/backing-dev.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/parser.h> + +#include <asm/io.h> +#include <asm/semaphore.h> +#include <asm/spu.h> +#include <asm/uaccess.h> + +#include "spufs.h" + +static kmem_cache_t *spufs_inode_cache; + +static struct inode * +spufs_alloc_inode(struct super_block *sb) +{ + struct spufs_inode_info *ei; + + ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL); + if (!ei) + return NULL; + return &ei->vfs_inode; +} + +static void +spufs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); +} + +static void +spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags) +{ + struct spufs_inode_info *ei = p; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static struct inode * +spufs_new_inode(struct super_block *sb, int mode) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +out: + return inode; +} + +static int +spufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + + if ((attr->ia_valid & ATTR_SIZE) && + (attr->ia_size != inode->i_size)) + return -EINVAL; + return inode_setattr(inode, attr); +} + + +static int +spufs_new_file(struct super_block *sb, struct dentry *dentry, + struct file_operations *fops, int mode, + struct spu_context *ctx) +{ + static struct inode_operations spufs_file_iops = { + .setattr = spufs_setattr, + }; + struct inode *inode; + int ret; + + 
ret = -ENOSPC; + inode = spufs_new_inode(sb, S_IFREG | mode); + if (!inode) + goto out; + + ret = 0; + inode->i_op = &spufs_file_iops; + inode->i_fop = fops; + inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + d_add(dentry, inode); +out: + return ret; +} + +static void +spufs_delete_inode(struct inode *inode) +{ + if (SPUFS_I(inode)->i_ctx) + put_spu_context(SPUFS_I(inode)->i_ctx); + clear_inode(inode); +} + +static void spufs_prune_dir(struct dentry *dir) +{ + struct dentry *dentry, *tmp; + mutex_lock(&dir->d_inode->i_mutex); + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + if (!(d_unhashed(dentry)) && dentry->d_inode) { + dget_locked(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + simple_unlink(dir->d_inode, dentry); + spin_unlock(&dcache_lock); + dput(dentry); + } else { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + } + } + shrink_dcache_parent(dir); + mutex_unlock(&dir->d_inode->i_mutex); +} + +static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry) +{ + struct spu_context *ctx; + + /* remove all entries */ + mutex_lock(&root->i_mutex); + spufs_prune_dir(dir_dentry); + mutex_unlock(&root->i_mutex); + + /* We have to give up the mm_struct */ + ctx = SPUFS_I(dir_dentry->d_inode)->i_ctx; + spu_forget(ctx); + + /* XXX Do we need to hold i_mutex here ? 
*/ + return simple_rmdir(root, dir_dentry); +} + +static int spufs_fill_dir(struct dentry *dir, struct tree_descr *files, + int mode, struct spu_context *ctx) +{ + struct dentry *dentry; + int ret; + + while (files->name && files->name[0]) { + ret = -ENOMEM; + dentry = d_alloc_name(dir, files->name); + if (!dentry) + goto out; + ret = spufs_new_file(dir->d_sb, dentry, files->ops, + files->mode & mode, ctx); + if (ret) + goto out; + files++; + } + return 0; +out: + spufs_prune_dir(dir); + return ret; +} + +static int spufs_dir_close(struct inode *inode, struct file *file) +{ + struct inode *dir; + struct dentry *dentry; + int ret; + + dentry = file->f_dentry; + dir = dentry->d_parent->d_inode; + + ret = spufs_rmdir(dir, dentry); + WARN_ON(ret); + + return dcache_dir_close(inode, file); +} + +struct inode_operations spufs_dir_inode_operations = { + .lookup = simple_lookup, +}; + +struct file_operations spufs_context_fops = { + .open = dcache_dir_open, + .release = spufs_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = dcache_readdir, + .fsync = simple_sync_file, +}; + +static int +spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int ret; + struct inode *inode; + struct spu_context *ctx; + + ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + ctx = alloc_spu_context(inode->i_mapping); + SPUFS_I(inode)->i_ctx = ctx; + if (!ctx) + goto out_iput; + + inode->i_op = &spufs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + if (ret) + goto out_free_ctx; + + d_instantiate(dentry, inode); + dget(dentry); + dir->i_nlink++; + dentry->d_inode->i_nlink++; + goto out; + +out_free_ctx: + put_spu_context(ctx); +out_iput: + iput(inode); +out: + return ret; +} + +static int spufs_context_open(struct dentry *dentry, 
struct vfsmount *mnt) +{ + int ret; + struct file *filp; + + ret = get_unused_fd(); + if (ret < 0) { + dput(dentry); + mntput(mnt); + goto out; + } + + filp = dentry_open(dentry, mnt, O_RDONLY); + if (IS_ERR(filp)) { + put_unused_fd(ret); + ret = PTR_ERR(filp); + goto out; + } + + filp->f_op = &spufs_context_fops; + fd_install(ret, filp); +out: + return ret; +} + +static struct file_system_type spufs_type; + +long spufs_create_thread(struct nameidata *nd, + unsigned int flags, mode_t mode) +{ + struct dentry *dentry; + int ret; + + /* need to be at the root of spufs */ + ret = -EINVAL; + if (nd->dentry->d_sb->s_type != &spufs_type || + nd->dentry != nd->dentry->d_sb->s_root) + goto out; + + dentry = lookup_create(nd, 1); + ret = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_dir; + + ret = -EEXIST; + if (dentry->d_inode) + goto out_dput; + + mode &= ~current->fs->umask; + ret = spufs_mkdir(nd->dentry->d_inode, dentry, mode & S_IRWXUGO); + if (ret) + goto out_dput; + + /* + * get references for dget and mntget, will be released + * in error path of *_open(). 
+ */ + ret = spufs_context_open(dget(dentry), mntget(nd->mnt)); + if (ret < 0) + spufs_rmdir(nd->dentry->d_inode, dentry); + +out_dput: + dput(dentry); +out_dir: + mutex_unlock(&nd->dentry->d_inode->i_mutex); +out: + return ret; +} + +/* File system initialization */ +enum { + Opt_uid, Opt_gid, Opt_err, +}; + +static match_table_t spufs_tokens = { + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_err, NULL }, +}; + +static int +spufs_parse_options(char *options, struct inode *root) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + while ((p = strsep(&options, ",")) != NULL) { + int token, option; + + if (!*p) + continue; + + token = match_token(p, spufs_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + root->i_uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + root->i_gid = option; + break; + default: + return 0; + } + } + return 1; +} + +static int +spufs_create_root(struct super_block *sb, void *data) +{ + struct inode *inode; + int ret; + + ret = -ENOMEM; + inode = spufs_new_inode(sb, S_IFDIR | 0775); + if (!inode) + goto out; + + inode->i_op = &spufs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + SPUFS_I(inode)->i_ctx = NULL; + + ret = -EINVAL; + if (!spufs_parse_options(data, inode)) + goto out_iput; + + ret = -ENOMEM; + sb->s_root = d_alloc_root(inode); + if (!sb->s_root) + goto out_iput; + + return 0; +out_iput: + iput(inode); +out: + return ret; +} + +static int +spufs_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct super_operations s_ops = { + .alloc_inode = spufs_alloc_inode, + .destroy_inode = spufs_destroy_inode, + .statfs = simple_statfs, + .delete_inode = spufs_delete_inode, + .drop_inode = generic_delete_inode, + }; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SPUFS_MAGIC; + sb->s_op = &s_ops; + + return 
spufs_create_root(sb, data); +} + +static struct super_block * +spufs_get_sb(struct file_system_type *fstype, int flags, + const char *name, void *data) +{ + return get_sb_single(fstype, flags, data, spufs_fill_super); +} + +static struct file_system_type spufs_type = { + .owner = THIS_MODULE, + .name = "spufs", + .get_sb = spufs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int spufs_init(void) +{ + int ret; + ret = -ENOMEM; + spufs_inode_cache = kmem_cache_create("spufs_inode_cache", + sizeof(struct spufs_inode_info), 0, + SLAB_HWCACHE_ALIGN, spufs_init_once, NULL); + + if (!spufs_inode_cache) + goto out; + if (spu_sched_init() != 0) { + kmem_cache_destroy(spufs_inode_cache); + goto out; + } + ret = register_filesystem(&spufs_type); + if (ret) + goto out_cache; + ret = register_spu_syscalls(&spufs_calls); + if (ret) + goto out_fs; + return 0; +out_fs: + unregister_filesystem(&spufs_type); +out_cache: + kmem_cache_destroy(spufs_inode_cache); +out: + return ret; +} +module_init(spufs_init); + +static void spufs_exit(void) +{ + spu_sched_exit(); + unregister_spu_syscalls(&spufs_calls); + unregister_filesystem(&spufs_type); + kmem_cache_destroy(spufs_inode_cache); +} +module_exit(spufs_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c new file mode 100644 index 00000000000..18ea8866c61 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -0,0 +1,131 @@ +#include <linux/wait.h> +#include <linux/ptrace.h> + +#include <asm/spu.h> + +#include "spufs.h" + +/* interrupt-level stop callback function. 
*/ +void spufs_stop_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + wake_up_all(&ctx->stop_wq); +} + +static inline int spu_stopped(struct spu_context *ctx, u32 * stat) +{ + struct spu *spu; + u64 pte_fault; + + *stat = ctx->ops->status_read(ctx); + if (ctx->state != SPU_STATE_RUNNABLE) + return 1; + spu = ctx->spu; + pte_fault = spu->dsisr & + (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); + return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; +} + +static inline int spu_run_init(struct spu_context *ctx, u32 * npc, + u32 * status) +{ + int ret; + + if ((ret = spu_acquire_runnable(ctx)) != 0) + return ret; + ctx->ops->npc_write(ctx, *npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + return 0; +} + +static inline int spu_run_fini(struct spu_context *ctx, u32 * npc, + u32 * status) +{ + int ret = 0; + + *status = ctx->ops->status_read(ctx); + *npc = ctx->ops->npc_read(ctx); + spu_release(ctx); + + if (signal_pending(current)) + ret = -ERESTARTSYS; + if (unlikely(current->ptrace & PT_PTRACED)) { + if ((*status & SPU_STATUS_STOPPED_BY_STOP) + && (*status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { + force_sig(SIGTRAP, current); + ret = -ERESTARTSYS; + } + } + return ret; +} + +static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, + u32 *status) +{ + int ret; + + if ((ret = spu_run_fini(ctx, npc, status)) != 0) + return ret; + if (*status & (SPU_STATUS_STOPPED_BY_STOP | + SPU_STATUS_STOPPED_BY_HALT)) { + return *status; + } + if ((ret = spu_run_init(ctx, npc, status)) != 0) + return ret; + return 0; +} + +static inline int spu_process_events(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED; + int ret = 0; + + if (spu->dsisr & pte_fault) + ret = spu_irq_class_1_bottom(spu); + if (spu->class_0_pending) + ret = spu_irq_class_0_bottom(spu); + if (!ret && signal_pending(current)) + ret = -ERESTARTSYS; + return ret; +} + +long 
spufs_run_spu(struct file *file, struct spu_context *ctx, + u32 * npc, u32 * status) +{ + int ret; + + if (down_interruptible(&ctx->run_sema)) + return -ERESTARTSYS; + + ret = spu_run_init(ctx, npc, status); + if (ret) + goto out; + + do { + ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status)); + if (unlikely(ret)) + break; + if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { + ret = spu_reacquire_runnable(ctx, npc, status); + if (ret) + goto out; + continue; + } + ret = spu_process_events(ctx); + + } while (!ret && !(*status & (SPU_STATUS_STOPPED_BY_STOP | + SPU_STATUS_STOPPED_BY_HALT))); + + ctx->ops->runcntl_stop(ctx); + ret = spu_run_fini(ctx, npc, status); + if (!ret) + ret = *status; + spu_yield(ctx); + +out: + up(&ctx->run_sema); + return ret; +} + diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c new file mode 100644 index 00000000000..963182fbd1a --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -0,0 +1,461 @@ +/* sched.c - SPU scheduler. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * SPU scheduler, based on Linux thread priority. For now use + * a simple "cooperative" yield model with no preemption. SPU + * scheduling will eventually be preemptive: When a thread with + * a higher static priority gets ready to run, then an active SPU + * context will be preempted and returned to the waitq. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/completion.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include "spufs.h" + +#define SPU_MIN_TIMESLICE (100 * HZ / 1000) + +#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) +struct spu_prio_array { + atomic_t nr_blocked; + unsigned long bitmap[SPU_BITMAP_SIZE]; + wait_queue_head_t waitq[MAX_PRIO]; +}; + +/* spu_runqueue - This is the main runqueue data structure for SPUs. */ +struct spu_runqueue { + struct semaphore sem; + unsigned long nr_active; + unsigned long nr_idle; + unsigned long nr_switches; + struct list_head active_list; + struct list_head idle_list; + struct spu_prio_array prio; +}; + +static struct spu_runqueue *spu_runqueues = NULL; + +static inline struct spu_runqueue *spu_rq(void) +{ + /* Future: make this a per-NODE array, + * and use cpu_to_node(smp_processor_id()) + */ + return spu_runqueues; +} + +static inline struct spu *del_idle(struct spu_runqueue *rq) +{ + struct spu *spu; + + BUG_ON(rq->nr_idle <= 0); + BUG_ON(list_empty(&rq->idle_list)); + /* Future: Move SPU out of low-power SRI state. 
*/ + spu = list_entry(rq->idle_list.next, struct spu, sched_list); + list_del_init(&spu->sched_list); + rq->nr_idle--; + return spu; +} + +static inline void del_active(struct spu_runqueue *rq, struct spu *spu) +{ + BUG_ON(rq->nr_active <= 0); + BUG_ON(list_empty(&rq->active_list)); + list_del_init(&spu->sched_list); + rq->nr_active--; +} + +static inline void add_idle(struct spu_runqueue *rq, struct spu *spu) +{ + /* Future: Put SPU into low-power SRI state. */ + list_add_tail(&spu->sched_list, &rq->idle_list); + rq->nr_idle++; +} + +static inline void add_active(struct spu_runqueue *rq, struct spu *spu) +{ + rq->nr_active++; + rq->nr_switches++; + list_add_tail(&spu->sched_list, &rq->active_list); +} + +static void prio_wakeup(struct spu_runqueue *rq) +{ + if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) { + int best = sched_find_first_bit(rq->prio.bitmap); + if (best < MAX_PRIO) { + wait_queue_head_t *wq = &rq->prio.waitq[best]; + wake_up_interruptible_nr(wq, 1); + } + } +} + +static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx, + u64 flags) +{ + int prio = current->prio; + wait_queue_head_t *wq = &rq->prio.waitq[prio]; + DEFINE_WAIT(wait); + + __set_bit(prio, rq->prio.bitmap); + atomic_inc(&rq->prio.nr_blocked); + prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE); + if (!signal_pending(current)) { + up(&rq->sem); + up_write(&ctx->state_sema); + pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__, + current->pid, current->prio); + schedule(); + down_write(&ctx->state_sema); + down(&rq->sem); + } + finish_wait(wq, &wait); + atomic_dec(&rq->prio.nr_blocked); + if (!waitqueue_active(wq)) + __clear_bit(prio, rq->prio.bitmap); +} + +static inline int is_best_prio(struct spu_runqueue *rq) +{ + int best_prio; + + best_prio = sched_find_first_bit(rq->prio.bitmap); + return (current->prio < best_prio) ? 1 : 0; +} + +static inline void mm_needs_global_tlbie(struct mm_struct *mm) +{ + /* Global TLBIE broadcast required with SPEs. 
*/ +#if (NR_CPUS > 1) + __cpus_setall(&mm->cpu_vm_mask, NR_CPUS); +#else + __cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */ +#endif +} + +static inline void bind_context(struct spu *spu, struct spu_context *ctx) +{ + pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid, + spu->number); + spu->ctx = ctx; + spu->flags = 0; + ctx->flags = 0; + ctx->spu = spu; + ctx->ops = &spu_hw_ops; + spu->pid = current->pid; + spu->prio = current->prio; + spu->mm = ctx->owner; + mm_needs_global_tlbie(spu->mm); + spu->ibox_callback = spufs_ibox_callback; + spu->wbox_callback = spufs_wbox_callback; + spu->stop_callback = spufs_stop_callback; + mb(); + spu_unmap_mappings(ctx); + spu_restore(&ctx->csa, spu); + spu->timestamp = jiffies; +} + +static inline void unbind_context(struct spu *spu, struct spu_context *ctx) +{ + pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__, + spu->pid, spu->number); + spu_unmap_mappings(ctx); + spu_save(&ctx->csa, spu); + spu->timestamp = jiffies; + ctx->state = SPU_STATE_SAVED; + spu->ibox_callback = NULL; + spu->wbox_callback = NULL; + spu->stop_callback = NULL; + spu->mm = NULL; + spu->pid = 0; + spu->prio = MAX_PRIO; + ctx->ops = &spu_backing_ops; + ctx->spu = NULL; + ctx->flags = 0; + spu->flags = 0; + spu->ctx = NULL; +} + +static void spu_reaper(void *data) +{ + struct spu_context *ctx = data; + struct spu *spu; + + down_write(&ctx->state_sema); + spu = ctx->spu; + if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) { + if (atomic_read(&spu->rq->prio.nr_blocked)) { + pr_debug("%s: spu=%d\n", __func__, spu->number); + ctx->ops->runcntl_stop(ctx); + spu_deactivate(ctx); + wake_up_all(&ctx->stop_wq); + } else { + clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags); + } + } + up_write(&ctx->state_sema); + put_spu_context(ctx); +} + +static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu) +{ + struct spu_context *ctx = get_spu_context(spu->ctx); + unsigned long now = jiffies; + unsigned long expire = spu->timestamp + 
SPU_MIN_TIMESLICE; + + set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags); + INIT_WORK(&ctx->reap_work, spu_reaper, ctx); + if (time_after(now, expire)) + schedule_work(&ctx->reap_work); + else + schedule_delayed_work(&ctx->reap_work, expire - now); +} + +static void check_preempt_active(struct spu_runqueue *rq) +{ + struct list_head *p; + struct spu *worst = NULL; + + list_for_each(p, &rq->active_list) { + struct spu *spu = list_entry(p, struct spu, sched_list); + struct spu_context *ctx = spu->ctx; + if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) { + if (!worst || (spu->prio > worst->prio)) { + worst = spu; + } + } + } + if (worst && (current->prio < worst->prio)) + schedule_spu_reaper(rq, worst); +} + +static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags) +{ + struct spu_runqueue *rq; + struct spu *spu = NULL; + + rq = spu_rq(); + down(&rq->sem); + for (;;) { + if (rq->nr_idle > 0) { + if (is_best_prio(rq)) { + /* Fall through. */ + spu = del_idle(rq); + break; + } else { + prio_wakeup(rq); + up(&rq->sem); + yield(); + if (signal_pending(current)) { + return NULL; + } + rq = spu_rq(); + down(&rq->sem); + continue; + } + } else { + check_preempt_active(rq); + prio_wait(rq, ctx, flags); + if (signal_pending(current)) { + prio_wakeup(rq); + spu = NULL; + break; + } + continue; + } + } + up(&rq->sem); + return spu; +} + +static void put_idle_spu(struct spu *spu) +{ + struct spu_runqueue *rq = spu->rq; + + down(&rq->sem); + add_idle(rq, spu); + prio_wakeup(rq); + up(&rq->sem); +} + +static int get_active_spu(struct spu *spu) +{ + struct spu_runqueue *rq = spu->rq; + struct list_head *p; + struct spu *tmp; + int rc = 0; + + down(&rq->sem); + list_for_each(p, &rq->active_list) { + tmp = list_entry(p, struct spu, sched_list); + if (tmp == spu) { + del_active(rq, spu); + rc = 1; + break; + } + } + up(&rq->sem); + return rc; +} + +static void put_active_spu(struct spu *spu) +{ + struct spu_runqueue *rq = spu->rq; + + down(&rq->sem); + add_active(rq, spu); + 
up(&rq->sem); +} + +/* Lock order: + * spu_activate() & spu_deactivate() require the + * caller to have down_write(&ctx->state_sema). + * + * The rq->sem is briefly held (inside or outside a + * given ctx lock) for list management, but is never + * held during save/restore. + */ + +int spu_activate(struct spu_context *ctx, u64 flags) +{ + struct spu *spu; + + if (ctx->spu) + return 0; + spu = get_idle_spu(ctx, flags); + if (!spu) + return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN; + bind_context(spu, ctx); + /* + * We're likely to wait for interrupts on the same + * CPU that we are now on, so send them here. + */ + spu_irq_setaffinity(spu, raw_smp_processor_id()); + put_active_spu(spu); + return 0; +} + +void spu_deactivate(struct spu_context *ctx) +{ + struct spu *spu; + int needs_idle; + + spu = ctx->spu; + if (!spu) + return; + needs_idle = get_active_spu(spu); + unbind_context(spu, ctx); + if (needs_idle) + put_idle_spu(spu); +} + +void spu_yield(struct spu_context *ctx) +{ + struct spu *spu; + int need_yield = 0; + + down_write(&ctx->state_sema); + spu = ctx->spu; + if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) { + pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number); + spu_deactivate(ctx); + ctx->state = SPU_STATE_SAVED; + need_yield = 1; + } else if (spu) { + spu->prio = MAX_PRIO; + } + up_write(&ctx->state_sema); + if (unlikely(need_yield)) + yield(); +} + +int __init spu_sched_init(void) +{ + struct spu_runqueue *rq; + struct spu *spu; + int i; + + rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL); + if (!rq) { + printk(KERN_WARNING "%s: Unable to allocate runqueues.\n", + __FUNCTION__); + return 1; + } + memset(rq, 0, sizeof(struct spu_runqueue)); + init_MUTEX(&rq->sem); + INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->idle_list); + rq->nr_active = 0; + rq->nr_idle = 0; + rq->nr_switches = 0; + atomic_set(&rq->prio.nr_blocked, 0); + for (i = 0; i < MAX_PRIO; i++) { + 
init_waitqueue_head(&rq->prio.waitq[i]); + __clear_bit(i, rq->prio.bitmap); + } + __set_bit(MAX_PRIO, rq->prio.bitmap); + for (;;) { + spu = spu_alloc(); + if (!spu) + break; + pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number); + add_idle(rq, spu); + spu->rq = rq; + spu->timestamp = jiffies; + } + if (!rq->nr_idle) { + printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__); + kfree(rq); + return 1; + } + return 0; +} + +void __exit spu_sched_exit(void) +{ + struct spu_runqueue *rq = spu_rq(); + struct spu *spu; + + if (!rq) { + printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__); + return; + } + while (rq->nr_idle > 0) { + spu = del_idle(rq); + if (!spu) + break; + spu_free(spu); + } + kfree(rq); +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c new file mode 100644 index 00000000000..0bf723dcd67 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -0,0 +1,336 @@ +/* + * spu_restore.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context restore sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +#define BR_INSTR 0x327fff80 /* br -4 */ +#define NOP_INSTR 0x40200000 /* nop */ +#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */ +#define STOP_INSTR 0x00000000 /* stop 0x0 */ +#define ILLEGAL_INSTR 0x00800000 /* illegal instr */ +#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */ + +static inline void fetch_regs_from_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)&regs_spill[0]; /* address of the register spill area, written to MFC_LSA below */ + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x40; /* GET */ + + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x44; /* GETL */ + + /* Restore, Step 4: + * Enqueue the GETL command (tag 0) to the MFC SPU command + * queue to transfer the upper 240 kb of LS from CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_decr(void) +{ + unsigned int offset; + unsigned int decr_running; + unsigned int decr; + + /* Restore, Step 6: + * If the LSCSA "decrementer running" flag is set + * then write the SPU_WrDec channel with the + * decrementer value from LSCSA. 
+ */ + offset = LSCSA_QW_OFFSET(decr_status); + decr_running = regs_spill[offset].slot[0]; + if (decr_running) { + offset = LSCSA_QW_OFFSET(decr); + decr = regs_spill[offset].slot[0]; + spu_writech(SPU_WrDec, decr); + } +} + +static inline void write_ppu_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 11: + * Write the MFC_WrOut_MB channel with the PPU_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppu_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutMbox, data); +} + +static inline void write_ppuint_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 12: + * Write the MFC_WrInt_MB channel with the PPUINT_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppuint_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutIntrMbox, data); +} + +static inline void restore_fpcr(void) +{ + unsigned int offset; + vector unsigned int fpcr; + + /* Restore, Step 13: + * Restore the floating-point status and control + * register from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + fpcr = regs_spill[offset].v; + spu_mtfpscr(fpcr); +} + +static inline void restore_srr0(void) +{ + unsigned int offset; + unsigned int srr0; + + /* Restore, Step 14: + * Restore the SPU SRR0 data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + srr0 = regs_spill[offset].slot[0]; + spu_writech(SPU_WrSRR0, srr0); +} + +static inline void restore_event_mask(void) +{ + unsigned int offset; + unsigned int event_mask; + + /* Restore, Step 15: + * Restore the SPU_RdEventMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + event_mask = regs_spill[offset].slot[0]; + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void restore_tag_mask(void) +{ + unsigned int offset; + unsigned int tag_mask; + + /* Restore, Step 16: + * Restore the SPU_RdTagMsk data from the LSCSA. 
+ */ + offset = LSCSA_QW_OFFSET(tag_mask); + tag_mask = regs_spill[offset].slot[0]; + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void restore_complete(void) +{ + extern void exit_fini(void); + unsigned int *exit_instrs = (unsigned int *)exit_fini; + unsigned int offset; + unsigned int stopped_status; + unsigned int stopped_code; + + /* Restore, Step 18: + * Issue a stop-and-signal instruction with + * "good context restore" signal value. + * + * Restore, Step 19: + * There may be additional instructions placed + * here by the PPE Sequence for SPU Context + * Restore in order to restore the correct + * "stopped state". + * + * This step is handled here by analyzing the + * LSCSA.stopped_status and then modifying the + * exit() function to behave appropriately. + */ + + offset = LSCSA_QW_OFFSET(stopped_status); + stopped_status = regs_spill[offset].slot[0]; + stopped_code = regs_spill[offset].slot[1]; + + switch (stopped_status) { + case SPU_STOPPED_STATUS_P_I: + /* SPU_Status[P,I]=1. Add illegal instruction + * followed by stop-and-signal instruction after + * end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_P_H: + /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed + * by stop-and-signal instruction after end of + * restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_S_P: + /* SPU_Status[S,P]=1. Add nop instruction + * followed by 'br -4' after end of restore + * code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S_I: + /* SPU_Status[S,I]=1. Add illegal instruction + * followed by 'br -4' after end of restore code. 
+ */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_I: + /* SPU_Status[I]=1. Add illegal instruction followed + * by infinite loop after end of restore sequence. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S: + /* SPU_Status[S]=1. Add two 'nop' instructions. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_H: + /* SPU_Status[H]=1. Add 'heq $0, $0' instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_P: + /* SPU_Status[P]=1. Add stop-and-signal instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_R: + /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + default: + /* SPU_Status[R]=1. No additional instructions. */ + break; + } + spu_sync(); +} + +/** + * main - entry point for SPU-side context restore. + * + * This code deviates from the documented sequence in the + * following aspects: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. The register spill area is pulled by SPU + * into LS, rather than pushed by PPE. + * 3. All 128 registers are restored by exit(). + * 4. The exit() function is modified at run + * time in order to properly restore the + * SPU_Status register. 
+ */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + fetch_regs_from_mem(lscsa_ea); + + set_event_mask(); /* Step 1. */ + set_tag_mask(); /* Step 2. */ + build_dma_list(lscsa_ea); /* Step 3. */ + restore_upper_240kb(lscsa_ea); /* Step 4. */ + /* Step 5: done by 'exit'. */ + restore_decr(); /* Step 6. */ + enqueue_putllc(lscsa_ea); /* Step 7. */ + set_tag_update(); /* Step 8. */ + read_tag_status(); /* Step 9. */ + read_llar_status(); /* Step 10. */ + write_ppu_mb(); /* Step 11. */ + write_ppuint_mb(); /* Step 12. */ + restore_fpcr(); /* Step 13. */ + restore_srr0(); /* Step 14. */ + restore_event_mask(); /* Step 15. */ + restore_tag_mask(); /* Step 16. */ + /* Step 17. done by 'exit'. */ + restore_complete(); /* Step 18. */ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S new file mode 100644 index 00000000000..2905949debe --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S @@ -0,0 +1,116 @@ +/* + * crt0_r.S: Entry function for SPU-side context restore. + * + * Copyright (C) 2005 IBM + * + * Entry and exit function for SPU-side of the context restore + * sequence. Sets up an initial stack frame, then branches to + * 'main'. On return, restores all 128 registers from the LSCSA + * and exits. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/spu_csa.h> + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + +.global exit +.global _exit +exit: +_exit: + /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +restore_regs: + lqr $4, restore_reg_insts +restore_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +restore_reg_insts: /* must be quad-word aligned. */ + lqd $16, 0($3) + lqd $17, 16($3) + lqd $18, 32($3) + lqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, restore_reg_insts + ai $3, $3, 64 + brnz $5, restore_reg_loop + + /* SPU Context Restore Step 17: Restore the first 16 GPRs. */ + lqa $0, regs_spill + 0 + lqa $1, regs_spill + 16 + lqa $2, regs_spill + 32 + lqa $3, regs_spill + 48 + lqa $4, regs_spill + 64 + lqa $5, regs_spill + 80 + lqa $6, regs_spill + 96 + lqa $7, regs_spill + 112 + lqa $8, regs_spill + 128 + lqa $9, regs_spill + 144 + lqa $10, regs_spill + 160 + lqa $11, regs_spill + 176 + lqa $12, regs_spill + 192 + lqa $13, regs_spill + 208 + lqa $14, regs_spill + 224 + lqa $15, regs_spill + 240 + + /* Under normal circumstances, the 'exit' function + * terminates with 'stop SPU_RESTORE_COMPLETE', + * indicating that the SPU-side restore code has + * completed. 
+ * + * However it is possible that instructions immediately + * following the 'stop 0x3ffc' have been modified at run + * time so as to recreate the exact SPU_Status settings + * from the application, e.g. illegal instruciton, halt, + * etc. + */ +.global exit_fini +.global _exit_fini +exit_fini: +_exit_fini: + stop SPU_RESTORE_COMPLETE + stop 0 + stop 0 + stop 0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped new file mode 100644 index 00000000000..1b2355ff703 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -0,0 +1,231 @@ +/* + * spu_restore_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_restore.c. + * Do not edit! + */ +static unsigned int spu_restore_code[] __page_aligned = { +0x40800000, 0x409ff801, 0x24000080, 0x24fd8081, +0x1cd80081, 0x33001180, 0x42030003, 0x33800284, +0x1c010204, 0x40200000, 0x40200000, 0x40200000, +0x34000190, 0x34004191, 0x34008192, 0x3400c193, +0x141fc205, 0x23fffd84, 0x1c100183, 0x217ffa85, +0x3080a000, 0x3080a201, 0x3080a402, 0x3080a603, +0x3080a804, 0x3080aa05, 0x3080ac06, 0x3080ae07, +0x3080b008, 0x3080b209, 0x3080b40a, 0x3080b60b, +0x3080b80c, 0x3080ba0d, 0x3080bc0e, 0x3080be0f, +0x00003ffc, 0x00000000, 0x00000000, 0x00000000, +0x01a00182, 0x3ec00083, 0xb0a14103, 0x01a00204, +0x3ec10082, 0x4202800e, 0x04000703, 0xb0a14202, +0x21a00803, 0x3fbf028d, 0x3f20068d, 0x3fbe0682, +0x3fe30102, 0x21a00882, 0x3f82028f, 0x3fe3078f, +0x3fbf0784, 0x3f200204, 0x3fbe0204, 0x3fe30204, +0x04000203, 0x21a00903, 0x40848002, 0x21a00982, +0x40800003, 0x21a00a03, 0x40802002, 0x21a00a82, +0x21a00083, 0x40800082, 0x21a00b02, 0x10002818, +0x40a80002, 0x32800007, 0x4207000c, 0x18008208, +0x40a0000b, 0x4080020a, 0x40800709, 0x00200000, +0x42070002, 0x3ac30384, 0x1cffc489, 0x00200000, +0x18008383, 0x38830382, 0x4cffc486, 0x3ac28185, 
+0xb0408584, 0x28830382, 0x1c020387, 0x38828182, +0xb0408405, 0x1802c408, 0x28828182, 0x217ff886, +0x04000583, 0x21a00803, 0x3fbe0682, 0x3fe30102, +0x04000106, 0x21a00886, 0x04000603, 0x21a00903, +0x40803c02, 0x21a00982, 0x40800003, 0x04000184, +0x21a00a04, 0x40802202, 0x21a00a82, 0x42028005, +0x34208702, 0x21002282, 0x21a00804, 0x21a00886, +0x3fbf0782, 0x3f200102, 0x3fbe0102, 0x3fe30102, +0x21a00902, 0x40804003, 0x21a00983, 0x21a00a04, +0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, +0x01a00c02, 0x01a00d83, 0x3420c282, 0x21a00e02, +0x34210283, 0x21a00f03, 0x34200284, 0x77400200, +0x3421c282, 0x21a00702, 0x34218283, 0x21a00083, +0x34214282, 0x21a00b02, 0x4200480c, 0x00200000, +0x1c010286, 0x34220284, 0x34220302, 0x0f608203, +0x5c024204, 0x3b81810b, 0x42013c02, 0x00200000, +0x18008185, 0x38808183, 0x3b814182, 0x21004e84, +0x4020007f, 0x35000100, 0x000004e0, 0x000002a0, +0x000002e8, 0x00000428, 0x00000360, 0x000002e8, +0x000004a0, 0x00000468, 0x000003c8, 0x00000360, +0x409ffe02, 0x30801203, 0x40800204, 0x3ec40085, +0x10009c09, 0x3ac10606, 0xb060c105, 0x4020007f, +0x4020007f, 0x20801203, 0x38810602, 0xb0408586, +0x28810602, 0x32004180, 0x34204702, 0x21a00382, +0x4020007f, 0x327fdc80, 0x409ffe02, 0x30801203, +0x40800204, 0x3ec40087, 0x40800405, 0x00200000, +0x40800606, 0x3ac10608, 0x3ac14609, 0x3ac1860a, +0xb060c107, 0x20801203, 0x41004003, 0x38810602, +0x4020007f, 0xb0408188, 0x4020007f, 0x28810602, +0x41201002, 0x38814603, 0x10009c09, 0xb060c109, +0x4020007f, 0x28814603, 0x41193f83, 0x38818602, +0x60ffc003, 0xb040818a, 0x28818602, 0x32003080, +0x409ffe02, 0x30801203, 0x40800204, 0x3ec40087, +0x41201008, 0x10009c14, 0x40800405, 0x3ac10609, +0x40800606, 0x3ac1460a, 0xb060c107, 0x3ac1860b, +0x20801203, 0x38810602, 0xb0408409, 0x28810602, +0x38814603, 0xb060c40a, 0x4020007f, 0x28814603, +0x41193f83, 0x38818602, 0x60ffc003, 0xb040818b, +0x28818602, 0x32002380, 0x409ffe02, 0x30801204, +0x40800205, 0x3ec40083, 0x40800406, 0x3ac14607, +0x3ac18608, 0xb0810103, 0x41004002, 
0x20801204, +0x4020007f, 0x38814603, 0x10009c0b, 0xb060c107, +0x4020007f, 0x4020007f, 0x28814603, 0x38818602, +0x4020007f, 0x4020007f, 0xb0408588, 0x28818602, +0x4020007f, 0x32001780, 0x409ffe02, 0x1000640e, +0x40800204, 0x30801203, 0x40800405, 0x3ec40087, +0x40800606, 0x3ac10608, 0x3ac14609, 0x3ac1860a, +0xb060c107, 0x20801203, 0x413d8003, 0x38810602, +0x4020007f, 0x327fd780, 0x409ffe02, 0x10007f0c, +0x40800205, 0x30801204, 0x40800406, 0x3ec40083, +0x3ac14607, 0x3ac18608, 0xb0810103, 0x413d8002, +0x20801204, 0x38814603, 0x4020007f, 0x327feb80, +0x409ffe02, 0x30801203, 0x40800204, 0x3ec40087, +0x40800405, 0x1000650a, 0x40800606, 0x3ac10608, +0x3ac14609, 0x3ac1860a, 0xb060c107, 0x20801203, +0x38810602, 0xb0408588, 0x4020007f, 0x327fc980, +0x00400000, 0x40800003, 0x4020007f, 0x35000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 
0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 
0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 
+0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c 
b/arch/powerpc/platforms/cell/spufs/spu_save.c new file mode 100644 index 00000000000..196033b8a57 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -0,0 +1,195 @@ +/* + * spu_save.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context save sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +static inline void save_event_mask(void) +{ + unsigned int offset; + + /* Save, Step 2: + * Read the SPU_RdEventMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventStatMask); +} + +static inline void save_tag_mask(void) +{ + unsigned int offset; + + /* Save, Step 3: + * Read the SPU_RdTagMsk channel and save to the LSCSA. 
+ */ + offset = LSCSA_QW_OFFSET(tag_mask); + regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask); +} + +static inline void save_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x24; /* PUTL */ + + /* Save, Step 7: + * Enqueue the PUTL command (tag 0) to the MFC SPU command + * queue to transfer the remaining 240 kb of LS to CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_fpcr(void) +{ + // vector unsigned int fpcr; + unsigned int offset; + + /* Save, Step 9: + * Issue the floating-point status and control register + * read instruction, and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + regs_spill[offset].v = spu_mffpscr(); +} + +static inline void save_decr(void) +{ + unsigned int offset; + + /* Save, Step 10: + * Read and save the SPU_RdDec channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr); + regs_spill[offset].slot[0] = spu_readch(SPU_RdDec); +} + +static inline void save_srr0(void) +{ + unsigned int offset; + + /* Save, Step 11: + * Read and save the SPU_WSRR0 channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0); +} + +static inline void spill_regs_to_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)&regs_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x20; /* PUT */ + + /* Save, Step 13: + * Enqueue a PUT command (tag 0) to send the LSCSA + * to the CSA.
+ */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void enqueue_sync(addr64 lscsa_ea) +{ + unsigned int tag_id = 0; + unsigned int cmd = 0xCC; + + /* Save, Step 14: + * Enqueue an MFC_SYNC command (tag 0). + */ + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_complete(void) +{ + /* Save, Step 18: + * Issue a stop-and-signal instruction indicating + * "save complete". Note: This function will not + * return!! + */ + spu_stop(SPU_SAVE_COMPLETE); +} + +/** + * main - entry point for SPU-side context save. + * + * This code deviates from the documented sequence as follows: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. All 128 registers are saved by crt0.o. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + + /* Step 1: done by exit(). */ + save_event_mask(); /* Step 2. */ + save_tag_mask(); /* Step 3. */ + set_event_mask(); /* Step 4. */ + set_tag_mask(); /* Step 5. */ + build_dma_list(lscsa_ea); /* Step 6. */ + save_upper_240kb(lscsa_ea); /* Step 7. */ + /* Step 8: done by exit(). */ + save_fpcr(); /* Step 9. */ + save_decr(); /* Step 10. */ + save_srr0(); /* Step 11. */ + enqueue_putllc(lscsa_ea); /* Step 12. */ + spill_regs_to_mem(lscsa_ea); /* Step 13. */ + enqueue_sync(lscsa_ea); /* Step 14. */ + set_tag_update(); /* Step 15. */ + read_tag_status(); /* Step 16. */ + read_llar_status(); /* Step 17. */ + save_complete(); /* Step 18. 
*/ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S new file mode 100644 index 00000000000..6659d6a66fa --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S @@ -0,0 +1,102 @@ +/* + * crt0_s.S: Entry function for SPU-side context save. + * + * Copyright (C) 2005 IBM + * + * Entry function for SPU-side of the context save sequence. + * Saves all 128 GPRs, sets up an initial stack frame, then + * branches to 'main'. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/spu_csa.h> + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* SPU Context Save Step 1: Save the first 16 GPRs. */ + stqa $0, regs_spill + 0 + stqa $1, regs_spill + 16 + stqa $2, regs_spill + 32 + stqa $3, regs_spill + 48 + stqa $4, regs_spill + 64 + stqa $5, regs_spill + 80 + stqa $6, regs_spill + 96 + stqa $7, regs_spill + 112 + stqa $8, regs_spill + 128 + stqa $9, regs_spill + 144 + stqa $10, regs_spill + 160 + stqa $11, regs_spill + 176 + stqa $12, regs_spill + 192 + stqa $13, regs_spill + 208 + stqa $14, regs_spill + 224 + stqa $15, regs_spill + 240 + + /* SPU Context Save, Step 8: Save the remaining 112 GPRs. 
*/ + ila $3, regs_spill + 256 +save_regs: + lqr $4, save_reg_insts +save_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +save_reg_insts: /* must be quad-word aligned. */ + stqd $16, 0($3) + stqd $17, 16($3) + stqd $18, 32($3) + stqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, save_reg_insts + ai $3, $3, 64 + brnz $5, save_reg_loop + + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + + /* In this case main should not return; if it does + * there has been an error in the sequence. Execute + * stop-and-signal with code=0. + */ +.global exit +.global _exit +exit: +_exit: + stop 0x0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 + diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped new file mode 100644 index 00000000000..39e54003f1d --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped @@ -0,0 +1,191 @@ +/* + * spu_save_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_save.c. + * Do not edit! 
+ */ +static unsigned int spu_save_code[] __page_aligned = { +0x20805000, 0x20805201, 0x20805402, 0x20805603, +0x20805804, 0x20805a05, 0x20805c06, 0x20805e07, +0x20806008, 0x20806209, 0x2080640a, 0x2080660b, +0x2080680c, 0x20806a0d, 0x20806c0e, 0x20806e0f, +0x4201c003, 0x33800184, 0x1c010204, 0x40200000, +0x24000190, 0x24004191, 0x24008192, 0x2400c193, +0x141fc205, 0x23fffd84, 0x1c100183, 0x217ffb85, +0x40800000, 0x409ff801, 0x24000080, 0x24fd8081, +0x1cd80081, 0x33000180, 0x00000000, 0x00000000, +0x01a00182, 0x3ec00083, 0xb1c38103, 0x01a00204, +0x3ec10082, 0x4201400d, 0xb1c38202, 0x01a00583, +0x34218682, 0x3ed80684, 0xb0408184, 0x24218682, +0x01a00603, 0x00200000, 0x34214682, 0x3ed40684, +0xb0408184, 0x40800003, 0x24214682, 0x21a00083, +0x40800082, 0x21a00b02, 0x4020007f, 0x1000251e, +0x40a80002, 0x32800008, 0x4205c00c, 0x00200000, +0x40a0000b, 0x3f82070f, 0x4080020a, 0x40800709, +0x3fe3078f, 0x3fbf0783, 0x3f200183, 0x3fbe0183, +0x3fe30187, 0x18008387, 0x4205c002, 0x3ac30404, +0x1cffc489, 0x00200000, 0x18008403, 0x38830402, +0x4cffc486, 0x3ac28185, 0xb0408584, 0x28830402, +0x1c020408, 0x38828182, 0xb0408385, 0x1802c387, +0x28828182, 0x217ff886, 0x04000582, 0x32800007, +0x21a00802, 0x3fbf0705, 0x3f200285, 0x3fbe0285, +0x3fe30285, 0x21a00885, 0x04000603, 0x21a00903, +0x40803c02, 0x21a00982, 0x04000386, 0x21a00a06, +0x40801202, 0x21a00a82, 0x73000003, 0x24200683, +0x01a00404, 0x00200000, 0x34204682, 0x3ec40683, +0xb0408203, 0x24204682, 0x01a00783, 0x00200000, +0x3421c682, 0x3edc0684, 0xb0408184, 0x2421c682, +0x21a00806, 0x21a00885, 0x3fbf0784, 0x3f200204, +0x3fbe0204, 0x3fe30204, 0x21a00904, 0x40804002, +0x21a00982, 0x21a00a06, 0x40805a02, 0x21a00a82, +0x04000683, 0x21a00803, 0x21a00885, 0x21a00904, +0x40848002, 0x21a00982, 0x21a00a06, 0x40801002, +0x21a00a82, 0x21a00a06, 0x40806602, 0x00200000, +0x35800009, 0x21a00a82, 0x40800083, 0x21a00b83, +0x01a00c02, 0x01a00d83, 0x00003ffb, 0x40800003, +0x4020007f, 0x35000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 
0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 
0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 
+0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 
0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h new file mode 100644 index 00000000000..58359feb6c9 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h @@ -0,0 +1,160 @@ +/* + * utils.h: Utilities for SPU-side of the context switch operation. + * + * (C) Copyright IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _SPU_CONTEXT_UTILS_H_ +#define _SPU_CONTEXT_UTILS_H_ + +/* + * 64-bit safe EA. + */ +typedef union { + unsigned long long ull; + unsigned int ui[2]; +} addr64; + +/* + * 128-bit register template. + */ +typedef union { + unsigned int slot[4]; + vector unsigned int v; +} spu_reg128v; + +/* + * DMA list structure. + */ +struct dma_list_elem { + unsigned int size; + unsigned int ea_low; +}; + +/* + * Declare storage for 8-byte aligned DMA list. + */ +struct dma_list_elem dma_list[15] __attribute__ ((aligned(8))); + +/* + * External definition for storage + * declared in crt0. + */ +extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS]; + +/* + * Compute LSCSA byte offset for a given field. + */ +static struct spu_lscsa *dummy = (struct spu_lscsa *)0; +#define LSCSA_BYTE_OFFSET(_field) \ + ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0]))) +#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) + +static inline void set_event_mask(void) +{ + unsigned int event_mask = 0; + + /* Save, Step 4: + * Restore, Step 1: + * Set the SPU_WrEventMask channel to zero to mask + * all events. + */ + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void set_tag_mask(void) +{ + unsigned int tag_mask = 1; + + /* Save, Step 5: + * Restore, Step 2: + * Set the SPU_WrTagMsk channel to '01' to unmask + * only tag group 0. + */ + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void build_dma_list(addr64 lscsa_ea) +{ + unsigned int ea_low; + int i; + + /* Save, Step 6: + * Restore, Step 3: + * Update the effective address for the CSA in the + * pre-canned DMA-list in local storage.
+ */ + ea_low = lscsa_ea.ui[1]; + ea_low += LSCSA_BYTE_OFFSET(ls[16384]); + + for (i = 0; i < 15; i++, ea_low += 16384) { + dma_list[i].size = 16384; + dma_list[i].ea_low = ea_low; + } +} + +static inline void enqueue_putllc(addr64 lscsa_ea) +{ + unsigned int ls = 0; + unsigned int size = 128; + unsigned int tag_id = 0; + unsigned int cmd = 0xB4; /* PUTLLC */ + + /* Save, Step 12: + * Restore, Step 7: + * Send a PUTLLC (tag 0) command to the MFC using + * an effective address in the CSA in order to + * remove any possible lock-line reservation. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void set_tag_update(void) +{ + unsigned int update_any = 1; + + /* Save, Step 15: + * Restore, Step 8: + * Write the MFC_TagUpdate channel with '01'. + */ + spu_writech(MFC_WrTagUpdate, update_any); +} + +static inline void read_tag_status(void) +{ + /* Save, Step 16: + * Restore, Step 9: + * Read the MFC_TagStat channel data. + */ + spu_readch(MFC_RdTagStat); +} + +static inline void read_llar_status(void) +{ + /* Save, Step 17: + * Restore, Step 10: + * Read the MFC_AtomicStat channel data. + */ + spu_readch(MFC_RdAtomicStat); +} + +#endif /* _SPU_CONTEXT_UTILS_H_ */ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h new file mode 100644 index 00000000000..db2601f0abd --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -0,0 +1,163 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef SPUFS_H +#define SPUFS_H + +#include <linux/kref.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/fs.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +/* The magic number for our file system */ +enum { + SPUFS_MAGIC = 0x23c9b64e, +}; + +struct spu_context_ops; + +#define SPU_CONTEXT_PREEMPT 0UL /* NOTE(review): presumably a 'flags' value for spu_activate() -- confirm */ + +struct spu_context { + struct spu *spu; /* pointer to a physical SPU */ + struct spu_state csa; /* SPU context save area. */ + spinlock_t mmio_lock; /* protects mmio access */ + struct address_space *local_store;/* local store backing store */ + + enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; /* runnable or saved */ + struct rw_semaphore state_sema; /* NOTE(review): presumably guards 'state' -- confirm */ + struct semaphore run_sema; + + struct mm_struct *owner; + + struct kref kref; /* reference count for this context */ + wait_queue_head_t ibox_wq; + wait_queue_head_t wbox_wq; + wait_queue_head_t stop_wq; + struct fasync_struct *ibox_fasync; + struct fasync_struct *wbox_fasync; + struct spu_context_ops *ops; /* operations table used to access this context */ + struct work_struct reap_work; + u64 flags; +}; + +/* SPU context query/set operations. 
*/ +struct spu_context_ops { + int (*mbox_read) (struct spu_context * ctx, u32 * data); + u32(*mbox_stat_read) (struct spu_context * ctx); + unsigned int (*mbox_stat_poll)(struct spu_context *ctx, + unsigned int events); + int (*ibox_read) (struct spu_context * ctx, u32 * data); + int (*wbox_write) (struct spu_context * ctx, u32 data); + u32(*signal1_read) (struct spu_context * ctx); + void (*signal1_write) (struct spu_context * ctx, u32 data); + u32(*signal2_read) (struct spu_context * ctx); + void (*signal2_write) (struct spu_context * ctx, u32 data); + void (*signal1_type_set) (struct spu_context * ctx, u64 val); + u64(*signal1_type_get) (struct spu_context * ctx); + void (*signal2_type_set) (struct spu_context * ctx, u64 val); + u64(*signal2_type_get) (struct spu_context * ctx); + u32(*npc_read) (struct spu_context * ctx); + void (*npc_write) (struct spu_context * ctx, u32 data); + u32(*status_read) (struct spu_context * ctx); + char*(*get_ls) (struct spu_context * ctx); + void (*runcntl_write) (struct spu_context * ctx, u32 data); + void (*runcntl_stop) (struct spu_context * ctx); +}; + +extern struct spu_context_ops spu_hw_ops; /* two instances of the operations table */ +extern struct spu_context_ops spu_backing_ops; + +struct spufs_inode_info { + struct spu_context *i_ctx; /* SPU context attached to this inode */ + struct inode vfs_inode; /* embedded VFS inode; SPUFS_I() maps it back to this struct */ +}; +#define SPUFS_I(inode) \ + container_of(inode, struct spufs_inode_info, vfs_inode) + +extern struct tree_descr spufs_dir_contents[]; + +/* system call implementation */ +long spufs_run_spu(struct file *file, + struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create_thread(struct nameidata *nd, + unsigned int flags, mode_t mode); +extern struct file_operations spufs_context_fops; + +/* context management */ +struct spu_context * alloc_spu_context(struct address_space *local_store); +void destroy_spu_context(struct kref *kref); +struct spu_context * get_spu_context(struct spu_context *ctx); +int put_spu_context(struct spu_context *ctx); +void spu_unmap_mappings(struct spu_context *ctx); + 
+void spu_forget(struct spu_context *ctx); +void spu_acquire(struct spu_context *ctx); +void spu_release(struct spu_context *ctx); +int spu_acquire_runnable(struct spu_context *ctx); +void spu_acquire_saved(struct spu_context *ctx); + +int spu_activate(struct spu_context *ctx, u64 flags); +void spu_deactivate(struct spu_context *ctx); +void spu_yield(struct spu_context *ctx); +int __init spu_sched_init(void); +void __exit spu_sched_exit(void); + +/* + * spufs_wait + * Same as wait_event_interruptible(), except that here + * we need to call spu_release(ctx) before sleeping, and + * then spu_acquire(ctx) when awoken. + * + * Evaluates to 0 once 'condition' is true, or to + * -ERESTARTSYS when a signal is pending while the + * condition is still false. + * + * 'ctx' is not a macro argument: a variable of that + * name must be in scope where the macro is expanded. + * 'condition' is re-evaluated on every loop iteration. + */ + +#define spufs_wait(wq, condition) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + for (;;) { \ + prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (!signal_pending(current)) { \ + spu_release(ctx); \ + schedule(); \ + spu_acquire(ctx); \ + continue; \ + } \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + finish_wait(&(wq), &__wait); \ + __ret; \ +}) + +size_t spu_wbox_write(struct spu_context *ctx, u32 data); +size_t spu_ibox_read(struct spu_context *ctx, u32 *data); + +/* irq callback funcs. */ +void spufs_ibox_callback(struct spu *spu); +void spufs_wbox_callback(struct spu *spu); +void spufs_stop_callback(struct spu *spu); + +#endif diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c new file mode 100644 index 00000000000..212db28531f --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -0,0 +1,2204 @@ +/* + * spu_switch.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * Host-side part of SPU context switch sequence outlined in + * Synergistic Processor Element, Book IV. + * + * A fully premptive switch of an SPE is very expensive in terms + * of time and system resources. 
SPE Book IV indicates that SPE + * allocation should follow a "serially reusable device" model, + * in which the SPE is assigned a task until it completes. When + * this is not possible, this sequence may be used to preemptively + * save, and then later (optionally) restore the context of a + * program executing on an SPE. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> + +#include "spu_save_dump.h" +#include "spu_restore_dump.h" + +#if 0 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + } while (_c); \ + } +#else +#define RELAX_SPIN_COUNT 1000 /* cpu_relax() spins per round before POLL_WHILE_TRUE() calls yield() */ +#define POLL_WHILE_TRUE(_c) { \ + do { \ + int _i; \ + for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \ + cpu_relax(); \ + } \ + if (unlikely(_c)) yield(); \ + else break; \ + } while (_c); \ + } +#endif /* debug */ + +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) /* poll until _c is true; _c is evaluated repeatedly */ + +static inline void acquire_spu_lock(struct spu *spu) +{ + /* Save, Step 1: + * Restore, Step 1: + * Acquire SPU-specific mutual exclusion lock. + * TBD. Currently a no-op. + */ +} + +static inline void release_spu_lock(struct spu *spu) +{ + /* Restore, Step 76: + * Release SPU-specific mutual exclusion lock. + * TBD. Currently a no-op. + */ +} + +static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 isolate_state; + + /* Save, Step 2: + * Save, Step 6: + * If SPU_Status[E,L,IS] any field is '1', this + * SPU is in isolate state and cannot be context + * saved at this time. + * + * Returns 1 if any of the three isolate status + * bits is set in SPU_Status, 0 otherwise. 'csa' + * is not used. + */ + isolate_state = SPU_STATUS_ISOLATED_STATE | + SPU_STATUS_ISOLATED_LOAD_STAUTUS | SPU_STATUS_ISOLATED_EXIT_STAUTUS; + return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; +} + +static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 3: + * Restore, Step 2: + * Save INT_Mask_class0 in CSA. + * Write INT_MASK_class0 with value of 0. + * Save INT_Mask_class1 in CSA. + * Write INT_MASK_class1 with value of 0. + * Save INT_Mask_class2 in CSA. 
+ * Write INT_MASK_class2 with value of 0. + */ + spin_lock_irq(&spu->register_lock); + if (csa) { /* csa may be NULL: the masks are then cleared without being saved */ + csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0); + csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1); + csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2); + } + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + eieio(); + spin_unlock_irq(&spu->register_lock); +} + +static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 4: + * Restore, Step 25. + * Set a software watchdog timer, which specifies the + * maximum allowable time for a context save sequence. + * + * For present, this implementation will not set a global + * watchdog timer, as virtualization & variable system load + * may cause unpredictable execution times. + * The function body is therefore empty. + */ +} + +static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 5: + * Restore, Step 3: + * Inhibit user-space access (if provided) to this + * SPU by unmapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. Currently a no-op. + */ +} + +static inline void set_switch_pending(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 7: + * Restore, Step 5: + * Set a software context switch pending flag: + * the SPU_CONTEXT_SWITCH_PENDING bit in spu->flags, + * followed by a full memory barrier. + */ + set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + mb(); +} + +static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 8: + * Suspend DMA and save MFC_CNTL. 
+ */ + switch (in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) { + case MFC_CNTL_SUSPEND_IN_PROGRESS: + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + /* fall through: the suspend has now completed */ + case MFC_CNTL_SUSPEND_COMPLETE: + if (csa) { /* csa may be NULL: nothing is saved then */ + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_DMA_QUEUE; + } + break; + case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: + /* queue was running: request a suspend, wait for it, and save the value with the suspend bit cleared */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + if (csa) { + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) & + ~MFC_CNTL_SUSPEND_DMA_QUEUE; + } + break; + } +} + +static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 9: + * Save SPU_Runcntl in the CSA. This value contains + * the "Application Desired State". + */ + csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW); +} + +static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 10: + * Save MFC_SR1 in the CSA, read through + * spu_mfc_sr1_get(). + */ + csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu); +} + +static inline void save_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 11: + * Read SPU_Status[R], and save to CSA. 
+ */ + if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) { + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } else { + u32 stopped; + + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + stopped = + SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if ((in_be32(&prob->spu_status_R) & stopped) == 0) + csa->prob.spu_status_R = SPU_STATUS_RUNNING; /* no stop-reason bit set: record the SPU as running */ + else + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } +} + +static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 12: + * Read MFC_CNTL[Ds]. Update saved copy of + * CSA.MFC_CNTL[Ds]. + * + * When the decrementer is running, this also + * records get_cycles() in csa->suspend_time and + * saves the data of channel 7. + */ + if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { + csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + csa->suspend_time = get_cycles(); + out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); + eieio(); + csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); + eieio(); + } else { + csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + } +} + +static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 13: + * Write MFC_CNTL[Dh] set to a '1' to halt + * the decrementer. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); + eieio(); +} + +static inline void save_timebase(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 14: + * Read PPE Timebase High and Timebase low registers + * and save in CSA. TBD. + * + * This implementation stores get_cycles() in + * csa->suspend_time. + */ + csa->suspend_time = get_cycles(); +} + +static inline void remove_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 15: + * Remove other SPU access to this SPU by unmapping + * this SPU's pages from their address space. TBD. 
+ */ +} + +static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 16: + * Restore, Step 11. + * Write SPU_MSSync register. Poll SPU_MSSync[P] + * for a value of 0, i.e. until MS_SYNC_PENDING + * reads back clear. + */ + out_be64(&prob->spc_mssync_RW, 1UL); + POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING); +} + +static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 17: + * Restore, Step 12. + * Restore, Step 48. + * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register. + * Then issue a PPE sync instruction. + * + * Implemented as spu_tlb_invalidate() followed + * by mb(). + */ + spu_tlb_invalidate(spu); + mb(); +} + +static inline void handle_pending_interrupts(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 18: + * Handle any pending interrupts from this SPU + * here. This is OS or hypervisor specific. One + * option is to re-enable interrupts to handle any + * pending interrupts, with the interrupt handlers + * recognizing the software Context Switch Pending + * flag, to ensure the SPU execution or MFC command + * queue is not restarted. TBD. Currently a no-op. + */ +} + +static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 19: + * If MFC_Cntl[Se]=0 then save + * MFC command queues. 
+ */ + if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) { + for (i = 0; i < 8; i++) { /* 8 'puq' entries, four data words each */ + csa->priv2.puq[i].mfc_cq_data0_RW = + in_be64(&priv2->puq[i].mfc_cq_data0_RW); + csa->priv2.puq[i].mfc_cq_data1_RW = + in_be64(&priv2->puq[i].mfc_cq_data1_RW); + csa->priv2.puq[i].mfc_cq_data2_RW = + in_be64(&priv2->puq[i].mfc_cq_data2_RW); + csa->priv2.puq[i].mfc_cq_data3_RW = + in_be64(&priv2->puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { /* 16 'spuq' entries, four data words each */ + csa->priv2.spuq[i].mfc_cq_data0_RW = + in_be64(&priv2->spuq[i].mfc_cq_data0_RW); + csa->priv2.spuq[i].mfc_cq_data1_RW = + in_be64(&priv2->spuq[i].mfc_cq_data1_RW); + csa->priv2.spuq[i].mfc_cq_data2_RW = + in_be64(&priv2->spuq[i].mfc_cq_data2_RW); + csa->priv2.spuq[i].mfc_cq_data3_RW = + in_be64(&priv2->spuq[i].mfc_cq_data3_RW); + } + } +} + +static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 20: + * Save the PPU_QueryMask register + * in the CSA. + */ + csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW); +} + +static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 21: + * Save the PPU_QueryType register + * in the CSA. + */ + csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); +} + +static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 22: + * Save the MFC_CSR_TSQ register + * in the CSA (csa->priv2). + */ + csa->priv2.spu_tag_status_query_RW = + in_be64(&priv2->spu_tag_status_query_RW); +} + +static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 23: + * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers in the CSA. 
+ */ + csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW); + csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW); +} + +static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 24: + * Save the MFC_CSR_ATO register in + * the CSA. + */ + csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW); +} + +static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 25: + * Save the MFC_TCLASS_ID register in + * the CSA. + */ + csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu); +} + +static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 26: + * Restore, Step 23. + * Write the MFC_TCLASS_ID register with + * the value 0x10000000. + */ + spu_mfc_tclass_id_set(spu, 0x10000000); + eieio(); +} + +static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 27: + * Restore, Step 14. + * Write MFC_CNTL[Pc]=1 (purge queue). + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_PURGE_DMA_REQUEST); + eieio(); +} + +static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 28: + * Poll MFC_CNTL[Ps] until value '11' is read + * (purge complete). + * + * Every bit of the 'complete' pattern must be set + * before the wait ends; a plain non-zero test of + * the masked value would also accept a field that + * is only partially set. + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_PURGE_DMA_COMPLETE) == + MFC_CNTL_PURGE_DMA_COMPLETE); +} + +static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 29: + * If MFC_SR1[R]='1', save SLBs in CSA. 
+ */ + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) { + csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W); + for (i = 0; i < 8; i++) { /* select each of 8 SLB entries in turn and save its ESID/VSID */ + out_be64(&priv2->slb_index_W, i); + eieio(); + csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW); + csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW); + eieio(); + } + } +} + +static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 30: + * Restore, Step 18: + * Write MFC_SR1 with MFC_SR1[D=0,S=1] and + * MFC_SR1[TL,R,Pr,T] set correctly for the + * OS specific environment. + * + * Implementation note: The SPU-side code + * for save/restore is privileged, so the + * MFC_SR1[Pr] bit is not set. + * + * The value written combines the master run + * control, relocate and bus TLBIE mask bits. + */ + spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_RELOCATE_MASK | + MFC_STATE1_BUS_TLBIE_MASK)); +} + +static inline void save_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 31: + * Save SPU_NPC in the CSA. + */ + csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW); +} + +static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 32: + * Save SPU_PrivCntl in the CSA. + */ + csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW); +} + +static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 33: + * Restore, Step 16: + * Write SPU_PrivCntl[S,Le,A] fields reset to 0 + * (the whole register is written as zero). + */ + out_be64(&priv2->spu_privcntl_RW, 0UL); + eieio(); +} + +static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 34: + * Save SPU_LSLR in the CSA. 
+ */ + csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW); +} + +static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 35: + * Restore, Step 17. + * Reset SPU_LSLR by writing LS_ADDR_MASK to it. + */ + out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK); + eieio(); +} + +static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 36: + * Save SPU_Cfg in the CSA. + */ + csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW); +} + +static inline void save_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 37: + * Save PM_Trace_Tag_Wait_Mask in the CSA. + * Not performed by this implementation. + */ +} + +static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 38: + * Save RA_GROUP_ID register and the + * RA_ENABLE register in the CSA. + */ + csa->priv1.resource_allocation_groupID_RW = + spu_resource_allocation_groupID_get(spu); + csa->priv1.resource_allocation_enable_RW = + spu_resource_allocation_enable_get(spu); +} + +static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 39: + * Save MB_Stat register in the CSA. + */ + csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R); +} + +static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 40: + * Save the PPU_MB register in the CSA. + */ + csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R); +} + +static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 41: + * Save the PPUINT_MB register in the CSA. 
+ */ + csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R); +} + +static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Save, Step 42: + * Save the following CH: [0,1,3,4,24,25,27] + * + * For each channel: select it through + * spu_chnlcntptr_RW, save its data and count + * in the CSA, then write zero to both. + */ + for (i = 0; i < 7; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW); + csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void save_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 43: + * Save SPU Read Mailbox Channel. + * + * Selects channel 29, saves its count, reads + * four data words into csa->spu_mailbox_data + * and then writes zero to the count. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW); + for (i = 0; i < 4; i++) { + csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW); + } + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); +} + +static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 44: + * Save MFC_CMD Channel. 
+ */ + out_be64(&priv2->spu_chnlcntptr_RW, 21UL); + eieio(); + csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW); /* only the count of channel 21 is saved */ + eieio(); +} + +static inline void reset_ch(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL }; + u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL }; + u64 idx; + int i; + + /* Save, Step 45: + * Reset the following CH: [21, 23, 28, 30] + * + * Counts written: channel 21 gets 16, + * channels 23, 28 and 30 get 1. + */ + for (i = 0; i < 4; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 46: + * Restore, Step 25. + * Write MFC_CNTL[Sc]=0 (resume queue processing). + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); +} + +static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 45: + * Restore, Step 19: + * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All. + * + * NOTE(review): reset_ch() is also labelled + * "Save, Step 45" -- confirm the step number. + */ + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) { + out_be64(&priv2->slb_invalidate_all_W, 0UL); + eieio(); + } +} + +static inline void get_kernel_slb(u64 ea, u64 slb[2]) +{ + slb[0] = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; + slb[1] = (ea & ESID_MASK) | SLB_ESID_V; + + /* Large pages are used for kernel text/data, but not vmalloc. 
*/ + if (cpu_has_feature(CPU_FTR_16M_PAGE) + && REGION_ID(ea) == KERNEL_REGION_ID) + slb[0] |= SLB_VSID_L; +} + +static inline void load_mfc_slb(struct spu *spu, u64 slb[2], int slbe) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + out_be64(&priv2->slb_index_W, slbe); /* select SLB entry 'slbe' */ + eieio(); + out_be64(&priv2->slb_vsid_RW, slb[0]); /* slb[0] is the VSID word */ + out_be64(&priv2->slb_esid_RW, slb[1]); /* slb[1] is the ESID word */ + eieio(); +} + +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) +{ + u64 code_slb[2]; + u64 lscsa_slb[2]; + + /* Save, Step 47: + * Restore, Step 30. + * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All + * register, then initialize SLB_VSID and SLB_ESID + * to provide access to SPU context save code and + * LSCSA. + * + * This implementation places both the context + * switch code and LSCSA in kernel address space. + * + * Further this implementation assumes that the + * MFC_SR1[R]=1 (in other words, assume that + * translation is desired by OS environment). + * + * SLB entry 0 is loaded for spu_save_code. Entry 1 + * is loaded for the LSCSA only when its VSID/ESID + * pair differs from the one computed for the code. + */ + invalidate_slbs(csa, spu); + get_kernel_slb((unsigned long)&spu_save_code[0], code_slb); + get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb); + load_mfc_slb(spu, code_slb, 0); + if ((lscsa_slb[0] != code_slb[0]) || (lscsa_slb[1] != code_slb[1])) + load_mfc_slb(spu, lscsa_slb, 1); +} + +static inline void set_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 48: + * Restore, Step 23. + * Change the software context switch pending flag + * to context switch active: the ACTIVE bit is set + * before the PENDING bit is cleared, then a full + * memory barrier is issued. + */ + set_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); + clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + mb(); +} + +static inline void enable_interrupts(struct spu_state *csa, struct spu *spu) +{ + unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + + /* Save, Step 49: + * Restore, Step 22: + * Reset and then enable interrupts, as + * needed by OS. + * + * This implementation enables only class1 + * (translation) interrupts. 
+ */ + spin_lock_irq(&spu->register_lock); + spu_int_stat_clear(spu, 0, ~0ul); + spu_int_stat_clear(spu, 1, ~0ul); + spu_int_stat_clear(spu, 2, ~0ul); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, class1_mask); + spu_int_mask_set(spu, 2, 0ul); + spin_unlock_irq(&spu->register_lock); +} + +static inline int send_mfc_dma(struct spu *spu, unsigned long ea, + unsigned int ls_offset, unsigned int size, + unsigned int tag, unsigned int rclass, + unsigned int cmd) +{ + struct spu_problem __iomem *prob = spu->problem; + union mfc_tag_size_class_cmd command; + unsigned int transfer_size; + volatile unsigned int status = 0x0; + + while (size > 0) { /* issue the request in chunks of at most MFC_MAX_DMA_SIZE bytes */ + transfer_size = + (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size; + command.u.mfc_size = transfer_size; + command.u.mfc_tag = tag; + command.u.mfc_rclassid = rclass; + command.u.mfc_cmd = cmd; + do { + out_be32(&prob->mfc_lsa_W, ls_offset); + out_be64(&prob->mfc_ea_W, ea); + out_be64(&prob->mfc_union_W.all64, command.all64); + status = + in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + if (unlikely(status & 0x2)) { + cpu_relax(); + } + } while (status & 0x3); /* NOTE(review): presumably re-issues until the command is accepted -- confirm the meaning of status bits 0x1/0x2 */ + size -= transfer_size; + ea += transfer_size; + ls_offset += transfer_size; + } + return 0; /* always 0; no failure is reported to the caller */ +} + +static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_PUT_CMD; + + /* Save, Step 50: + * Issue a DMA command to copy the first 16K bytes + * of local storage to the CSA (csa->lscsa->ls), + * using tag 0. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 51: + * Restore, Step 31. + * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry + * point address of context save code in local + * storage. 
+ * + * This implementation uses SPU-side save/restore + * programs with entry points at LSA of 0. + */ + out_be32(&prob->spu_npc_RW, 0); + eieio(); +} + +static inline void set_signot1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 52: + * Restore, Step 32: + * Write SPU_Sig_Notify_1 register with upper 32-bits + * of the CSA.LSCSA effective address. + * + * The upper half is taken as ui[0] of the union, + * which relies on a big-endian layout of the u64. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify1, addr64.ui[0]); + eieio(); +} + +static inline void set_signot2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 53: + * Restore, Step 33: + * Write SPU_Sig_Notify_2 register with lower 32-bits + * of the CSA.LSCSA effective address. + * + * The lower half is taken as ui[1] of the union, + * which relies on a big-endian layout of the u64. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify2, addr64.ui[1]); + eieio(); +} + +static inline void send_save_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_save_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_save_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Save, Step 54: + * Issue a DMA command to copy context save code + * to local storage and start SPU. + * + * The whole spu_save_code array is transferred to + * local-store offset 0 with tag 0. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 55: + * Restore, Step 38. + * Write PPU_QueryMask=1 (enable Tag Group 0) + * and issue eieio instruction. 
+ */ + out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0)); + eieio(); +} + +static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask = MFC_TAGID_TO_TAGMASK(0); + unsigned long flags; + + /* Save, Step 56: + * Restore, Step 39. + * Restore, Step 46. + * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete) + * or write PPU_QueryType[TS]=01 and wait for Tag Group + * Complete Interrupt. Write INT_Stat_Class0 or + * INT_Stat_Class2 with value of 'handled'. + * + * This implementation polls, then clears all class 0 + * and class 2 interrupt status bits with local + * interrupts disabled. + */ + POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, ~(0ul)); + spu_int_stat_clear(spu, 2, ~(0ul)); + local_irq_restore(flags); +} + +static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + unsigned long flags; + + /* Save, Step 57: + * Restore, Step 40. + * Poll until SPU_Status[R]=0 or wait for SPU Class 0 + * or SPU Class 2 interrupt. Write INT_Stat_class0 + * or INT_Stat_class2 with value of handled. + * + * This implementation polls, then clears all class 0 + * and class 2 interrupt status bits with local + * interrupts disabled. + */ + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, ~(0ul)); + spu_int_stat_clear(spu, 2, ~(0ul)); + local_irq_restore(flags); +} + +static inline int check_save_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Save, Step 54: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context save succeeded, otherwise context save + * failed. + * + * Returns 0 when SPU_Status equals exactly the + * expected "stopped by stop, SPU_SAVE_COMPLETE" + * value, 1 otherwise. + * + * NOTE(review): send_save_code() is also labelled + * "Save, Step 54" -- confirm the step number. + */ + complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 4: + * If required, notify the "using application" that + * the SPU task has been terminated. TBD. 
+ */ +} + +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 7: + * Restore, Step 47. + * Write MFC_Cntl[Dh,Sc]='1','1' to suspend + * the queue and halt the decrementer. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | + MFC_CNTL_DECREMENTER_HALTED); + eieio(); +} + +static inline void wait_suspend_mfc_complete(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 8: + * Restore, Step 47. + * Poll MFC_CNTL[Ss] until 11 is returned. + */ + POLL_WHILE_FALSE(in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_COMPLETE); +} + +static inline int suspend_spe(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 9: + * If SPU_Status[R]=1, stop SPU execution + * and wait for stop to complete. + * + * Returns 1 if SPU_Status[R]=1 on entry. + * 0 otherwise + */ + if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STAUTUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_WAITING_FOR_CHANNEL) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + return 1; + } + return 0; +} + +static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* 
Restore, Step 10: + * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1, + * release SPU from isolate state. + */ + if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STAUTUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + } +} + +static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx; + int i; + + /* Restore, Step 20: + * Reset the following CH: [0,1,3,4,24,25,27] + */ + for (i = 0; i < 7; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL }; + u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 21: + * Reset the following CH: [21, 23, 28, 29, 30] + */ + for (i = 0; i < 5; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void setup_spu_status_part1(struct spu_state *csa, + struct spu *spu) +{ + u32 status_P = SPU_STATUS_STOPPED_BY_STOP; + u32 status_I = SPU_STATUS_INVALID_INSTR; + u32 
status_H = SPU_STATUS_STOPPED_BY_HALT; + u32 status_S = SPU_STATUS_SINGLE_STEP; + u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR; + u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR; + u32 status_code; + + /* Restore, Step 27: + * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct + * instruction sequence to the end of the SPU based restore + * code (after the "context restored" stop and signal) to + * restore the correct SPU status. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + + status_code = + (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF; + if ((csa->prob.spu_status_R & status_P_I) == status_P_I) { + + /* SPU_Status[P,I]=1 - Illegal Instruction followed + * by Stop and Signal instruction, followed by 'br -4'. + * + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) { + + /* SPU_Status[P,H]=1 - Halt Conditional, followed + * by Stop and Signal instruction, followed by + * 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) { + + /* SPU_Status[S,P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) { + + /* SPU_Status[S,I]=1 - Illegal instruction followed + * by 'br -4'. 
+ */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P) == status_P) { + + /* SPU_Status[P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_H) == status_H) { + + /* SPU_Status[H]=1 - Halt Conditional, followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H; + + } else if ((csa->prob.spu_status_R & status_S) == status_S) { + + /* SPU_Status[S]=1 - Two nop instructions. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S; + + } else if ((csa->prob.spu_status_R & status_I) == status_I) { + + /* SPU_Status[I]=1 - Illegal instruction followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I; + + } +} + +static inline void setup_spu_status_part2(struct spu_state *csa, + struct spu *spu) +{ + u32 mask; + + /* Restore, Step 28: + * If the CSA.SPU_Status[I,S,H,P,R]=0 then + * add a 'br *' instruction to the end of + * the SPU based restore code. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R; + } +} + +static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 29: + * Restore RA_GROUP_ID register and the + * RA_ENABLE reigster from the CSA. 
+ */ + spu_resource_allocation_groupID_set(spu, + csa->priv1.resource_allocation_groupID_RW); + spu_resource_allocation_enable_set(spu, + csa->priv1.resource_allocation_enable_RW); +} + +static inline void send_restore_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_restore_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_restore_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Restore, Step 37: + * Issue MFC DMA command to copy context + * restore code to local storage. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void setup_decr(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 34: + * If CSA.MFC_CNTL[Ds]=1 (decrementer was + * running) then adjust decrementer, set + * decrementer running status in LSCSA, + * and set decrementer "wrapped" status + * in LSCSA. + */ + if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) { + cycles_t resume_time = get_cycles(); + cycles_t delta_time = resume_time - csa->suspend_time; + + csa->lscsa->decr.slot[0] = delta_time; + } +} + +static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 35: + * Copy the CSA.PU_MB data into the LSCSA. + */ + csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R; +} + +static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 36: + * Copy the CSA.PUINT_MB data into the LSCSA. + */ + csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R; +} + +static inline int check_restore_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Restore, Step 40: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context restore succeeded, otherwise context restore + * failed. 
+ */ + complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 41: + * Restore SPU_PrivCntl from the CSA. + */ + out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW); + eieio(); +} + +static inline void restore_status_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 42: + * If any CSA.SPU_Status[I,S,H,P]=1, then + * restore the error or single step state. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (csa->prob.spu_status_R & mask) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_status_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 43: + * If all CSA.SPU_Status[I,S,H,P,R]=0 then write + * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1, + * then write '00' to SPU_RunCntl[R0R1] and wait + * for SPU_Status[R]=0. 
+ */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GET_CMD; + + /* Restore, Step 44: + * Issue a DMA command to restore the first + * 16kb of local storage from CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 49: + * Write INT_MASK_class0 with value of 0. + * Write INT_MASK_class1 with value of 0. + * Write INT_MASK_class2 with value of 0. + * Write INT_STAT_class0 with value of -1. + * Write INT_STAT_class1 with value of -1. + * Write INT_STAT_class2 with value of -1. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + spu_int_stat_clear(spu, 0, ~0ul); + spu_int_stat_clear(spu, 1, ~0ul); + spu_int_stat_clear(spu, 2, ~0ul); + spin_unlock_irq(&spu->register_lock); +} + +static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 50: + * If MFC_Cntl[Se]!=0 then restore + * MFC command queues. 
+ */ + if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) { + for (i = 0; i < 8; i++) { + out_be64(&priv2->puq[i].mfc_cq_data0_RW, + csa->priv2.puq[i].mfc_cq_data0_RW); + out_be64(&priv2->puq[i].mfc_cq_data1_RW, + csa->priv2.puq[i].mfc_cq_data1_RW); + out_be64(&priv2->puq[i].mfc_cq_data2_RW, + csa->priv2.puq[i].mfc_cq_data2_RW); + out_be64(&priv2->puq[i].mfc_cq_data3_RW, + csa->priv2.puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + out_be64(&priv2->spuq[i].mfc_cq_data0_RW, + csa->priv2.spuq[i].mfc_cq_data0_RW); + out_be64(&priv2->spuq[i].mfc_cq_data1_RW, + csa->priv2.spuq[i].mfc_cq_data1_RW); + out_be64(&priv2->spuq[i].mfc_cq_data2_RW, + csa->priv2.spuq[i].mfc_cq_data2_RW); + out_be64(&priv2->spuq[i].mfc_cq_data3_RW, + csa->priv2.spuq[i].mfc_cq_data3_RW); + } + } + eieio(); +} + +static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 51: + * Restore the PPU_QueryMask register from CSA. + */ + out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW); + eieio(); +} + +static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 52: + * Restore the PPU_QueryType register from CSA. + */ + out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW); + eieio(); +} + +static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 53: + * Restore the MFC_CSR_TSQ register from CSA. + */ + out_be64(&priv2->spu_tag_status_query_RW, + csa->priv2.spu_tag_status_query_RW); + eieio(); +} + +static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 54: + * Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers from CSA. 
+ */ + out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW); + out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW); + eieio(); +} + +static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 55: + * Restore the MFC_CSR_ATO register from CSA. + */ + out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW); +} + +static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 56: + * Restore the MFC_TCLASS_ID register from CSA. + */ + spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW); + eieio(); +} + +static inline void set_llr_event(struct spu_state *csa, struct spu *spu) +{ + u64 ch0_cnt, ch0_data; + u64 ch1_data; + + /* Restore, Step 57: + * Set the Lock Line Reservation Lost Event by: + * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1. + * 2. If CSA.SPU_Channel_0_Count=0 and + * CSA.SPU_Wr_Event_Mask[Lr]=1 and + * CSA.SPU_Event_Status[Lr]=0 then set + * CSA.SPU_Event_Status_Count=1. + */ + ch0_cnt = csa->spu_chnlcnt_RW[0]; + ch0_data = csa->spu_chnldata_RW[0]; + ch1_data = csa->spu_chnldata_RW[1]; + csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT; + if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) && + (ch1_data & MFC_LLR_LOST_EVENT)) { + csa->spu_chnlcnt_RW[0] = 1; + } +} + +static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 58: + * If the status of the CSA software decrementer + * "wrapped" flag is set, OR in a '1' to + * CSA.SPU_Event_Status[Tm]. 
+ */ + if (csa->lscsa->decr_status.slot[0] == 1) { + csa->spu_chnldata_RW[0] |= 0x20; + } + if ((csa->lscsa->decr_status.slot[0] == 1) && + (csa->spu_chnlcnt_RW[0] == 0 && + ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && + ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { + csa->spu_chnlcnt_RW[0] = 1; + } +} + +static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Restore, Step 59: + * Restore the following CH: [0,1,3,4,24,25,27] + */ + for (i = 0; i < 7; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]); + eieio(); + } +} + +static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[3] = { 9UL, 21UL, 23UL }; + u64 ch_counts[3] = { 1UL, 16UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 60: + * Restore the following CH: [9,21,23]. + */ + ch_counts[0] = 1UL; + ch_counts[1] = csa->spu_chnlcnt_RW[21]; + ch_counts[2] = 1UL; + for (i = 0; i < 3; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 61: + * Restore the SPU_LSLR register from CSA. + */ + out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW); + eieio(); +} + +static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 62: + * Restore the SPU_Cfg register from CSA. 
+ */ + out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW); + eieio(); +} + +static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 63: + * Restore PM_Trace_Tag_Wait_Mask from CSA. + * Not performed by this implementation. + */ +} + +static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 64: + * Restore SPU_NPC from CSA. + */ + out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW); + eieio(); +} + +static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 65: + * Restore MFC_RdSPU_MB from CSA. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]); + for (i = 0; i < 4; i++) { + out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]); + } + eieio(); +} + +static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 dummy = 0; + + /* Restore, Step 66: + * If CSA.MB_Stat[P]=0 (mailbox empty) then + * read from the PPU_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF) == 0) { + dummy = in_be32(&prob->pu_mb_R); + eieio(); + } +} + +static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 dummy = 0UL; + + /* Restore, Step 67: + * If CSA.MB_Stat[I]=0 (mailbox empty) then + * read from the PPUINT_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { + dummy = in_be64(&priv2->puint_mb_R); + eieio(); + spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + eieio(); + } +} + +static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 68: + * If MFC_SR1[R]='1', restore SLBs from CSA. 
+ */ + if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) { + for (i = 0; i < 8; i++) { + out_be64(&priv2->slb_index_W, i); + eieio(); + out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]); + out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]); + eieio(); + } + out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W); + eieio(); + } +} + +static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 69: + * Restore the MFC_SR1 register from CSA. + */ + spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW); + eieio(); +} + +static inline void restore_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Restore, Step 70: + * Restore other SPU mappings to this SPU. TBD. + */ +} + +static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 71: + * If CSA.SPU_Status[R]=1 then write + * SPU_RunCntl[R0R1]='01'. + */ + if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + } +} + +static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 72: + * Restore the MFC_CNTL register for the CSA. + */ + out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW); + eieio(); +} + +static inline void enable_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 73: + * Enable user-space access (if provided) to this + * SPU by mapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 74: + * Reset the "context switch active" flag. 
+ */ + clear_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); + mb(); +} + +static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 75: + * Re-enable SPU interrupts. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW); + spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW); + spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW); + spin_unlock_irq(&spu->register_lock); +} + +static int quiece_spu(struct spu_state *prev, struct spu *spu) +{ + /* + * Combined steps 2-18 of SPU context save sequence, which + * quiesce the SPU state (disable SPU execution, MFC command + * queues, decrementer, SPU interrupts, etc.). + * + * Returns 0 on success. + * 2 if failed step 2. + * 6 if failed step 6. + */ + + if (check_spu_isolate(prev, spu)) { /* Step 2. */ + return 2; + } + disable_interrupts(prev, spu); /* Step 3. */ + set_watchdog_timer(prev, spu); /* Step 4. */ + inhibit_user_access(prev, spu); /* Step 5. */ + if (check_spu_isolate(prev, spu)) { /* Step 6. */ + return 6; + } + set_switch_pending(prev, spu); /* Step 7. */ + save_mfc_cntl(prev, spu); /* Step 8. */ + save_spu_runcntl(prev, spu); /* Step 9. */ + save_mfc_sr1(prev, spu); /* Step 10. */ + save_spu_status(prev, spu); /* Step 11. */ + save_mfc_decr(prev, spu); /* Step 12. */ + halt_mfc_decr(prev, spu); /* Step 13. */ + save_timebase(prev, spu); /* Step 14. */ + remove_other_spu_access(prev, spu); /* Step 15. */ + do_mfc_mssync(prev, spu); /* Step 16. */ + issue_mfc_tlbie(prev, spu); /* Step 17. */ + handle_pending_interrupts(prev, spu); /* Step 18. */ + + return 0; +} + +static void save_csa(struct spu_state *prev, struct spu *spu) +{ + /* + * Combine steps 19-44 of SPU context save sequence, which + * save regions of the privileged & problem state areas. + */ + + save_mfc_queues(prev, spu); /* Step 19. */ + save_ppu_querymask(prev, spu); /* Step 20. */ + save_ppu_querytype(prev, spu); /* Step 21. 
*/ + save_mfc_csr_tsq(prev, spu); /* Step 22. */ + save_mfc_csr_cmd(prev, spu); /* Step 23. */ + save_mfc_csr_ato(prev, spu); /* Step 24. */ + save_mfc_tclass_id(prev, spu); /* Step 25. */ + set_mfc_tclass_id(prev, spu); /* Step 26. */ + purge_mfc_queue(prev, spu); /* Step 27. */ + wait_purge_complete(prev, spu); /* Step 28. */ + save_mfc_slbs(prev, spu); /* Step 29. */ + setup_mfc_sr1(prev, spu); /* Step 30. */ + save_spu_npc(prev, spu); /* Step 31. */ + save_spu_privcntl(prev, spu); /* Step 32. */ + reset_spu_privcntl(prev, spu); /* Step 33. */ + save_spu_lslr(prev, spu); /* Step 34. */ + reset_spu_lslr(prev, spu); /* Step 35. */ + save_spu_cfg(prev, spu); /* Step 36. */ + save_pm_trace(prev, spu); /* Step 37. */ + save_mfc_rag(prev, spu); /* Step 38. */ + save_ppu_mb_stat(prev, spu); /* Step 39. */ + save_ppu_mb(prev, spu); /* Step 40. */ + save_ppuint_mb(prev, spu); /* Step 41. */ + save_ch_part1(prev, spu); /* Step 42. */ + save_spu_mb(prev, spu); /* Step 43. */ + save_mfc_cmd(prev, spu); /* Step 44. */ + reset_ch(prev, spu); /* Step 45. */ +} + +static void save_lscsa(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 46-57 of SPU context save sequence, + * which save regions of the local store and register + * file. + */ + + resume_mfc_queue(prev, spu); /* Step 46. */ + setup_mfc_slbs(prev, spu); /* Step 47. */ + set_switch_active(prev, spu); /* Step 48. */ + enable_interrupts(prev, spu); /* Step 49. */ + save_ls_16kb(prev, spu); /* Step 50. */ + set_spu_npc(prev, spu); /* Step 51. */ + set_signot1(prev, spu); /* Step 52. */ + set_signot2(prev, spu); /* Step 53. */ + send_save_code(prev, spu); /* Step 54. */ + set_ppu_querymask(prev, spu); /* Step 55. */ + wait_tag_complete(prev, spu); /* Step 56. */ + wait_spu_stopped(prev, spu); /* Step 57. 
*/ +} + +static void harvest(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 2-25 of SPU context restore sequence, + * which resets an SPU either after a failed save, or + * when using SPU for first time. + */ + + disable_interrupts(prev, spu); /* Step 2. */ + inhibit_user_access(prev, spu); /* Step 3. */ + terminate_spu_app(prev, spu); /* Step 4. */ + set_switch_pending(prev, spu); /* Step 5. */ + remove_other_spu_access(prev, spu); /* Step 6. */ + suspend_mfc(prev, spu); /* Step 7. */ + wait_suspend_mfc_complete(prev, spu); /* Step 8. */ + if (!suspend_spe(prev, spu)) /* Step 9. */ + clear_spu_status(prev, spu); /* Step 10. */ + do_mfc_mssync(prev, spu); /* Step 11. */ + issue_mfc_tlbie(prev, spu); /* Step 12. */ + handle_pending_interrupts(prev, spu); /* Step 13. */ + purge_mfc_queue(prev, spu); /* Step 14. */ + wait_purge_complete(prev, spu); /* Step 15. */ + reset_spu_privcntl(prev, spu); /* Step 16. */ + reset_spu_lslr(prev, spu); /* Step 17. */ + setup_mfc_sr1(prev, spu); /* Step 18. */ + invalidate_slbs(prev, spu); /* Step 19. */ + reset_ch_part1(prev, spu); /* Step 20. */ + reset_ch_part2(prev, spu); /* Step 21. */ + enable_interrupts(prev, spu); /* Step 22. */ + set_switch_active(prev, spu); /* Step 23. */ + set_mfc_tclass_id(prev, spu); /* Step 24. */ + resume_mfc_queue(prev, spu); /* Step 25. */ +} + +static void restore_lscsa(struct spu_state *next, struct spu *spu) +{ + /* + * Perform steps 26-40 of SPU context restore sequence, + * which restores regions of the local store and register + * file. + */ + + set_watchdog_timer(next, spu); /* Step 26. */ + setup_spu_status_part1(next, spu); /* Step 27. */ + setup_spu_status_part2(next, spu); /* Step 28. */ + restore_mfc_rag(next, spu); /* Step 29. */ + setup_mfc_slbs(next, spu); /* Step 30. */ + set_spu_npc(next, spu); /* Step 31. */ + set_signot1(next, spu); /* Step 32. */ + set_signot2(next, spu); /* Step 33. */ + setup_decr(next, spu); /* Step 34. 
*/ + setup_ppu_mb(next, spu); /* Step 35. */ + setup_ppuint_mb(next, spu); /* Step 36. */ + send_restore_code(next, spu); /* Step 37. */ + set_ppu_querymask(next, spu); /* Step 38. */ + wait_tag_complete(next, spu); /* Step 39. */ + wait_spu_stopped(next, spu); /* Step 40. */ +} + +static void restore_csa(struct spu_state *next, struct spu *spu) +{ + /* + * Combine steps 41-76 of SPU context restore sequence, which + * restore regions of the privileged & problem state areas. + */ + + restore_spu_privcntl(next, spu); /* Step 41. */ + restore_status_part1(next, spu); /* Step 42. */ + restore_status_part2(next, spu); /* Step 43. */ + restore_ls_16kb(next, spu); /* Step 44. */ + wait_tag_complete(next, spu); /* Step 45. */ + suspend_mfc(next, spu); /* Step 46. */ + wait_suspend_mfc_complete(next, spu); /* Step 47. */ + issue_mfc_tlbie(next, spu); /* Step 48. */ + clear_interrupts(next, spu); /* Step 49. */ + restore_mfc_queues(next, spu); /* Step 50. */ + restore_ppu_querymask(next, spu); /* Step 51. */ + restore_ppu_querytype(next, spu); /* Step 52. */ + restore_mfc_csr_tsq(next, spu); /* Step 53. */ + restore_mfc_csr_cmd(next, spu); /* Step 54. */ + restore_mfc_csr_ato(next, spu); /* Step 55. */ + restore_mfc_tclass_id(next, spu); /* Step 56. */ + set_llr_event(next, spu); /* Step 57. */ + restore_decr_wrapped(next, spu); /* Step 58. */ + restore_ch_part1(next, spu); /* Step 59. */ + restore_ch_part2(next, spu); /* Step 60. */ + restore_spu_lslr(next, spu); /* Step 61. */ + restore_spu_cfg(next, spu); /* Step 62. */ + restore_pm_trace(next, spu); /* Step 63. */ + restore_spu_npc(next, spu); /* Step 64. */ + restore_spu_mb(next, spu); /* Step 65. */ + check_ppu_mb_stat(next, spu); /* Step 66. */ + check_ppuint_mb_stat(next, spu); /* Step 67. */ + restore_mfc_slbs(next, spu); /* Step 68. */ + restore_mfc_sr1(next, spu); /* Step 69. */ + restore_other_spu_access(next, spu); /* Step 70. */ + restore_spu_runcntl(next, spu); /* Step 71. 
*/ + restore_mfc_cntl(next, spu); /* Step 72. */ + enable_user_access(next, spu); /* Step 73. */ + reset_switch_active(next, spu); /* Step 74. */ + reenable_interrupts(next, spu); /* Step 75. */ +} + +static int __do_spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + /* + * SPU context save can be broken into three phases: + * + * (a) quiesce [steps 2-16]. + * (b) save of CSA, performed by PPE [steps 17-42] + * (c) save of LSCSA, mostly performed by SPU [steps 43-52]. + * + * Returns 0 on success. + * 2,6 if failed to quiesce SPU + * 1 if SPU-side of save failed. + */ + + rc = quiece_spu(prev, spu); /* Steps 2-16. */ + switch (rc) { + default: + case 2: + case 6: + harvest(prev, spu); + return rc; + break; + case 0: + break; + } + save_csa(prev, spu); /* Steps 17-43. */ + save_lscsa(prev, spu); /* Steps 44-53. */ + return check_save_status(prev, spu); /* Step 54. */ +} + +static int __do_spu_restore(struct spu_state *next, struct spu *spu) +{ + int rc; + + /* + * SPU context restore can be broken into three phases: + * + * (a) harvest (or reset) SPU [steps 2-24]. + * (b) restore LSCSA [steps 25-40], mostly performed by SPU. + * (c) restore CSA [steps 41-76], performed by PPE. + * + * The 'harvest' step is not performed here, but rather + * as needed below. + */ + + restore_lscsa(next, spu); /* Steps 24-39. */ + rc = check_restore_status(next, spu); /* Step 40. */ + switch (rc) { + default: + /* Failed. Return now. */ + return rc; + break; + case 0: + /* Fall through to next step. */ + break; + } + restore_csa(next, spu); + + return 0; +} + +/** + * spu_save - SPU context save, with locking. + * @prev: pointer to SPU context save area, to be saved. + * @spu: pointer to SPU iomem structure. + * + * Acquire locks, perform the save operation then return. + */ +int spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); /* Step 1. */ + rc = __do_spu_save(prev, spu); /* Steps 2-53. 
*/ + release_spu_lock(spu); + if (rc) { + panic("%s failed on SPU[%d], rc=%d.\n", + __func__, spu->number, rc); + } + return rc; +} + +/** + * spu_restore - SPU context restore, with harvest and locking. + * @new: pointer to SPU context save area, to be restored. + * @spu: pointer to SPU iomem structure. + * + * Perform harvest + restore, as we may not be coming + * from a previous successful save operation, and the + * hardware state is unknown. + */ +int spu_restore(struct spu_state *new, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); + harvest(NULL, spu); + spu->stop_code = 0; + spu->dar = 0; + spu->dsisr = 0; + spu->slb_replace = 0; + spu->class_0_pending = 0; + rc = __do_spu_restore(new, spu); + release_spu_lock(spu); + if (rc) { + panic("%s failed on SPU[%d] rc=%d.\n", + __func__, spu->number, rc); + } + return rc; +} + +/** + * spu_harvest - SPU harvest (reset) operation + * @spu: pointer to SPU iomem structure. + * + * Perform SPU harvest (reset) operation. + */ +void spu_harvest(struct spu *spu) +{ + acquire_spu_lock(spu); + harvest(NULL, spu); + release_spu_lock(spu); +} + +static void init_prob(struct spu_state *csa) +{ + csa->spu_chnlcnt_RW[9] = 1; + csa->spu_chnlcnt_RW[21] = 16; + csa->spu_chnlcnt_RW[23] = 1; + csa->spu_chnlcnt_RW[28] = 1; + csa->spu_chnlcnt_RW[30] = 1; + csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP; +} + +static void init_priv1(struct spu_state *csa) +{ + /* Enable decode, relocate, tlbie response, master runcntl. */ + csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK | + MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_PROBLEM_STATE_MASK | + MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK; + + /* Set storage description. */ + csa->priv1.mfc_sdr_RW = mfspr(SPRN_SDR1); + + /* Enable OS-specific set of interrupts. 
*/ + csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR | + CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR | + CLASS0_ENABLE_SPU_ERROR_INTR; + csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR | + CLASS2_ENABLE_SPU_HALT_INTR; +} + +static void init_priv2(struct spu_state *csa) +{ /* Preset the saved spu_lslr_RW to LS_ADDR_MASK and the saved mfc_control_RW to the RESUME_DMA_QUEUE, NORMAL_DMA_QUEUE_OPERATION and DMA_QUEUES_EMPTY_MASK control bits. */ + csa->priv2.spu_lslr_RW = LS_ADDR_MASK; + csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE | + MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION | + MFC_CNTL_DMA_QUEUES_EMPTY_MASK; +} + +/** + * spu_init_csa - initialize an SPU context save area. + * @csa: caller-provided save area to initialize; NULL is ignored. + * + * Zero and initialize the contents of an SPU context save area. + * This includes enabling address translation, interrupt masks, etc., + * as appropriate for the given OS environment. + * + * Note that storage for the 'lscsa' is allocated separately, + * as it is by far the largest of the context save regions, + * and may need to be pinned or otherwise specially aligned. + * It is obtained here with vmalloc() and its LS pages are marked + * reserved. + * + * There is no return value: if the vmalloc() fails, the function + * returns with @csa zeroed and csa->lscsa left NULL. + */ +void spu_init_csa(struct spu_state *csa) +{ + struct spu_lscsa *lscsa; + unsigned char *p; + + if (!csa) + return; + memset(csa, 0, sizeof(struct spu_state)); + + lscsa = vmalloc(sizeof(struct spu_lscsa)); + if (!lscsa) + return; + + memset(lscsa, 0, sizeof(struct spu_lscsa)); + csa->lscsa = lscsa; + csa->register_lock = SPIN_LOCK_UNLOCKED; + + /* Set LS pages reserved to allow for user-space mapping. */ + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + init_prob(csa); + init_priv1(csa); + init_priv2(csa); +} + +void spu_fini_csa(struct spu_state *csa) +{ + /* Clear reserved bit before vfree. 
*/ + unsigned char *p; + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); /* NOTE(review): csa and csa->lscsa are dereferenced unchecked; spu_init_csa() leaves csa->lscsa NULL when its vmalloc() fails. */ + + vfree(csa->lscsa); +} diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c new file mode 100644 index 00000000000..e6565a949dd --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -0,0 +1,103 @@ +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/namei.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +/** + * sys_spu_run - run code loaded into an SPU + * + * @unpc: next program counter for the SPU + * @ustatus: status of the SPU + * + * This system call transfers the control of execution of a + * user space thread to an SPU. It will return when the + * SPU has finished executing or when it hits an error + * condition and it will be interrupted if a signal needs + * to be delivered to a handler in user space. + * + * The next program counter is set to the passed value + * before the SPU starts fetching code and the user space + * pointer gets updated with the new value when returning + * from kernel space. + * + * The status value returned from spu_run reflects the + * value of the spu_status register after the SPU has stopped. 
+ * + */ +static long do_spu_run(struct file *filp, + __u32 __user *unpc, + __u32 __user *ustatus) +{ /* Worker behind sys_spu_run() that operates on an already resolved struct file. Returns -EFAULT when either user pointer cannot be read or written back, -EINVAL when filp does not use spufs_context_fops, otherwise the result of spufs_run_spu(). */ + long ret; + struct spufs_inode_info *i; + u32 npc, status; + + ret = -EFAULT; + if (get_user(npc, unpc) || get_user(status, ustatus)) + goto out; + + /* check if this file was created by spu_create */ + ret = -EINVAL; + if (filp->f_op != &spufs_context_fops) + goto out; + + i = SPUFS_I(filp->f_dentry->d_inode); + ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); /* npc and status are passed by address, then copied back to user space below */ + + if (put_user(npc, unpc) || put_user(status, ustatus)) + ret = -EFAULT; /* a failed copy-out replaces the result of the run */ +out: + return ret; +} + +#ifndef MODULE /* fd-based entry point; compiled only when MODULE is not defined */ +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + int fput_needed; + struct file *filp; + long ret; + + ret = -EBADF; /* stays in effect when fget_light() returns no file */ + filp = fget_light(fd, &fput_needed); + if (filp) { + ret = do_spu_run(filp, unpc, ustatus); + fput_light(filp, fput_needed); + } + + return ret; +} +#endif + +asmlinkage long sys_spu_create(const char __user *pathname, + unsigned int flags, mode_t mode) +{ /* Copy the pathname in with getname(), resolve it with path_lookup() using LOOKUP_PARENT, LOOKUP_OPEN and LOOKUP_CREATE, and hand the result to spufs_create_thread(). Returns the first error from getname() or path_lookup(), otherwise the value from spufs_create_thread(). */ + char *tmp; + int ret; + + tmp = getname(pathname); + ret = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + struct nameidata nd; + + ret = path_lookup(tmp, LOOKUP_PARENT| + LOOKUP_OPEN|LOOKUP_CREATE, &nd); + if (!ret) { + ret = spufs_create_thread(&nd, flags, mode); + path_release(&nd); /* released only after a successful lookup */ + } + putname(tmp); + } + + return ret; +} + +struct spufs_calls spufs_calls = { /* note that .spu_run points at the file-based do_spu_run(), not at the fd-based sys_spu_run() */ + .create_thread = sys_spu_create, + .spu_run = do_spu_run, + .owner = THIS_MODULE, +}; |