diff options
author | Paul Mackerras <paulus@samba.org> | 2005-09-28 21:11:41 +1000 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-09-28 21:11:41 +1000 |
commit | 952ecef7a0479049c8abb7c34a688ec2981ceadd (patch) | |
tree | 74377080bb1fe12e874d18cb77461341a904f854 /arch/powerpc/platforms | |
parent | beeca08738c4c4024c81a591812bfe38f8c436c0 (diff) | |
parent | 252e75a51d40757928d692b3d339e66838294b4b (diff) |
Merge Stephen Rothwell's patches
Diffstat (limited to 'arch/powerpc/platforms')
21 files changed, 6191 insertions, 4 deletions
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index dbc093759a8..7637ff3642c 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -1,4 +1,7 @@ -obj-$(CONFIG_PPC_PMAC) += powermac/ -obj-$(CONFIG_4xx) += 4xx/ -obj-$(CONFIG_83xx) += 83xx/ -obj-$(CONFIG_85xx) += 85xx/ +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_PPC_PMAC) += powermac/ +endif +obj-$(CONFIG_4xx) += 4xx/ +obj-$(CONFIG_83xx) += 83xx/ +obj-$(CONFIG_85xx) += 85xx/ +obj-$(CONFIG_PPC_ISERIES) += iseries/ diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile new file mode 100644 index 00000000000..18bf4007756 --- /dev/null +++ b/arch/powerpc/platforms/iseries/Makefile @@ -0,0 +1,7 @@ +obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o mf.o lpevents.o \ + hvcall.o proc.o htab.o iommu.o misc.o +obj-$(CONFIG_PCI) += pci.o irq.o vpdinfo.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_VIOPATH) += viopath.o +obj-$(CONFIG_MODULES) += ksyms.o diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c new file mode 100644 index 00000000000..431b22767d0 --- /dev/null +++ b/arch/powerpc/platforms/iseries/htab.c @@ -0,0 +1,256 @@ +/* + * iSeries hashtable management. + * Derived from pSeries_htab.c + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/iSeries/HvCallHpt.h> +#include <asm/abs_addr.h> +#include <linux/spinlock.h> + +static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp = + { [0 ... 63] = SPIN_LOCK_UNLOCKED}; + +/* + * Very primitive algorithm for picking up a lock + */ +static inline void iSeries_hlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_lock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static inline void iSeries_hunlock(unsigned long slot) +{ + if (slot & 0x8) + slot = ~slot; + spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); +} + +static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + unsigned long arpn; + long slot; + hpte_t lhpte; + int secondary = 0; + + /* + * The hypervisor tries both primary and secondary. + * If we are being called to insert in the secondary, + * it means we have already tried both primary and secondary, + * so we return failure immediately. + */ + if (vflags & HPTE_V_SECONDARY) + return -1; + + iSeries_hlock(hpte_group); + + slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); + BUG_ON(lhpte.v & HPTE_V_VALID); + + if (slot == -1) { /* No available entry found in either group */ + iSeries_hunlock(hpte_group); + return -1; + } + + if (slot < 0) { /* MSB set means secondary group */ + vflags |= HPTE_V_VALID; + secondary = 1; + slot &= 0x7fffffffffffffff; + } + + arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; + + lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; + lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + + /* Now fill in the actual HPTE */ + HvCallHpt_addValidate(slot, secondary, &lhpte); + + iSeries_hunlock(hpte_group); + + return (secondary << 3) | (slot & 7); +} + +long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + long slot; + hpte_t lhpte; + + slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); + + if (lhpte.v & HPTE_V_VALID) { + /* Bolt the existing HPTE */ + HvCallHpt_setSwBits(slot, 0x10, 0); + HvCallHpt_setPp(slot, PP_RWXX); + return 0; + } + + return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags); +} + +static unsigned long iSeries_hpte_getword0(unsigned long slot) +{ + hpte_t hpte; + + HvCallHpt_get(&hpte, slot); + return hpte.v; +} + +static long iSeries_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + int i; + unsigned long hpte_v; + + /* Pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + iSeries_hlock(hpte_group); + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = iSeries_hpte_getword0(hpte_group + slot_offset); + + if (! (hpte_v & HPTE_V_BOLTED)) { + HvCallHpt_invalidateSetSwBitsGet(hpte_group + + slot_offset, 0, 0); + iSeries_hunlock(hpte_group); + return i; + } + + slot_offset++; + slot_offset &= 0x7; + } + + iSeries_hunlock(hpte_group); + + return -1; +} + +/* + * The HyperVisor expects the "flags" argument in this form: + * bits 0..59 : reserved + * bit 60 : N + * bits 61..63 : PP2,PP1,PP0 + */ +static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + hpte_t hpte; + unsigned long avpn = va >> 23; + + iSeries_hlock(slot); + + HvCallHpt_get(&hpte, slot); + if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) { + /* + * Hypervisor expects bits as NPPP, which is + * different from how they are mapped in our PP. + */ + HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1)); + iSeries_hunlock(slot); + return 0; + } + iSeries_hunlock(slot); + + return -1; +} + +/* + * Functions used to find the PTE for a particular virtual address. + * Only used during boot when bolting pages. + * + * Input : vpn : virtual page number + * Output: PTE index within the page table of the entry + * -1 on failure + */ +static long iSeries_hpte_find(unsigned long vpn) +{ + hpte_t hpte; + long slot; + + /* + * The HvCallHpt_findValid interface is as follows: + * 0xffffffffffffffff : No entry found. + * 0x00000000xxxxxxxx : Entry found in primary group, slot x + * 0x80000000xxxxxxxx : Entry found in secondary group, slot x + */ + slot = HvCallHpt_findValid(&hpte, vpn); + if (hpte.v & HPTE_V_VALID) { + if (slot < 0) { + slot &= 0x7fffffffffffffff; + slot = -slot; + } + } else + slot = -1; + return slot; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid,va,vpn; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + slot = iSeries_hpte_find(vpn); + if (slot == -1) + panic("updateboltedpp: Could not find page to bolt\n"); + HvCallHpt_setPp(slot, newpp); +} + +static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + unsigned long hpte_v; + unsigned long avpn = va >> 23; + unsigned long flags; + + local_irq_save(flags); + + iSeries_hlock(slot); + + hpte_v = iSeries_hpte_getword0(slot); + + if ((HPTE_V_AVPN_VAL(hpte_v) == avpn) && (hpte_v & HPTE_V_VALID)) + HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0); + + iSeries_hunlock(slot); + + local_irq_restore(flags); +} + +void hpte_init_iSeries(void) +{ + ppc_md.hpte_invalidate = iSeries_hpte_invalidate; + ppc_md.hpte_updatepp = iSeries_hpte_updatepp; + ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp; + ppc_md.hpte_insert = iSeries_hpte_insert; + ppc_md.hpte_remove = iSeries_hpte_remove; + + htab_finish_init(); +} diff --git a/arch/powerpc/platforms/iseries/hvcall.S b/arch/powerpc/platforms/iseries/hvcall.S new file mode 100644 index 00000000000..9901c0ec141 --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvcall.S @@ -0,0 +1,93 @@ +/* + * This file contains the code to perform calls to the + * iSeries LPAR hypervisor + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> +#include <asm/processor.h> + + .text + +/* + * Hypervisor call + * + * Invoke the iSeries hypervisor via the System Call instruction + * Parameters are passed to this routine in registers r3 - r10 + * + * r3 contains the HV function to be called + * r4-r10 contain the operands to the hypervisor function + * + */ + +_GLOBAL(HvCall) +_GLOBAL(HvCall0) +_GLOBAL(HvCall1) +_GLOBAL(HvCall2) +_GLOBAL(HvCall3) +_GLOBAL(HvCall4) +_GLOBAL(HvCall5) +_GLOBAL(HvCall6) +_GLOBAL(HvCall7) + + + mfcr r0 + std r0,-8(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1) + + /* r0 = 0xffffffffffffffff indicates a hypervisor call */ + + li r0,-1 + + /* Invoke the hypervisor */ + + sc + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + + /* return to caller, return value in r3 */ + + blr + +_GLOBAL(HvCall0Ret16) +_GLOBAL(HvCall1Ret16) +_GLOBAL(HvCall2Ret16) +_GLOBAL(HvCall3Ret16) +_GLOBAL(HvCall4Ret16) +_GLOBAL(HvCall5Ret16) +_GLOBAL(HvCall6Ret16) +_GLOBAL(HvCall7Ret16) + + mfcr r0 + std r0,-8(r1) + std r31,-16(r1) + stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1) + + mr r31,r4 + li r0,-1 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + sc + + std r3,0(r31) + std r4,8(r31) + + mr r3,r5 + + ld r1,0(r1) + ld r0,-8(r1) + mtcrf 0xff,r0 + ld r31,-16(r1) + + blr diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c new file mode 100644 index 00000000000..f61e2e9ac9e --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvlog.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <asm/page.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/HvCall.h> +#include <asm/iSeries/HvCallSc.h> +#include <asm/iSeries/HvTypes.h> + + +void HvCall_writeLogBuffer(const void *buffer, u64 len) +{ + struct HvLpBufferList hv_buf; + u64 left_this_page; + u64 cur = virt_to_abs(buffer); + + while (len) { + hv_buf.addr = cur; + left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; + if (left_this_page > len) + left_this_page = len; + hv_buf.len = left_this_page; + len -= left_this_page; + HvCall2(HvCallBaseWriteLogBuffer, + virt_to_abs(&hv_buf), + left_this_page); + cur = (cur & PAGE_MASK) + PAGE_SIZE; + } +} diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c new file mode 100644 index 00000000000..dc28621aea0 --- /dev/null +++ b/arch/powerpc/platforms/iseries/hvlpconfig.c @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <asm/iSeries/HvLpConfig.h> + +HvLpIndex HvLpConfig_getLpIndex_outline(void) +{ + return HvLpConfig_getLpIndex(); +} +EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline); diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c new file mode 100644 index 00000000000..9ac735d5b81 --- /dev/null +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * + * Dynamic DMA mapping support, iSeries-specific parts. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <linux/list.h> + +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/machdep.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/iSeries_pci.h> + +extern struct list_head iSeries_Global_Device_List; + + +static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + while (npages--) { + tce.te_word = 0; + tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; + + if (tbl->it_type == TCE_VB) { + /* Virtual Bus */ + tce.te_bits.tb_valid = 1; + tce.te_bits.tb_allio = 1; + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_rdwr = 1; + } else { + /* PCI Bus */ + tce.te_bits.tb_rdwr = 1; /* Read allowed */ + if (direction != DMA_TO_DEVICE) + tce.te_bits.tb_pciwr = 1; + } + + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, + tce.te_word); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + uaddr += PAGE_SIZE; + } +} + +static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) +{ + u64 rc; + + while (npages--) { + rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); + if (rc) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + rc); + index++; + } +} + +#ifdef CONFIG_PCI +/* + * This function compares the known tables to find an iommu_table + * that has already been built for hardware TCEs. + */ +static struct iommu_table *iommu_table_find(struct iommu_table * tbl) +{ + struct device_node *dp; + + list_for_each_entry(dp, &iSeries_Global_Device_List, Device_List) { + struct iommu_table *it = PCI_DN(dp)->iommu_table; + + if ((it != NULL) && + (it->it_type == TCE_PCI) && + (it->it_offset == tbl->it_offset) && + (it->it_index == tbl->it_index) && + (it->it_size == tbl->it_size)) + return it; + } + return NULL; +} + +/* + * Call Hv with the architected data structure to get TCE table info. + * info. Put the returned data into the Linux representation of the + * TCE table data. + * The Hardware Tce table comes in three flavors. + * 1. TCE table shared between Buses. + * 2. TCE table per Bus. + * 3. TCE Table per IOA. + */ +static void iommu_table_getparms(struct device_node *dn, + struct iommu_table* tbl) +{ + struct iommu_table_cb *parms; + + parms = kmalloc(sizeof(*parms), GFP_KERNEL); + if (parms == NULL) + panic("PCI_DMA: TCE Table Allocation failed."); + + memset(parms, 0, sizeof(*parms)); + + parms->itc_busno = ISERIES_BUS(dn); + parms->itc_slotno = PCI_DN(dn)->LogicalSlot; + parms->itc_virtbus = 0; + + HvCallXm_getTceTableParms(ISERIES_HV_ADDR(parms)); + + if (parms->itc_size == 0) + panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); + + /* itc_size is in pages worth of table, it_size is in # of entries */ + tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); + tbl->it_busno = parms->itc_busno; + tbl->it_offset = parms->itc_offset; + tbl->it_index = parms->itc_index; + tbl->it_blocksize = 1; + tbl->it_type = TCE_PCI; + + kfree(parms); +} + + +void iommu_devnode_init_iSeries(struct device_node *dn) +{ + struct iommu_table *tbl; + struct pci_dn *pdn = PCI_DN(dn); + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_getparms(dn, tbl); + + /* Look for existing tce table */ + pdn->iommu_table = iommu_table_find(tbl); + if (pdn->iommu_table == NULL) + pdn->iommu_table = iommu_init_table(tbl); + else + kfree(tbl); +} +#endif + +static void iommu_dev_setup_iSeries(struct pci_dev *dev) { } +static void iommu_bus_setup_iSeries(struct pci_bus *bus) { } + +void iommu_init_early_iSeries(void) +{ + ppc_md.tce_build = tce_build_iSeries; + ppc_md.tce_free = tce_free_iSeries; + + ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries; + + pci_iommu_init(); +} diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c new file mode 100644 index 00000000000..5a8a0056b31 --- /dev/null +++ b/arch/powerpc/platforms/iseries/irq.c @@ -0,0 +1,365 @@ +/* + * This module supports the iSeries PCI bus interrupt handling + * Copyright (C) 20yy <Robert L Holtorf> <IBM Corp> + * Copyright (C) 2004-2005 IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA + * + * Change Activity: + * Created, December 13, 2000 by Wayne Holm + * End Change Activity + */ +#include <linux/config.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/bootmem.h> +#include <linux/ide.h> +#include <linux/irq.h> +#include <linux/spinlock.h> + +#include <asm/ppcdebug.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/iSeries_irq.h> + +/* This maps virtual irq numbers to real irqs */ +unsigned int virt_irq_to_real_map[NR_IRQS]; + +/* The next available virtual irq number */ +/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */ +static int next_virtual_irq = 2; + +static long Pci_Interrupt_Count; +static long Pci_Event_Count; + +enum XmPciLpEvent_Subtype { + XmPciLpEvent_BusCreated = 0, // PHB has been created + XmPciLpEvent_BusError = 1, // PHB has failed + XmPciLpEvent_BusFailed = 2, // Msg to Secondary, Primary failed bus + XmPciLpEvent_NodeFailed = 4, // Multi-adapter bridge has failed + XmPciLpEvent_NodeRecovered = 5, // Multi-adapter bridge has recovered + XmPciLpEvent_BusRecovered = 12, // PHB has been recovered + XmPciLpEvent_UnQuiesceBus = 18, // Secondary bus unqiescing + XmPciLpEvent_BridgeError = 21, // Bridge Error + XmPciLpEvent_SlotInterrupt = 22 // Slot interrupt +}; + +struct XmPciLpEvent_BusInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; +}; + +struct XmPciLpEvent_NodeInterrupt { + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; +}; + +struct XmPciLpEvent { + struct HvLpEvent hvLpEvent; + + union { + u64 alignData; // Align on an 8-byte boundary + + struct { + u32 fisr; + HvBusNumber busNumber; + HvSubBusNumber subBusNumber; + HvAgentId deviceId; + } slotInterrupt; + + struct XmPciLpEvent_BusInterrupt busFailed; + struct XmPciLpEvent_BusInterrupt busRecovered; + struct XmPciLpEvent_BusInterrupt busCreated; + + struct XmPciLpEvent_NodeInterrupt nodeFailed; + struct XmPciLpEvent_NodeInterrupt nodeRecovered; + + } eventData; + +}; + +static void intReceived(struct XmPciLpEvent *eventParm, + struct pt_regs *regsParm) +{ + int irq; + + ++Pci_Interrupt_Count; + + switch (eventParm->hvLpEvent.xSubtype) { + case XmPciLpEvent_SlotInterrupt: + irq = eventParm->hvLpEvent.xCorrelationToken; + /* Dispatch the interrupt handlers for this irq */ + ppc_irq_dispatch_handler(regsParm, irq); + HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber, + eventParm->eventData.slotInterrupt.subBusNumber, + eventParm->eventData.slotInterrupt.deviceId); + break; + /* Ignore error recovery events for now */ + case XmPciLpEvent_BusCreated: + printk(KERN_INFO "intReceived: system bus %d created\n", + eventParm->eventData.busCreated.busNumber); + break; + case XmPciLpEvent_BusError: + case XmPciLpEvent_BusFailed: + printk(KERN_INFO "intReceived: system bus %d failed\n", + eventParm->eventData.busFailed.busNumber); + break; + case XmPciLpEvent_BusRecovered: + case XmPciLpEvent_UnQuiesceBus: + printk(KERN_INFO "intReceived: system bus %d recovered\n", + eventParm->eventData.busRecovered.busNumber); + break; + case XmPciLpEvent_NodeFailed: + case XmPciLpEvent_BridgeError: + printk(KERN_INFO + "intReceived: multi-adapter bridge %d/%d/%d failed\n", + eventParm->eventData.nodeFailed.busNumber, + eventParm->eventData.nodeFailed.subBusNumber, + eventParm->eventData.nodeFailed.deviceId); + break; + case XmPciLpEvent_NodeRecovered: + printk(KERN_INFO + "intReceived: multi-adapter bridge %d/%d/%d recovered\n", + eventParm->eventData.nodeRecovered.busNumber, + eventParm->eventData.nodeRecovered.subBusNumber, + eventParm->eventData.nodeRecovered.deviceId); + break; + default: + printk(KERN_ERR + "intReceived: unrecognized event subtype 0x%x\n", + eventParm->hvLpEvent.xSubtype); + break; + } +} + +static void XmPciLpEvent_handler(struct HvLpEvent *eventParm, + struct pt_regs *regsParm) +{ +#ifdef CONFIG_PCI + ++Pci_Event_Count; + + if (eventParm && (eventParm->xType == HvLpEvent_Type_PciIo)) { + switch (eventParm->xFlags.xFunction) { + case HvLpEvent_Function_Int: + intReceived((struct XmPciLpEvent *)eventParm, regsParm); + break; + case HvLpEvent_Function_Ack: + printk(KERN_ERR + "XmPciLpEvent_handler: unexpected ack received\n"); + break; + default: + printk(KERN_ERR + "XmPciLpEvent_handler: unexpected event function %d\n", + (int)eventParm->xFlags.xFunction); + break; + } + } else if (eventParm) + printk(KERN_ERR + "XmPciLpEvent_handler: Unrecognized PCI event type 0x%x\n", + (int)eventParm->xType); + else + printk(KERN_ERR "XmPciLpEvent_handler: NULL event received\n"); +#endif +} + +/* + * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c + * It must be called before the bus walk. + */ +void __init iSeries_init_IRQ(void) +{ + /* Register PCI event handler and open an event path */ + int xRc; + + xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, + &XmPciLpEvent_handler); + if (xRc == 0) { + xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); + if (xRc != 0) + printk(KERN_ERR "iSeries_init_IRQ: open event path " + "failed with rc 0x%x\n", xRc); + } else + printk(KERN_ERR "iSeries_init_IRQ: register handler " + "failed with rc 0x%x\n", xRc); +} + +#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1) +#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1) +#define REAL_IRQ_TO_FUNC(irq) ((irq) & 7) + +/* + * This will be called by device drivers (via enable_IRQ) + * to enable INTA in the bridge interrupt status register. + */ +static void iSeries_enable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Unmask secondary INTA */ + mask = 0x80000000; + HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* This is called by iSeries_activate_IRQs */ +static unsigned int iSeries_startup_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Link the IRQ number to the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, irq); + + /* Unmask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_unmaskFisr(bus, subBus, deviceId, mask); + iSeries_enable_IRQ(irq); + return 0; +} + +/* + * This is called out of iSeries_fixup to activate interrupt + * generation for usable slots + */ +void __init iSeries_activate_IRQs() +{ + int irq; + unsigned long flags; + + for_each_irq (irq) { + irq_desc_t *desc = get_irq_desc(irq); + + if (desc && desc->handler && desc->handler->startup) { + spin_lock_irqsave(&desc->lock, flags); + desc->handler->startup(irq); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* this is not called anywhere currently */ +static void iSeries_shutdown_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* irq should be locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Invalidate the IRQ number in the bridge */ + HvCallXm_connectBusUnit(bus, subBus, deviceId, 0); + + /* Mask bridge interrupts in the FISR */ + mask = 0x01010000 << function; + HvCallPci_maskFisr(bus, subBus, deviceId, mask); +} + +/* + * This will be called by device drivers (via disable_IRQ) + * to disable INTA in the bridge interrupt status register. + */ +static void iSeries_disable_IRQ(unsigned int irq) +{ + u32 bus, deviceId, function, mask; + const u32 subBus = 0; + unsigned int rirq = virt_irq_to_real_map[irq]; + + /* The IRQ has already been locked by the caller */ + bus = REAL_IRQ_TO_BUS(rirq); + function = REAL_IRQ_TO_FUNC(rirq); + deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function; + + /* Mask secondary INTA */ + mask = 0x80000000; + HvCallPci_maskInterrupts(bus, subBus, deviceId, mask); + PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n", + bus, subBus, deviceId, irq); +} + +/* + * Need to define this so ppc_irq_dispatch_handler will NOT call + * enable_IRQ at the end of interrupt handling. However, this does + * nothing because there is not enough information provided to do + * the EOI HvCall. This is done by XmPciLpEvent.c + */ +static void iSeries_end_IRQ(unsigned int irq) +{ +} + +static hw_irq_controller iSeries_IRQ_handler = { + .typename = "iSeries irq controller", + .startup = iSeries_startup_IRQ, + .shutdown = iSeries_shutdown_IRQ, + .enable = iSeries_enable_IRQ, + .disable = iSeries_disable_IRQ, + .end = iSeries_end_IRQ +}; + +/* + * This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot + * It calculates the irq value for the slot. + * Note that subBusNumber is always 0 (at the moment at least). + */ +int __init iSeries_allocate_IRQ(HvBusNumber busNumber, + HvSubBusNumber subBusNumber, HvAgentId deviceId) +{ + unsigned int realirq, virtirq; + u8 idsel = (deviceId >> 4); + u8 function = deviceId & 7; + + virtirq = next_virtual_irq++; + realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function; + virt_irq_to_real_map[virtirq] = realirq; + + irq_desc[virtirq].handler = &iSeries_IRQ_handler; + return virtirq; +} + +int virt_irq_create_mapping(unsigned int real_irq) +{ + BUG(); /* Don't call this on iSeries, yet */ + + return 0; +} + +void virt_irq_init(void) +{ + return; +} diff --git a/arch/powerpc/platforms/iseries/ksyms.c b/arch/powerpc/platforms/iseries/ksyms.c new file mode 100644 index 00000000000..f271b353972 --- /dev/null +++ b/arch/powerpc/platforms/iseries/ksyms.c @@ -0,0 +1,27 @@ +/* + * (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> + +#include <asm/hw_irq.h> +#include <asm/iSeries/HvCallSc.h> + +EXPORT_SYMBOL(HvCall0); +EXPORT_SYMBOL(HvCall1); +EXPORT_SYMBOL(HvCall2); +EXPORT_SYMBOL(HvCall3); +EXPORT_SYMBOL(HvCall4); +EXPORT_SYMBOL(HvCall5); +EXPORT_SYMBOL(HvCall6); +EXPORT_SYMBOL(HvCall7); + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(local_get_flags); +EXPORT_SYMBOL(local_irq_disable); +EXPORT_SYMBOL(local_irq_restore); +#endif diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c new file mode 100644 index 00000000000..87b7ad8ca46 --- /dev/null +++ b/arch/powerpc/platforms/iseries/lpardata.c @@ -0,0 +1,227 @@ +/* + * Copyright 2001 Mike Corrigan, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/naca.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/ItLpNaca.h> +#include <asm/lppaca.h> +#include <asm/iSeries/ItLpRegSave.h> +#include <asm/paca.h> +#include <asm/iSeries/HvReleaseData.h> +#include <asm/iSeries/LparMap.h> +#include <asm/iSeries/ItVpdAreas.h> +#include <asm/iSeries/ItIplParmsReal.h> +#include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/IoHriProcessorVpd.h> +#include <asm/iSeries/ItSpCommArea.h> + + +/* The HvReleaseData is the root of the information shared between + * the hypervisor and Linux. + */ +struct HvReleaseData hvReleaseData = { + .xDesc = 0xc8a5d9c4, /* "HvRD" ebcdic */ + .xSize = sizeof(struct HvReleaseData), + .xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas), + .xSlicNacaAddr = &naca, /* 64-bit Naca address */ + .xMsNucDataOffset = LPARMAP_PHYS, + .xFlags = HVREL_TAGSINACTIVE /* tags inactive */ + /* 64 bit */ + /* shared processors */ + /* HMT allowed */ + | 6, /* TEMP: This allows non-GA driver */ + .xVrmIndex = 4, /* We are v5r2m0 */ + .xMinSupportedPlicVrmIndex = 3, /* v5r1m0 */ + .xMinCompatablePlicVrmIndex = 3, /* v5r1m0 */ + .xVrmName = { 0xd3, 0x89, 0x95, 0xa4, /* "Linux 2.4.64" ebcdic */ + 0xa7, 0x40, 0xf2, 0x4b, + 0xf4, 0x4b, 0xf6, 0xf4 }, +}; + +/* + * The NACA. The first dword of the naca is required by the iSeries + * hypervisor to point to itVpdAreas. The hypervisor finds the NACA + * through the pointer in hvReleaseData. + */ +struct naca_struct naca = { + .xItVpdAreas = &itVpdAreas, + .xRamDisk = 0, + .xRamDiskSize = 0, +}; + +extern void system_reset_iSeries(void); +extern void machine_check_iSeries(void); +extern void data_access_iSeries(void); +extern void instruction_access_iSeries(void); +extern void hardware_interrupt_iSeries(void); +extern void alignment_iSeries(void); +extern void program_check_iSeries(void); +extern void fp_unavailable_iSeries(void); +extern void decrementer_iSeries(void); +extern void trap_0a_iSeries(void); +extern void trap_0b_iSeries(void); +extern void system_call_iSeries(void); +extern void single_step_iSeries(void); +extern void trap_0e_iSeries(void); +extern void performance_monitor_iSeries(void); +extern void data_access_slb_iSeries(void); +extern void instruction_access_slb_iSeries(void); + +struct ItLpNaca itLpNaca = { + .xDesc = 0xd397d581, /* "LpNa" ebcdic */ + .xSize = 0x0400, /* size of ItLpNaca */ + .xIntHdlrOffset = 0x0300, /* offset to int array */ + .xMaxIntHdlrEntries = 19, /* # ents */ + .xPrimaryLpIndex = 0, /* Part # of primary */ + .xServiceLpIndex = 0, /* Part # of serv */ + .xLpIndex = 0, /* Part # of me */ + .xMaxLpQueues = 0, /* # of LP queues */ + .xLpQueueOffset = 0x100, /* offset of start of LP queues */ + .xPirEnvironMode = 0, /* Piranha stuff */ + .xPirConsoleMode = 0, + .xPirDasdMode = 0, + .xLparInstalled = 0, + .xSysPartitioned = 0, + .xHwSyncedTBs = 0, + .xIntProcUtilHmt = 0, + .xSpVpdFormat = 0, + .xIntProcRatio = 0, + .xPlicVrmIndex = 0, /* VRM index of PLIC */ + .xMinSupportedSlicVrmInd = 0, /* min supported SLIC */ + .xMinCompatableSlicVrmInd = 0, /* min compat SLIC */ + .xLoadAreaAddr = 0, /* 64-bit addr of load area */ + .xLoadAreaChunks = 0, /* chunks for load area */ + .xPaseSysCallCRMask = 0, /* PASE mask */ + .xSlicSegmentTablePtr = 0, /* seg table */ + .xOldLpQueue = { 0 }, /* Old LP Queue */ + .xInterruptHdlr = { + (u64)system_reset_iSeries, /* 0x100 System Reset */ + (u64)machine_check_iSeries, /* 0x200 Machine Check */ + (u64)data_access_iSeries, /* 0x300 Data Access */ + (u64)instruction_access_iSeries, /* 0x400 Instruction Access */ + (u64)hardware_interrupt_iSeries, /* 0x500 External */ + (u64)alignment_iSeries, /* 0x600 Alignment */ + (u64)program_check_iSeries, /* 0x700 Program Check */ + (u64)fp_unavailable_iSeries, /* 0x800 FP Unavailable */ + (u64)decrementer_iSeries, /* 0x900 Decrementer */ + (u64)trap_0a_iSeries, /* 0xa00 Trap 0A */ + (u64)trap_0b_iSeries, /* 0xb00 Trap 0B */ + (u64)system_call_iSeries, /* 0xc00 System Call */ + (u64)single_step_iSeries, /* 0xd00 Single Step */ + (u64)trap_0e_iSeries, /* 0xe00 Trap 0E */ + (u64)performance_monitor_iSeries,/* 0xf00 Performance Monitor */ + 0, /* int 0x1000 */ + 0, /* int 0x1010 */ + 0, /* int 0x1020 CPU ctls */ + (u64)hardware_interrupt_iSeries, /* SC Ret Hdlr */ + (u64)data_access_slb_iSeries, /* 0x380 D-SLB */ + (u64)instruction_access_slb_iSeries /* 0x480 I-SLB */ + } +}; +EXPORT_SYMBOL(itLpNaca); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data"))); + +/* May be filled in by the hypervisor so cannot end up in the BSS */ +struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data"))); +EXPORT_SYMBOL(xItExtVpdPanel); + +#define maxPhysicalProcessors 32 + +struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = { + { + .xInstCacheOperandSize = 32, + .xDataCacheOperandSize = 32, + .xProcFreq = 50000000, + .xTimeBaseFreq = 50000000, + .xPVR = 0x3600 + } +}; + +/* Space for Main Store Vpd 27,200 bytes */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xMsVpd[3400] __attribute__((__section__(".data"))); + +/* Space for Recovery Log Buffer */ +/* May be filled in by the hypervisor so cannot end up in the BSS */ +u64 xRecoveryLogBuffer[32] __attribute__((__section__(".data"))); + +struct SpCommArea xSpCommArea = { + .xDesc = 0xE2D7C3C2, + .xFormat = 1, +}; + +/* The LparMap data is now located at offset 0x6000 in head.S + * It was put there so that the HvReleaseData could address it + * with a 32-bit offset as required by the iSeries hypervisor + * + * The Naca has a pointer to the ItVpdAreas. The hypervisor finds + * the Naca via the HvReleaseData area. The HvReleaseData has the + * offset into the Naca of the pointer to the ItVpdAreas. + */ +struct ItVpdAreas itVpdAreas = { + .xSlicDesc = 0xc9a3e5c1, /* "ItVA" */ + .xSlicSize = sizeof(struct ItVpdAreas), + .xSlicVpdEntries = ItVpdMaxEntries, /* # VPD array entries */ + .xSlicDmaEntries = ItDmaMaxEntries, /* # DMA array entries */ + .xSlicMaxLogicalProcs = NR_CPUS * 2, /* Max logical procs */ + .xSlicMaxPhysicalProcs = maxPhysicalProcessors, /* Max physical procs */ + .xSlicDmaToksOffset = offsetof(struct ItVpdAreas, xPlicDmaToks), + .xSlicVpdAdrsOffset = offsetof(struct ItVpdAreas, xSlicVpdAdrs), + .xSlicDmaLensOffset = offsetof(struct ItVpdAreas, xPlicDmaLens), + .xSlicVpdLensOffset = offsetof(struct ItVpdAreas, xSlicVpdLens), + .xSlicMaxSlotLabels = 0, /* max slot labels */ + .xSlicMaxLpQueues = 1, /* max LP queues */ + .xPlicDmaLens = { 0 }, /* DMA lengths */ + .xPlicDmaToks = { 0 }, /* DMA tokens */ + .xSlicVpdLens = { /* VPD lengths */ + 0,0,0, /* 0 - 2 */ + sizeof(xItExtVpdPanel), /* 3 Extended VPD */ + sizeof(struct paca_struct), /* 4 length of Paca */ + 0, /* 5 */ + sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */ + 26992, /* 7 length of MS VPD */ + 0, /* 8 */ + sizeof(struct ItLpNaca),/* 9 length of LP Naca */ + 0, /* 10 */ + 256, /* 11 length of Recovery Log Buf */ + sizeof(struct SpCommArea), /* 12 length of SP Comm Area */ + 0,0,0, /* 13 - 15 */ + sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + sizeof(struct hvlpevent_queue), /* 23 length of Lp Queue */ + 0,0 /* 24 - 25 */ + }, + .xSlicVpdAdrs = { /* VPD addresses */ + 0,0,0, /* 0 - 2 */ + &xItExtVpdPanel, /* 3 Extended VPD */ + &paca[0], /* 4 first Paca */ + 0, /* 5 */ + &xItIplParmsReal, /* 6 IPL parms */ + &xMsVpd, /* 7 MS Vpd */ + 0, /* 8 */ + &itLpNaca, /* 9 LpNaca */ + 0, /* 10 */ + &xRecoveryLogBuffer, /* 11 Recovery Log Buffer */ + &xSpCommArea, /* 12 SP Comm Area */ + 0,0,0, /* 13 - 15 */ + &xIoHriProcessorVpd, /* 16 Proc Vpd */ + 0,0,0,0,0,0, /* 17 - 22 */ + &hvlpevent_queue, /* 23 Lp Queue */ + 0,0 + } +}; diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c new file mode 100644 index 00000000000..883603027cc --- /dev/null +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2001 Mike Corrigan IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <asm/system.h> +#include <asm/paca.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvCallEvent.h> +#include <asm/iSeries/ItLpNaca.h> + +/* + * The LpQueue is used to pass event data from the hypervisor to + * the partition. This is where I/O interrupt events are communicated. + * + * It is written to by the hypervisor so cannot end up in the BSS. + */ +struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data"))); + +DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts); + +static char *event_types[HvLpEvent_Type_NumTypes] = { + "Hypervisor", + "Machine Facilities", + "Session Manager", + "SPD I/O", + "Virtual Bus", + "PCI I/O", + "RIO I/O", + "Virtual Lan", + "Virtual I/O" +}; + +/* Array of LpEvent handler functions */ +static LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; +static unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes]; + +static struct HvLpEvent * get_next_hvlpevent(void) +{ + struct HvLpEvent * event; + event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + if (event->xFlags.xValid) { + /* rmb() needed only for weakly consistent machines (regatta) */ + rmb(); + /* Set pointer to next potential event */ + hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 + + LpEventAlign) / LpEventAlign) * LpEventAlign; + + /* Wrap to beginning if no room at end */ + if (hvlpevent_queue.xSlicCurEventPtr > + hvlpevent_queue.xSlicLastValidEventPtr) { + hvlpevent_queue.xSlicCurEventPtr = + hvlpevent_queue.xSlicEventStackPtr; + } + } else { + event = NULL; + } + + return event; +} + +static unsigned long spread_lpevents = NR_CPUS; + +int hvlpevent_is_pending(void) +{ + struct HvLpEvent *next_event; + + if (smp_processor_id() >= spread_lpevents) + return 0; + + next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + return next_event->xFlags.xValid | + hvlpevent_queue.xPlicOverflowIntPending; +} + +static void hvlpevent_clear_valid(struct HvLpEvent * event) +{ + /* Tell the Hypervisor that we're done with this event. + * Also clear bits within this event that might look like valid bits. + * ie. on 64-byte boundaries. + */ + struct HvLpEvent *tmp; + unsigned extra = ((event->xSizeMinus1 + LpEventAlign) / + LpEventAlign) - 1; + + switch (extra) { + case 3: + tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 2: + tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 1: + tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign); + tmp->xFlags.xValid = 0; + } + + mb(); + + event->xFlags.xValid = 0; +} + +void process_hvlpevents(struct pt_regs *regs) +{ + struct HvLpEvent * event; + + /* If we have recursed, just return */ + if (!spin_trylock(&hvlpevent_queue.lock)) + return; + + for (;;) { + event = get_next_hvlpevent(); + if (event) { + /* Call appropriate handler here, passing + * a pointer to the LpEvent. The handler + * must make a copy of the LpEvent if it + * needs it in a bottom half. (perhaps for + * an ACK) + * + * Handlers are responsible for ACK processing + * + * The Hypervisor guarantees that LpEvents will + * only be delivered with types that we have + * registered for, so no type check is necessary + * here! + */ + if (event->xType < HvLpEvent_Type_NumTypes) + __get_cpu_var(hvlpevent_counts)[event->xType]++; + if (event->xType < HvLpEvent_Type_NumTypes && + lpEventHandler[event->xType]) + lpEventHandler[event->xType](event, regs); + else + printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType ); + + hvlpevent_clear_valid(event); + } else if (hvlpevent_queue.xPlicOverflowIntPending) + /* + * No more valid events. If overflow events are + * pending process them + */ + HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex); + else + break; + } + + spin_unlock(&hvlpevent_queue.lock); +} + +static int set_spread_lpevents(char *str) +{ + unsigned long val = simple_strtoul(str, NULL, 0); + + /* + * The parameter is the number of processors to share in processing + * lp events. + */ + if (( val > 0) && (val <= NR_CPUS)) { + spread_lpevents = val; + printk("lpevent processing spread over %ld processors\n", val); + } else { + printk("invalid spread_lpevents %ld\n", val); + } + + return 1; +} +__setup("spread_lpevents=", set_spread_lpevents); + +void setup_hvlpevent_queue(void) +{ + void *eventStack; + + /* + * Allocate a page for the Event Stack. The Hypervisor needs the + * absolute real address, so we subtract out the KERNELBASE and add + * in the absolute real address of the kernel load area. + */ + eventStack = alloc_bootmem_pages(LpEventStackSize); + memset(eventStack, 0, LpEventStackSize); + + /* Invoke the hypervisor to initialize the event stack */ + HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); + + hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack; + hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack; + hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack + + (LpEventStackSize - LpEventMaxSize); + hvlpevent_queue.xIndex = 0; +} + +/* Register a handler for an LpEvent type */ +int HvLpEvent_registerHandler(HvLpEvent_Type eventType, LpEventHandler handler) +{ + if (eventType < HvLpEvent_Type_NumTypes) { + lpEventHandler[eventType] = handler; + return 0; + } + return 1; +} +EXPORT_SYMBOL(HvLpEvent_registerHandler); + +int HvLpEvent_unregisterHandler(HvLpEvent_Type eventType) +{ + might_sleep(); + + if (eventType < HvLpEvent_Type_NumTypes) { + if (!lpEventHandlerPaths[eventType]) { + lpEventHandler[eventType] = NULL; + /* + * We now sleep until all other CPUs have scheduled. + * This ensures that the deletion is seen by all + * other CPUs, and that the deleted handler isn't + * still running on another CPU when we return. + */ + synchronize_rcu(); + return 0; + } + } + return 1; +} +EXPORT_SYMBOL(HvLpEvent_unregisterHandler); + +/* + * lpIndex is the partition index of the target partition. + * needed only for VirtualIo, VirtualLan and SessionMgr. Zero + * indicates to use our partition index - for the other types. + */ +int HvLpEvent_openPath(HvLpEvent_Type eventType, HvLpIndex lpIndex) +{ + if ((eventType < HvLpEvent_Type_NumTypes) && + lpEventHandler[eventType]) { + if (lpIndex == 0) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_openLpEventPath(lpIndex, eventType); + ++lpEventHandlerPaths[eventType]; + return 0; + } + return 1; +} + +int HvLpEvent_closePath(HvLpEvent_Type eventType, HvLpIndex lpIndex) +{ + if ((eventType < HvLpEvent_Type_NumTypes) && + lpEventHandler[eventType] && + lpEventHandlerPaths[eventType]) { + if (lpIndex == 0) + lpIndex = itLpNaca.xLpIndex; + HvCallEvent_closeLpEventPath(lpIndex, eventType); + --lpEventHandlerPaths[eventType]; + return 0; + } + return 1; +} + +static int proc_lpevents_show(struct seq_file *m, void *v) +{ + int cpu, i; + unsigned long sum; + static unsigned long cpu_totals[NR_CPUS]; + + /* FIXME: do we care that there's no locking here? */ + sum = 0; + for_each_online_cpu(cpu) { + cpu_totals[cpu] = 0; + for (i = 0; i < HvLpEvent_Type_NumTypes; i++) { + cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i]; + } + sum += cpu_totals[cpu]; + } + + seq_printf(m, "LpEventQueue 0\n"); + seq_printf(m, " events processed:\t%lu\n", sum); + + for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) { + sum = 0; + for_each_online_cpu(cpu) { + sum += per_cpu(hvlpevent_counts, cpu)[i]; + } + + seq_printf(m, " %-20s %10lu\n", event_types[i], sum); + } + + seq_printf(m, "\n events processed by processor:\n"); + + for_each_online_cpu(cpu) { + seq_printf(m, " CPU%02d %10lu\n", cpu, cpu_totals[cpu]); + } + + return 0; +} + +static int proc_lpevents_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_lpevents_show, NULL); +} + +static struct file_operations proc_lpevents_operations = { + .open = proc_lpevents_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_lpevents_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_lpevents_operations; + + return 0; +} +__initcall(proc_lpevents_init); + diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c new file mode 100644 index 00000000000..82f5abab9af --- /dev/null +++ b/arch/powerpc/platforms/iseries/mf.c @@ -0,0 +1,1316 @@ +/* + * Copyright (C) 2001 Troy D. Armstrong IBM Corporation + * Copyright (C) 2004-2005 Stephen Rothwell IBM Corporation + * + * This modules exists as an interface between a Linux secondary partition + * running on an iSeries and the primary partition's Virtual Service + * Processor (VSP) object. The VSP has final authority over powering on/off + * all partitions in the iSeries. It also provides miscellaneous low-level + * machine facility type operations. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/bcd.h> + +#include <asm/time.h> +#include <asm/uaccess.h> +#include <asm/paca.h> +#include <asm/iSeries/vio.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/ItLpQueue.h> + +#include "setup.h" + +extern int piranha_simulator; + +/* + * This is the structure layout for the Machine Facilites LPAR event + * flows. + */ +struct vsp_cmd_data { + u64 token; + u16 cmd; + HvLpIndex lp_index; + u8 result_code; + u32 reserved; + union { + u64 state; /* GetStateOut */ + u64 ipl_type; /* GetIplTypeOut, Function02SelectIplTypeIn */ + u64 ipl_mode; /* GetIplModeOut, Function02SelectIplModeIn */ + u64 page[4]; /* GetSrcHistoryIn */ + u64 flag; /* GetAutoIplWhenPrimaryIplsOut, + SetAutoIplWhenPrimaryIplsIn, + WhiteButtonPowerOffIn, + Function08FastPowerOffIn, + IsSpcnRackPowerIncompleteOut */ + struct { + u64 token; + u64 address_type; + u64 side; + u32 length; + u32 offset; + } kern; /* SetKernelImageIn, GetKernelImageIn, + SetKernelCmdLineIn, GetKernelCmdLineIn */ + u32 length_out; /* GetKernelImageOut, GetKernelCmdLineOut */ + u8 reserved[80]; + } sub_data; +}; + +struct vsp_rsp_data { + struct completion com; + struct vsp_cmd_data *response; +}; + +struct alloc_data { + u16 size; + u16 type; + u32 count; + u16 reserved1; + u8 reserved2; + HvLpIndex target_lp; +}; + +struct ce_msg_data; + +typedef void (*ce_msg_comp_hdlr)(void *token, struct ce_msg_data *vsp_cmd_rsp); + +struct ce_msg_comp_data { + ce_msg_comp_hdlr handler; + void *token; +}; + +struct ce_msg_data { + u8 ce_msg[12]; + char reserved[4]; + struct ce_msg_comp_data *completion; +}; + +struct io_mf_lp_event { + struct HvLpEvent hp_lp_event; + u16 subtype_result_code; + u16 reserved1; + u32 reserved2; + union { + struct alloc_data alloc; + struct ce_msg_data ce_msg; + struct vsp_cmd_data vsp_cmd; + } data; +}; + +#define subtype_data(a, b, c, d) \ + (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +/* + * All outgoing event traffic is kept on a FIFO queue. The first + * pointer points to the one that is outstanding, and all new + * requests get stuck on the end. Also, we keep a certain number of + * preallocated pending events so that we can operate very early in + * the boot up sequence (before kmalloc is ready). + */ +struct pending_event { + struct pending_event *next; + struct io_mf_lp_event event; + MFCompleteHandler hdlr; + char dma_data[72]; + unsigned dma_data_length; + unsigned remote_address; +}; +static spinlock_t pending_event_spinlock; +static struct pending_event *pending_event_head; +static struct pending_event *pending_event_tail; +static struct pending_event *pending_event_avail; +static struct pending_event pending_event_prealloc[16]; + +/* + * Put a pending event onto the available queue, so it can get reused. + * Attention! You must have the pending_event_spinlock before calling! + */ +static void free_pending_event(struct pending_event *ev) +{ + if (ev != NULL) { + ev->next = pending_event_avail; + pending_event_avail = ev; + } +} + +/* + * Enqueue the outbound event onto the stack. If the queue was + * empty to begin with, we must also issue it via the Hypervisor + * interface. There is a section of code below that will touch + * the first stack pointer without the protection of the pending_event_spinlock. + * This is OK, because we know that nobody else will be modifying + * the first pointer when we do this. + */ +static int signal_event(struct pending_event *ev) +{ + int rc = 0; + unsigned long flags; + int go = 1; + struct pending_event *ev1; + HvLpEvent_Rc hv_rc; + + /* enqueue the event */ + if (ev != NULL) { + ev->next = NULL; + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_head == NULL) + pending_event_head = ev; + else { + go = 0; + pending_event_tail->next = ev; + } + pending_event_tail = ev; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + + /* send the event */ + while (go) { + go = 0; + + /* any DMA data to send beforehand? */ + if (pending_event_head->dma_data_length > 0) + HvCallEvent_dmaToSp(pending_event_head->dma_data, + pending_event_head->remote_address, + pending_event_head->dma_data_length, + HvLpDma_Direction_LocalToRemote); + + hv_rc = HvCallEvent_signalLpEvent( + &pending_event_head->event.hp_lp_event); + if (hv_rc != HvLpEvent_Rc_Good) { + printk(KERN_ERR "mf.c: HvCallEvent_signalLpEvent() " + "failed with %d\n", (int)hv_rc); + + spin_lock_irqsave(&pending_event_spinlock, flags); + ev1 = pending_event_head; + pending_event_head = pending_event_head->next; + if (pending_event_head != NULL) + go = 1; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + if (ev1 == ev) + rc = -EIO; + else if (ev1->hdlr != NULL) + (*ev1->hdlr)((void *)ev1->event.hp_lp_event.xCorrelationToken, -EIO); + + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(ev1); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + } + } + + return rc; +} + +/* + * Allocate a new pending_event structure, and initialize it. + */ +static struct pending_event *new_pending_event(void) +{ + struct pending_event *ev = NULL; + HvLpIndex primary_lp = HvLpConfig_getPrimaryLpIndex(); + unsigned long flags; + struct HvLpEvent *hev; + + spin_lock_irqsave(&pending_event_spinlock, flags); + if (pending_event_avail != NULL) { + ev = pending_event_avail; + pending_event_avail = pending_event_avail->next; + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (ev == NULL) { + ev = kmalloc(sizeof(struct pending_event), GFP_ATOMIC); + if (ev == NULL) { + printk(KERN_ERR "mf.c: unable to kmalloc %ld bytes\n", + sizeof(struct pending_event)); + return NULL; + } + } + memset(ev, 0, sizeof(struct pending_event)); + hev = &ev->event.hp_lp_event; + hev->xFlags.xValid = 1; + hev->xFlags.xAckType = HvLpEvent_AckType_ImmediateAck; + hev->xFlags.xAckInd = HvLpEvent_AckInd_DoAck; + hev->xFlags.xFunction = HvLpEvent_Function_Int; + hev->xType = HvLpEvent_Type_MachineFac; + hev->xSourceLp = HvLpConfig_getLpIndex(); + hev->xTargetLp = primary_lp; + hev->xSizeMinus1 = sizeof(ev->event) - 1; + hev->xRc = HvLpEvent_Rc_Good; + hev->xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + hev->xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primary_lp, + HvLpEvent_Type_MachineFac); + + return ev; +} + +static int signal_vsp_instruction(struct vsp_cmd_data *vsp_cmd) +{ + struct pending_event *ev = new_pending_event(); + int rc; + struct vsp_rsp_data response; + + if (ev == NULL) + return -ENOMEM; + + init_completion(&response.com); + response.response = vsp_cmd; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.token = (u64)&response; + ev->event.data.vsp_cmd.cmd = vsp_cmd->cmd; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + memcpy(&(ev->event.data.vsp_cmd.sub_data), + &(vsp_cmd->sub_data), sizeof(vsp_cmd->sub_data)); + mb(); + + rc = signal_event(ev); + if (rc == 0) + wait_for_completion(&response.com); + return rc; +} + + +/* + * Send a 12-byte CE message to the primary partition VSP object + */ +static int signal_ce_msg(char *ce_msg, struct ce_msg_comp_data *completion) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + return signal_event(ev); +} + +/* + * Send a 12-byte CE message (with no data) to the primary partition VSP object + */ +static int signal_ce_msg_simple(u8 ce_op, struct ce_msg_comp_data *completion) +{ + u8 ce_msg[12]; + + memset(ce_msg, 0, sizeof(ce_msg)); + ce_msg[3] = ce_op; + return signal_ce_msg(ce_msg, completion); +} + +/* + * Send a 12-byte CE message and DMA data to the primary partition VSP object + */ +static int dma_and_signal_ce_msg(char *ce_msg, + struct ce_msg_comp_data *completion, void *dma_data, + unsigned dma_data_length, unsigned remote_address) +{ + struct pending_event *ev = new_pending_event(); + + if (ev == NULL) + return -ENOMEM; + + ev->event.hp_lp_event.xSubtype = 0; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'C', 'E'); + memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12); + ev->event.data.ce_msg.completion = completion; + memcpy(ev->dma_data, dma_data, dma_data_length); + ev->dma_data_length = dma_data_length; + ev->remote_address = remote_address; + return signal_event(ev); +} + +/* + * Initiate a nice (hopefully) shutdown of Linux. We simply are + * going to try and send the init process a SIGINT signal. If + * this fails (why?), we'll simply force it off in a not-so-nice + * manner. + */ +static int shutdown(void) +{ + int rc = kill_proc(1, SIGINT, 1); + + if (rc) { + printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), " + "hard shutdown commencing\n", rc); + mf_power_off(); + } else + printk(KERN_INFO "mf.c: init has been successfully notified " + "to proceed with shutdown\n"); + return rc; +} + +/* + * The primary partition VSP object is sending us a new + * event flow. Handle it... + */ +static void handle_int(struct io_mf_lp_event *event) +{ + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + unsigned long flags; + struct pending_event *pev; + + /* ack the interrupt */ + event->hp_lp_event.xRc = HvLpEvent_Rc_Good; + HvCallEvent_ackLpEvent(&event->hp_lp_event); + + /* process interrupt */ + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE message */ + ce_msg_data = &event->data.ce_msg; + switch (ce_msg_data->ce_msg[3]) { + case 0x5B: /* power control notification */ + if ((ce_msg_data->ce_msg[5] & 0x20) != 0) { + printk(KERN_INFO "mf.c: Commencing partition shutdown\n"); + if (shutdown() == 0) + signal_ce_msg_simple(0xDB, NULL); + } + break; + case 0xC0: /* get time */ + spin_lock_irqsave(&pending_event_spinlock, flags); + pev = pending_event_head; + if (pev != NULL) + pending_event_head = pending_event_head->next; + spin_unlock_irqrestore(&pending_event_spinlock, flags); + if (pev == NULL) + break; + pce_msg_data = &pev->event.data.ce_msg; + if (pce_msg_data->ce_msg[3] != 0x40) + break; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + spin_lock_irqsave(&pending_event_spinlock, flags); + free_pending_event(pev); + spin_unlock_irqrestore(&pending_event_spinlock, flags); + /* send next waiting event */ + if (pending_event_head != NULL) + signal_event(NULL); + break; + } + break; + case 1: /* IT sys shutdown */ + printk(KERN_INFO "mf.c: Commencing system shutdown\n"); + shutdown(); + break; + } +} + +/* + * The primary partition VSP object is acknowledging the receipt + * of a flow we sent to them. If there are other flows queued + * up, we must send another one now... + */ +static void handle_ack(struct io_mf_lp_event *event) +{ + unsigned long flags; + struct pending_event *two = NULL; + unsigned long free_it = 0; + struct ce_msg_data *ce_msg_data; + struct ce_msg_data *pce_msg_data; + struct vsp_rsp_data *rsp; + + /* handle current event */ + if (pending_event_head == NULL) { + printk(KERN_ERR "mf.c: stack empty for receiving ack\n"); + return; + } + + switch (event->hp_lp_event.xSubtype) { + case 0: /* CE msg */ + ce_msg_data = &event->data.ce_msg; + if (ce_msg_data->ce_msg[3] != 0x40) { + free_it = 1; + break; + } + if (ce_msg_data->ce_msg[2] == 0) + break; + free_it = 1; + pce_msg_data = &pending_event_head->event.data.ce_msg; + if (pce_msg_data->completion != NULL) { + ce_msg_comp_hdlr handler = + pce_msg_data->completion->handler; + void *token = pce_msg_data->completion->token; + + if (handler != NULL) + (*handler)(token, ce_msg_data); + } + break; + case 4: /* allocate */ + case 5: /* deallocate */ + if (pending_event_head->hdlr != NULL) + (*pending_event_head->hdlr)((void *)event->hp_lp_event.xCorrelationToken, event->data.alloc.count); + free_it = 1; + break; + case 6: + free_it = 1; + rsp = (struct vsp_rsp_data *)event->data.vsp_cmd.token; + if (rsp == NULL) { + printk(KERN_ERR "mf.c: no rsp\n"); + break; + } + if (rsp->response != NULL) + memcpy(rsp->response, &event->data.vsp_cmd, + sizeof(event->data.vsp_cmd)); + complete(&rsp->com); + break; + } + + /* remove from queue */ + spin_lock_irqsave(&pending_event_spinlock, flags); + if ((pending_event_head != NULL) && (free_it == 1)) { + struct pending_event *oldHead = pending_event_head; + + pending_event_head = pending_event_head->next; + two = pending_event_head; + free_pending_event(oldHead); + } + spin_unlock_irqrestore(&pending_event_spinlock, flags); + + /* send next waiting event */ + if (two != NULL) + signal_event(NULL); +} + +/* + * This is the generic event handler we are registering with + * the Hypervisor. Ensure the flows are for us, and then + * parse it enough to know if it is an interrupt or an + * acknowledge. + */ +static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs) +{ + if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) { + switch(event->xFlags.xFunction) { + case HvLpEvent_Function_Ack: + handle_ack((struct io_mf_lp_event *)event); + break; + case HvLpEvent_Function_Int: + handle_int((struct io_mf_lp_event *)event); + break; + default: + printk(KERN_ERR "mf.c: non ack/int event received\n"); + break; + } + } else + printk(KERN_ERR "mf.c: alien event received\n"); +} + +/* + * Global kernel interface to allocate and seed events into the + * Hypervisor. + */ +void mf_allocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned size, unsigned count, MFCompleteHandler hdlr, + void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) { + rc = -ENOMEM; + } else { + ev->event.hp_lp_event.xSubtype = 4; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'A'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.size = size; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_allocate_lp_events); + +/* + * Global kernel interface to unseed and deallocate events already in + * Hypervisor. + */ +void mf_deallocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type, + unsigned count, MFCompleteHandler hdlr, void *user_token) +{ + struct pending_event *ev = new_pending_event(); + int rc; + + if (ev == NULL) + rc = -ENOMEM; + else { + ev->event.hp_lp_event.xSubtype = 5; + ev->event.hp_lp_event.xCorrelationToken = (u64)user_token; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'M', 'D'); + ev->event.data.alloc.target_lp = target_lp; + ev->event.data.alloc.type = type; + ev->event.data.alloc.count = count; + ev->hdlr = hdlr; + rc = signal_event(ev); + } + if ((rc != 0) && (hdlr != NULL)) + (*hdlr)(user_token, rc); +} +EXPORT_SYMBOL(mf_deallocate_lp_events); + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to power this partition off. + */ +void mf_power_off(void) +{ + printk(KERN_INFO "mf.c: Down it goes...\n"); + signal_ce_msg_simple(0x4d, NULL); + for (;;) + ; +} + +/* + * Global kernel interface to tell the VSP object in the primary + * partition to reboot this partition. + */ +void mf_reboot(void) +{ + printk(KERN_INFO "mf.c: Preparing to bounce...\n"); + signal_ce_msg_simple(0x4e, NULL); + for (;;) + ; +} + +/* + * Display a single word SRC onto the VSP control panel. + */ +void mf_display_src(u32 word) +{ + u8 ce[12]; + + memset(ce, 0, sizeof(ce)); + ce[3] = 0x4a; + ce[7] = 0x01; + ce[8] = word >> 24; + ce[9] = word >> 16; + ce[10] = word >> 8; + ce[11] = word; + signal_ce_msg(ce, NULL); +} + +/* + * Display a single word SRC of the form "PROGXXXX" on the VSP control panel. + */ +void mf_display_progress(u16 value) +{ + u8 ce[12]; + u8 src[72]; + + memcpy(ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12); + memcpy(src, "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00PROGxxxx ", + 72); + src[6] = value >> 8; + src[7] = value & 255; + src[44] = "0123456789ABCDEF"[(value >> 12) & 15]; + src[45] = "0123456789ABCDEF"[(value >> 8) & 15]; + src[46] = "0123456789ABCDEF"[(value >> 4) & 15]; + src[47] = "0123456789ABCDEF"[value & 15]; + dma_and_signal_ce_msg(ce, NULL, src, sizeof(src), 9 * 64 * 1024); +} + +/* + * Clear the VSP control panel. Used to "erase" an SRC that was + * previously displayed. + */ +void mf_clear_src(void) +{ + signal_ce_msg_simple(0x4b, NULL); +} + +/* + * Initialization code here. + */ +void mf_init(void) +{ + int i; + + /* initialize */ + spin_lock_init(&pending_event_spinlock); + for (i = 0; + i < sizeof(pending_event_prealloc) / sizeof(*pending_event_prealloc); + ++i) + free_pending_event(&pending_event_prealloc[i]); + HvLpEvent_registerHandler(HvLpEvent_Type_MachineFac, &hv_handler); + + /* virtual continue ack */ + signal_ce_msg_simple(0x57, NULL); + + /* initialization complete */ + printk(KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities " + "initialized\n"); +} + +struct rtc_time_data { + struct completion com; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + complete(&rtc->com); +} + +static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm) +{ + tm->tm_wday = 0; + tm->tm_yday = 0; + tm->tm_isdst = 0; + if (rc) { + tm->tm_sec = 0; + tm->tm_min = 0; + tm->tm_hour = 0; + tm->tm_mday = 15; + tm->tm_mon = 5; + tm->tm_year = 52; + return rc; + } + + if ((ce_msg[2] == 0xa9) || + (ce_msg[2] == 0xaf)) { + /* TOD clock is not set */ + tm->tm_sec = 1; + tm->tm_min = 1; + tm->tm_hour = 1; + tm->tm_mday = 10; + tm->tm_mon = 8; + tm->tm_year = 71; + mf_set_rtc(tm); + } + { + u8 year = ce_msg[5]; + u8 sec = ce_msg[6]; + u8 min = ce_msg[7]; + u8 hour = ce_msg[8]; + u8 day = ce_msg[10]; + u8 mon = ce_msg[11]; + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + + if (year <= 69) + year += 100; + + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; + } + + return 0; +} + +int mf_get_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + init_completion(&rtc_data.com); + ce_complete.handler = &get_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + wait_for_completion(&rtc_data.com); + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + +struct boot_rtc_time_data { + int busy; + struct ce_msg_data ce_msg; + int rc; +}; + +static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg) +{ + struct boot_rtc_time_data *rtc = token; + + memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg)); + rtc->rc = 0; + rtc->busy = 0; +} + +int mf_get_boot_rtc(struct rtc_time *tm) +{ + struct ce_msg_comp_data ce_complete; + struct boot_rtc_time_data rtc_data; + int rc; + + memset(&ce_complete, 0, sizeof(ce_complete)); + memset(&rtc_data, 0, sizeof(rtc_data)); + rtc_data.busy = 1; + ce_complete.handler = &get_boot_rtc_time_complete; + ce_complete.token = &rtc_data; + rc = signal_ce_msg_simple(0x40, &ce_complete); + if (rc) + return rc; + /* We need to poll here as we are not yet taking interrupts */ + while (rtc_data.busy) { + if (hvlpevent_is_pending()) + process_hvlpevents(NULL); + } + return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); +} + +int mf_set_rtc(struct rtc_time *tm) +{ + char ce_time[12]; + u8 day, mon, hour, min, sec, y1, y2; + unsigned year; + + year = 1900 + tm->tm_year; + y1 = year / 100; + y2 = year % 100; + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + day = tm->tm_mday; + mon = tm->tm_mon + 1; + + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hour); + BIN_TO_BCD(mon); + BIN_TO_BCD(day); + BIN_TO_BCD(y1); + BIN_TO_BCD(y2); + + memset(ce_time, 0, sizeof(ce_time)); + ce_time[3] = 0x41; + ce_time[4] = y1; + ce_time[5] = y2; + ce_time[6] = sec; + ce_time[7] = min; + ce_time[8] = hour; + ce_time[10] = day; + ce_time[11] = mon; + + return signal_ce_msg(ce_time, NULL); +} + +#ifdef CONFIG_PROC_FS + +static int proc_mf_dump_cmdline(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char *p; + struct vsp_cmd_data vsp_cmd; + int rc; + dma_addr_t dma_addr; + + /* The HV appears to return no more than 256 bytes of command line */ + if (off >= 256) + return 0; + if ((off + count) > 256) + count = 256 - off; + + dma_addr = dma_map_single(iSeries_vio_dev, page, off + count, + DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + return -ENOMEM; + memset(page, 0, off + count); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 33; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = off + count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + dma_unmap_single(iSeries_vio_dev, dma_addr, off + count, + DMA_FROM_DEVICE); + if (rc) + return rc; + if (vsp_cmd.result_code != 0) + return -ENOMEM; + p = page; + len = 0; + while (len < (off + count)) { + if ((*p == '\0') || (*p == '\n')) { + if (*p == '\0') + *p = '\n'; + p++; + len++; + *eof = 1; + break; + } + p++; + len++; + } + + if (len < off) { + *eof = 1; + len = 0; + } + return len; +} + +#if 0 +static int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side) +{ + struct vsp_cmd_data vsp_cmd; + int rc; + int len = *size; + dma_addr_t dma_addr; + + dma_addr = dma_map_single(iSeries_vio_dev, buffer, len, + DMA_FROM_DEVICE); + memset(buffer, 0, len); + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 32; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = side; + vsp_cmd.sub_data.kern.offset = offset; + vsp_cmd.sub_data.kern.length = len; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc == 0) { + if (vsp_cmd.result_code == 0) + *size = vsp_cmd.sub_data.length_out; + else + rc = -ENOMEM; + } + + dma_unmap_single(iSeries_vio_dev, dma_addr, len, DMA_FROM_DEVICE); + + return rc; +} + +static int proc_mf_dump_vmlinux(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int sizeToGet = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) { + if (sizeToGet != 0) { + *start = page + off; + return sizeToGet; + } + *eof = 1; + return 0; + } + *eof = 1; + return 0; +} +#endif + +static int proc_mf_dump_side(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + char mf_current_side = ' '; + struct vsp_cmd_data vsp_cmd; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 2; + vsp_cmd.sub_data.ipl_type = 0; + mb(); + + if (signal_vsp_instruction(&vsp_cmd) == 0) { + if (vsp_cmd.result_code == 0) { + switch (vsp_cmd.sub_data.ipl_type) { + case 0: mf_current_side = 'A'; + break; + case 1: mf_current_side = 'B'; + break; + case 2: mf_current_side = 'C'; + break; + default: mf_current_side = 'D'; + break; + } + } + } + + len = sprintf(page, "%c\n", mf_current_side); + + if (len <= (off + count)) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int proc_mf_change_side(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char side; + u64 newSide; + struct vsp_cmd_data vsp_cmd; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (count == 0) + return 0; + + if (get_user(side, buffer)) + return -EFAULT; + + switch (side) { + case 'A': newSide = 0; + break; + case 'B': newSide = 1; + break; + case 'C': newSide = 2; + break; + case 'D': newSide = 3; + break; + default: + printk(KERN_ERR "mf_proc.c: proc_mf_change_side: invalid side\n"); + return -EINVAL; + } + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.sub_data.ipl_type = newSide; + vsp_cmd.cmd = 10; + + (void)signal_vsp_instruction(&vsp_cmd); + + return count; +} + +#if 0 +static void mf_getSrcHistory(char *buffer, int size) +{ + struct IplTypeReturnStuff return_stuff; + struct pending_event *ev = new_pending_event(); + int rc = 0; + char *pages[4]; + + pages[0] = kmalloc(4096, GFP_ATOMIC); + pages[1] = kmalloc(4096, GFP_ATOMIC); + pages[2] = kmalloc(4096, GFP_ATOMIC); + pages[3] = kmalloc(4096, GFP_ATOMIC); + if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL) + || (pages[2] == NULL) || (pages[3] == NULL)) + return -ENOMEM; + + return_stuff.xType = 0; + return_stuff.xRc = 0; + return_stuff.xDone = 0; + ev->event.hp_lp_event.xSubtype = 6; + ev->event.hp_lp_event.x.xSubtypeData = + subtype_data('M', 'F', 'V', 'I'); + ev->event.data.vsp_cmd.xEvent = &return_stuff; + ev->event.data.vsp_cmd.cmd = 4; + ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex(); + ev->event.data.vsp_cmd.result_code = 0xFF; + ev->event.data.vsp_cmd.reserved = 0; + ev->event.data.vsp_cmd.sub_data.page[0] = ISERIES_HV_ADDR(pages[0]); + ev->event.data.vsp_cmd.sub_data.page[1] = ISERIES_HV_ADDR(pages[1]); + ev->event.data.vsp_cmd.sub_data.page[2] = ISERIES_HV_ADDR(pages[2]); + ev->event.data.vsp_cmd.sub_data.page[3] = ISERIES_HV_ADDR(pages[3]); + mb(); + if (signal_event(ev) != 0) + return; + + while (return_stuff.xDone != 1) + udelay(10); + if (return_stuff.xRc == 0) + memcpy(buffer, pages[0], size); + kfree(pages[0]); + kfree(pages[1]); + kfree(pages[2]); + kfree(pages[3]); +} +#endif + +static int proc_mf_dump_src(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ +#if 0 + int len; + + mf_getSrcHistory(page, count); + len = count; + len -= off; + if (len < count) { + *eof = 1; + if (len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +#else + return 0; +#endif +} + +static int proc_mf_change_src(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char stkbuf[10]; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if ((count < 4) && (count != 1)) { + printk(KERN_ERR "mf_proc: invalid src\n"); + return -EINVAL; + } + + if (count > (sizeof(stkbuf) - 1)) + count = sizeof(stkbuf) - 1; + if (copy_from_user(stkbuf, buffer, count)) + return -EFAULT; + + if ((count == 1) && (*stkbuf == '\0')) + mf_clear_src(); + else + mf_display_src(*(u32 *)stkbuf); + + return count; +} + +static int proc_mf_change_cmdline(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + struct vsp_cmd_data vsp_cmd; + dma_addr_t dma_addr; + char *page; + int ret = -EACCES; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + ret = -ENOMEM; + if (page == NULL) + goto out; + + ret = -EFAULT; + if (copy_from_user(page, buffer, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 31; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)data; + vsp_cmd.sub_data.kern.length = count; + mb(); + (void)signal_vsp_instruction(&vsp_cmd); + ret = count; + +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return ret; +} + +static ssize_t proc_mf_change_vmlinux(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + ssize_t rc; + dma_addr_t dma_addr; + char *page; + struct vsp_cmd_data vsp_cmd; + + rc = -EACCES; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + dma_addr = 0; + page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr, + GFP_ATOMIC); + rc = -ENOMEM; + if (page == NULL) { + printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n"); + goto out; + } + rc = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out_free; + + memset(&vsp_cmd, 0, sizeof(vsp_cmd)); + vsp_cmd.cmd = 30; + vsp_cmd.sub_data.kern.token = dma_addr; + vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; + vsp_cmd.sub_data.kern.side = (u64)dp->data; + vsp_cmd.sub_data.kern.offset = *ppos; + vsp_cmd.sub_data.kern.length = count; + mb(); + rc = signal_vsp_instruction(&vsp_cmd); + if (rc) + goto out_free; + rc = -ENOMEM; + if (vsp_cmd.result_code != 0) + goto out_free; + + *ppos += count; + rc = count; +out_free: + dma_free_coherent(iSeries_vio_dev, count, page, dma_addr); +out: + return rc; +} + +static struct file_operations proc_vmlinux_operations = { + .write = proc_mf_change_vmlinux, +}; + +static int __init mf_proc_init(void) +{ + struct proc_dir_entry *mf_proc_root; + struct proc_dir_entry *ent; + struct proc_dir_entry *mf; + char name[2]; + int i; + + mf_proc_root = proc_mkdir("iSeries/mf", NULL); + if (!mf_proc_root) + return 1; + + name[1] = '\0'; + for (i = 0; i < 4; i++) { + name[0] = 'A' + i; + mf = proc_mkdir(name, mf_proc_root); + if (!mf) + return 1; + + ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->read_proc = proc_mf_dump_cmdline; + ent->write_proc = proc_mf_change_cmdline; + + if (i == 3) /* no vmlinux entry for 'D' */ + continue; + + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)(long)i; + ent->proc_fops = &proc_vmlinux_operations; + } + + ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_side; + ent->write_proc = proc_mf_change_side; + + ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + if (!ent) + return 1; + ent->nlink = 1; + ent->data = (void *)0; + ent->read_proc = proc_mf_dump_src; + ent->write_proc = proc_mf_change_src; + + return 0; +} + +__initcall(mf_proc_init); + +#endif /* CONFIG_PROC_FS */ + +/* + * Get the RTC from the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +void iSeries_get_rtc_time(struct rtc_time *rtc_tm) +{ + if (piranha_simulator) + return; + + mf_get_rtc(rtc_tm); + rtc_tm->tm_mon--; +} + +/* + * Set the RTC in the virtual service processor + * This requires flowing LpEvents to the primary partition + */ +int iSeries_set_rtc_time(struct rtc_time *tm) +{ + mf_set_rtc(tm); + return 0; +} + +void iSeries_get_boot_time(struct rtc_time *tm) +{ + if (piranha_simulator) + return; + + mf_get_boot_rtc(tm); + tm->tm_mon -= 1; +} diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S new file mode 100644 index 00000000000..09f14522e17 --- /dev/null +++ b/arch/powerpc/platforms/iseries/misc.S @@ -0,0 +1,55 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-2005 IBM Corp + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/processor.h> +#include <asm/asm-offsets.h> + + .text + +/* unsigned long local_save_flags(void) */ +_GLOBAL(local_get_flags) + lbz r3,PACAPROCENABLED(r13) + blr + +/* unsigned long local_irq_disable(void) */ +_GLOBAL(local_irq_disable) + lbz r3,PACAPROCENABLED(r13) + li r4,0 + stb r4,PACAPROCENABLED(r13) + blr /* Done */ + +/* void local_irq_restore(unsigned long flags) */ +_GLOBAL(local_irq_restore) + lbz r5,PACAPROCENABLED(r13) + /* Check if things are setup the way we want _already_. */ + cmpw 0,r3,r5 + beqlr + /* are we enabling interrupts? */ + cmpdi 0,r3,0 + stb r3,PACAPROCENABLED(r13) + beqlr + /* Check pending interrupts */ + /* A decrementer, IPI or PMC interrupt may have occurred + * while we were in the hypervisor (which enables) */ + ld r4,PACALPPACA+LPPACAANYINT(r13) + cmpdi r4,0 + beqlr + + /* + * Handle pending interrupts in interrupt context + */ + li r0,0x5555 + sc + blr diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c new file mode 100644 index 00000000000..501b1dcbfac --- /dev/null +++ b/arch/powerpc/platforms/iseries/pci.c @@ -0,0 +1,912 @@ +/* + * Copyright (C) 2001 Allan Trautman, IBM Corporation + * + * iSeries specific routines for PCI. + * + * Based on code from pci.c and iSeries_pci.c 32bit + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ide.h> +#include <linux/pci.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/ppcdebug.h> +#include <asm/iommu.h> + +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/iSeries_irq.h> +#include <asm/iSeries/iSeries_pci.h> +#include <asm/iSeries/mf.h> + +#include <asm/ppc-pci.h> + +extern unsigned long io_page_mask; + +/* + * Forward declares of prototypes. + */ +static struct device_node *find_Device_Node(int bus, int devfn); +static void scan_PHB_slots(struct pci_controller *Phb); +static void scan_EADS_bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel); +static int scan_bridge_slot(HvBusNumber Bus, struct HvCallPci_BridgeInfo *Info); + +LIST_HEAD(iSeries_Global_Device_List); + +static int DeviceCount; + +/* Counters and control flags. */ +static long Pci_Io_Read_Count; +static long Pci_Io_Write_Count; +#if 0 +static long Pci_Cfg_Read_Count; +static long Pci_Cfg_Write_Count; +#endif +static long Pci_Error_Count; + +static int Pci_Retry_Max = 3; /* Only retry 3 times */ +static int Pci_Error_Flag = 1; /* Set Retry Error on. */ + +static struct pci_ops iSeries_pci_ops; + +/* + * Table defines + * Each Entry size is 4 MB * 1024 Entries = 4GB I/O address space. + */ +#define IOMM_TABLE_MAX_ENTRIES 1024 +#define IOMM_TABLE_ENTRY_SIZE 0x0000000000400000UL +#define BASE_IO_MEMORY 0xE000000000000000UL + +static unsigned long max_io_memory = 0xE000000000000000UL; +static long current_iomm_table_entry; + +/* + * Lookup Tables. + */ +static struct device_node **iomm_table; +static u8 *iobar_table; + +/* + * Static and Global variables + */ +static char *pci_io_text = "iSeries PCI I/O"; +static DEFINE_SPINLOCK(iomm_table_lock); + +/* + * iomm_table_initialize + * + * Allocates and initalizes the Address Translation Table and Bar + * Tables to get them ready for use. Must be called before any + * I/O space is handed out to the device BARs. + */ +static void iomm_table_initialize(void) +{ + spin_lock(&iomm_table_lock); + iomm_table = kmalloc(sizeof(*iomm_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + iobar_table = kmalloc(sizeof(*iobar_table) * IOMM_TABLE_MAX_ENTRIES, + GFP_KERNEL); + spin_unlock(&iomm_table_lock); + if ((iomm_table == NULL) || (iobar_table == NULL)) + panic("PCI: I/O tables allocation failed.\n"); +} + +/* + * iomm_table_allocate_entry + * + * Adds pci_dev entry in address translation table + * + * - Allocates the number of entries required in table base on BAR + * size. + * - Allocates starting at BASE_IO_MEMORY and increases. + * - The size is round up to be a multiple of entry size. + * - CurrentIndex is incremented to keep track of the last entry. + * - Builds the resource entry for allocated BARs. + */ +static void iomm_table_allocate_entry(struct pci_dev *dev, int bar_num) +{ + struct resource *bar_res = &dev->resource[bar_num]; + long bar_size = pci_resource_len(dev, bar_num); + + /* + * No space to allocate, quick exit, skip Allocation. + */ + if (bar_size == 0) + return; + /* + * Set Resource values. + */ + spin_lock(&iomm_table_lock); + bar_res->name = pci_io_text; + bar_res->start = + IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry; + bar_res->start += BASE_IO_MEMORY; + bar_res->end = bar_res->start + bar_size - 1; + /* + * Allocate the number of table entries needed for BAR. + */ + while (bar_size > 0 ) { + iomm_table[current_iomm_table_entry] = dev->sysdata; + iobar_table[current_iomm_table_entry] = bar_num; + bar_size -= IOMM_TABLE_ENTRY_SIZE; + ++current_iomm_table_entry; + } + max_io_memory = BASE_IO_MEMORY + + (IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry); + spin_unlock(&iomm_table_lock); +} + +/* + * allocate_device_bars + * + * - Allocates ALL pci_dev BAR's and updates the resources with the + * BAR value. BARS with zero length will have the resources + * The HvCallPci_getBarParms is used to get the size of the BAR + * space. It calls iomm_table_allocate_entry to allocate + * each entry. + * - Loops through The Bar resources(0 - 5) including the ROM + * is resource(6). + */ +static void allocate_device_bars(struct pci_dev *dev) +{ + struct resource *bar_res; + int bar_num; + + for (bar_num = 0; bar_num <= PCI_ROM_RESOURCE; ++bar_num) { + bar_res = &dev->resource[bar_num]; + iomm_table_allocate_entry(dev, bar_num); + } +} + +/* + * Log error information to system console. + * Filter out the device not there errors. + * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx + * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx + * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx + */ +static void pci_Log_Error(char *Error_Text, int Bus, int SubBus, + int AgentId, int HvRc) +{ + if (HvRc == 0x0302) + return; + printk(KERN_ERR "PCI: %s Failed: 0x%02X.%02X.%02X Rc: 0x%04X", + Error_Text, Bus, SubBus, AgentId, HvRc); +} + +/* + * build_device_node(u16 Bus, int SubBus, u8 DevFn) + */ +static struct device_node *build_device_node(HvBusNumber Bus, + HvSubBusNumber SubBus, int AgentId, int Function) +{ + struct device_node *node; + struct pci_dn *pdn; + + PPCDBG(PPCDBG_BUSWALK, + "-build_device_node 0x%02X.%02X.%02X Function: %02X\n", + Bus, SubBus, AgentId, Function); + + node = kmalloc(sizeof(struct device_node), GFP_KERNEL); + if (node == NULL) + return NULL; + memset(node, 0, sizeof(struct device_node)); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (pdn == NULL) { + kfree(node); + return NULL; + } + node->data = pdn; + list_add_tail(&node->Device_List, &iSeries_Global_Device_List); +#if 0 + pdn->DsaAddr = ((u64)Bus << 48) + ((u64)SubBus << 40) + ((u64)0x10 << 32); +#endif + pdn->DsaAddr.DsaAddr = 0; + pdn->DsaAddr.Dsa.busNumber = Bus; + pdn->DsaAddr.Dsa.subBusNumber = SubBus; + pdn->DsaAddr.Dsa.deviceId = 0x10; + pdn->devfn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId), Function); + return node; +} + +/* + * unsigned long __init find_and_init_phbs(void) + * + * Description: + * This function checks for all possible system PCI host bridges that connect + * PCI buses. The system hypervisor is queried as to the guest partition + * ownership status. A pci_controller is built for any bus which is partially + * owned or fully owned by this guest partition. + */ +unsigned long __init find_and_init_phbs(void) +{ + struct pci_controller *phb; + HvBusNumber bus; + + PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n"); + + /* Check all possible buses. */ + for (bus = 0; bus < 256; bus++) { + int ret = HvCallXm_testBus(bus); + if (ret == 0) { + printk("bus %d appears to exist\n", bus); + + phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + if (phb == NULL) + return -ENOMEM; + pci_setup_pci_controller(phb); + + phb->pci_mem_offset = phb->local_number = bus; + phb->first_busno = bus; + phb->last_busno = bus; + phb->ops = &iSeries_pci_ops; + + PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n", + phb, bus); + + /* Find and connect the devices. */ + scan_PHB_slots(phb); + } + /* + * Check for Unexpected Return code, a clue that something + * has gone wrong. + */ + else if (ret != 0x0301) + printk(KERN_ERR "Unexpected Return on Probe(0x%04X): 0x%04X", + bus, ret); + } + return 0; +} + +/* + * iSeries_pcibios_init + * + * Chance to initialize and structures or variable before PCI Bus walk. + */ +void iSeries_pcibios_init(void) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n"); + iomm_table_initialize(); + find_and_init_phbs(); + io_page_mask = -1; + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n"); +} + +/* + * iSeries_pci_final_fixup(void) + */ +void __init iSeries_pci_final_fixup(void) +{ + struct pci_dev *pdev = NULL; + struct device_node *node; + int DeviceCount = 0; + + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n"); + + /* Fix up at the device node and pci_dev relationship */ + mf_display_src(0xC9000100); + + printk("pcibios_final_fixup\n"); + for_each_pci_dev(pdev) { + node = find_Device_Node(pdev->bus->number, pdev->devfn); + printk("pci dev %p (%x.%x), node %p\n", pdev, + pdev->bus->number, pdev->devfn, node); + + if (node != NULL) { + ++DeviceCount; + pdev->sysdata = (void *)node; + PCI_DN(node)->pcidev = pdev; + PPCDBG(PPCDBG_BUSWALK, + "pdev 0x%p <==> DevNode 0x%p\n", + pdev, node); + allocate_device_bars(pdev); + iSeries_Device_Information(pdev, DeviceCount); + iommu_devnode_init_iSeries(node); + } else + printk("PCI: Device Tree not found for 0x%016lX\n", + (unsigned long)pdev); + pdev->irq = PCI_DN(node)->Irq; + } + iSeries_activate_IRQs(); + mf_display_src(0xC9000200); +} + +void pcibios_fixup_bus(struct pci_bus *PciBus) +{ + PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n", + PciBus->number); +} + +void pcibios_fixup_resources(struct pci_dev *pdev) +{ + PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev); +} + +/* + * Loop through each node function to find usable EADs bridges. + */ +static void scan_PHB_slots(struct pci_controller *Phb) +{ + struct HvCallPci_DeviceInfo *DevInfo; + HvBusNumber bus = Phb->local_number; /* System Bus */ + const HvSubBusNumber SubBus = 0; /* EADs is always 0. */ + int HvRc = 0; + int IdSel; + const int MaxAgents = 8; + + DevInfo = (struct HvCallPci_DeviceInfo*) + kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL); + if (DevInfo == NULL) + return; + + /* + * Probe for EADs Bridges + */ + for (IdSel = 1; IdSel < MaxAgents; ++IdSel) { + HvRc = HvCallPci_getDeviceInfo(bus, SubBus, IdSel, + ISERIES_HV_ADDR(DevInfo), + sizeof(struct HvCallPci_DeviceInfo)); + if (HvRc == 0) { + if (DevInfo->deviceType == HvCallPci_NodeDevice) + scan_EADS_bridge(bus, SubBus, IdSel); + else + printk("PCI: Invalid System Configuration(0x%02X)" + " for bus 0x%02x id 0x%02x.\n", + DevInfo->deviceType, bus, IdSel); + } + else + pci_Log_Error("getDeviceInfo", bus, SubBus, IdSel, HvRc); + } + kfree(DevInfo); +} + +static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus, + int IdSel) +{ + struct HvCallPci_BridgeInfo *BridgeInfo; + HvAgentId AgentId; + int Function; + int HvRc; + + BridgeInfo = (struct HvCallPci_BridgeInfo *) + kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL); + if (BridgeInfo == NULL) + return; + + /* Note: hvSubBus and irq is always be 0 at this level! */ + for (Function = 0; Function < 8; ++Function) { + AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(bus, SubBus, AgentId, 0); + if (HvRc == 0) { + printk("found device at bus %d idsel %d func %d (AgentId %x)\n", + bus, IdSel, Function, AgentId); + /* Connect EADs: 0x18.00.12 = 0x00 */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:Connect EADs: 0x%02X.%02X.%02X\n", + bus, SubBus, AgentId); + HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId, + ISERIES_HV_ADDR(BridgeInfo), + sizeof(struct HvCallPci_BridgeInfo)); + if (HvRc == 0) { + printk("bridge info: type %x subbus %x maxAgents %x maxsubbus %x logslot %x\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + PPCDBG(PPCDBG_BUSWALK, + "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n", + BridgeInfo->busUnitInfo.deviceType, + BridgeInfo->subBusNumber, + BridgeInfo->maxAgents, + BridgeInfo->maxSubBusNumber, + BridgeInfo->logicalSlotNumber); + + if (BridgeInfo->busUnitInfo.deviceType == + HvCallPci_BridgeDevice) { + /* Scan_Bridge_Slot...: 0x18.00.12 */ + scan_bridge_slot(bus, BridgeInfo); + } else + printk("PCI: Invalid Bridge Configuration(0x%02X)", + BridgeInfo->busUnitInfo.deviceType); + } + } else if (HvRc != 0x000B) + pci_Log_Error("EADs Connect", + bus, SubBus, AgentId, HvRc); + } + kfree(BridgeInfo); +} + +/* + * This assumes that the node slot is always on the primary bus! + */ +static int scan_bridge_slot(HvBusNumber Bus, + struct HvCallPci_BridgeInfo *BridgeInfo) +{ + struct device_node *node; + HvSubBusNumber SubBus = BridgeInfo->subBusNumber; + u16 VendorId = 0; + int HvRc = 0; + u8 Irq = 0; + int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus); + int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus); + HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function); + + /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */ + Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel); + PPCDBG(PPCDBG_BUSWALK, + "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n", + Bus, 0, EADsIdSel, Irq); + + /* + * Connect all functions of any device found. + */ + for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) { + for (Function = 0; Function < 8; ++Function) { + HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function); + HvRc = HvCallXm_connectBusUnit(Bus, SubBus, + AgentId, Irq); + if (HvRc != 0) { + pci_Log_Error("Connect Bus Unit", + Bus, SubBus, AgentId, HvRc); + continue; + } + + HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId, + PCI_VENDOR_ID, &VendorId); + if (HvRc != 0) { + pci_Log_Error("Read Vendor", + Bus, SubBus, AgentId, HvRc); + continue; + } + printk("read vendor ID: %x\n", VendorId); + + /* FoundDevice: 0x18.28.10 = 0x12AE */ + PPCDBG(PPCDBG_BUSWALK, + "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n", + Bus, SubBus, AgentId, VendorId, Irq); + HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId, + PCI_INTERRUPT_LINE, Irq); + if (HvRc != 0) + pci_Log_Error("PciCfgStore Irq Failed!", + Bus, SubBus, AgentId, HvRc); + + ++DeviceCount; + node = build_device_node(Bus, SubBus, EADsIdSel, Function); + PCI_DN(node)->Irq = Irq; + PCI_DN(node)->LogicalSlot = BridgeInfo->logicalSlotNumber; + + } /* for (Function = 0; Function < 8; ++Function) */ + } /* for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) */ + return HvRc; +} + +/* + * I/0 Memory copy MUST use mmio commands on iSeries + * To do; For performance, include the hv call directly + */ +void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count) +{ + u8 ByteValue = c; + long NumberOfBytes = Count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(ByteValue, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memset_io); + +void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count) +{ + char *src = source; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + iSeries_Write_Byte(*src++, dest++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_toio); + +void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count) +{ + char *dst = dest; + long NumberOfBytes = count; + + while (NumberOfBytes > 0) { + *dst++ = iSeries_Read_Byte(src++); + -- NumberOfBytes; + } +} +EXPORT_SYMBOL(iSeries_memcpy_fromio); + +/* + * Look down the chain to find the matching Device Device + */ +static struct device_node *find_Device_Node(int bus, int devfn) +{ + struct list_head *pos; + + list_for_each(pos, &iSeries_Global_Device_List) { + struct device_node *node = + list_entry(pos, struct device_node, Device_List); + + if ((bus == ISERIES_BUS(node)) && + (devfn == PCI_DN(node)->devfn)) + return node; + } + return NULL; +} + +#if 0 +/* + * Returns the device node for the passed pci_dev + * Sanity Check Node PciDev to passed pci_dev + * If none is found, returns a NULL which the client must handle. + */ +static struct device_node *get_Device_Node(struct pci_dev *pdev) +{ + struct device_node *node; + + node = pdev->sysdata; + if (node == NULL || PCI_DN(node)->pcidev != pdev) + node = find_Device_Node(pdev->bus->number, pdev->devfn); + return node; +} +#endif + +/* + * Config space read and write functions. + * For now at least, we look for the device node for the bus and devfn + * that we are asked to access. It may be possible to translate the devfn + * to a subbus and deviceid more directly. + */ +static u64 hv_cfg_read_func[4] = { + HvCallPciConfigLoad8, HvCallPciConfigLoad16, + HvCallPciConfigLoad32, HvCallPciConfigLoad32 +}; + +static u64 hv_cfg_write_func[4] = { + HvCallPciConfigStore8, HvCallPciConfigStore16, + HvCallPciConfigStore32, HvCallPciConfigStore32 +}; + +/* + * Read PCI config space + */ +static int iSeries_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 *val) +{ + struct device_node *node = find_Device_Node(bus->number, devfn); + u64 fn; + struct HvCallPci_LoadReturn ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) { + *val = ~0; + return PCIBIOS_BAD_REGISTER_NUMBER; + } + + fn = hv_cfg_read_func[(size - 1) & 3]; + HvCall3Ret16(fn, &ret, PCI_DN(node)->DsaAddr.DsaAddr, offset, 0); + + if (ret.rc != 0) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; /* or something */ + } + + *val = ret.value; + return 0; +} + +/* + * Write PCI config space + */ + +static int iSeries_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int size, u32 val) +{ + struct device_node *node = find_Device_Node(bus->number, devfn); + u64 fn; + u64 ret; + + if (node == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 255) + return PCIBIOS_BAD_REGISTER_NUMBER; + + fn = hv_cfg_write_func[(size - 1) & 3]; + ret = HvCall4(fn, PCI_DN(node)->DsaAddr.DsaAddr, offset, val, 0); + + if (ret != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + return 0; +} + +static struct pci_ops iSeries_pci_ops = { + .read = iSeries_pci_read_config, + .write = iSeries_pci_write_config +}; + +/* + * Check Return Code + * -> On Failure, print and log information. + * Increment Retry Count, if exceeds max, panic partition. + * + * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234 + * PCI: Device 23.90 ReadL Retry( 1) + * PCI: Device 23.90 ReadL Retry Successful(1) + */ +static int CheckReturnCode(char *TextHdr, struct device_node *DevNode, + int *retry, u64 ret) +{ + if (ret != 0) { + struct pci_dn *pdn = PCI_DN(DevNode); + + ++Pci_Error_Count; + (*retry)++; + printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n", + TextHdr, pdn->DsaAddr.Dsa.busNumber, pdn->devfn, + *retry, (int)ret); + /* + * Bump the retry and check for retry count exceeded. + * If, Exceeded, panic the system. + */ + if (((*retry) > Pci_Retry_Max) && + (Pci_Error_Flag > 0)) { + mf_display_src(0xB6000103); + panic_timeout = 0; + panic("PCI: Hardware I/O Error, SRC B6000103, " + "Automatic Reboot Disabled.\n"); + } + return -1; /* Retry Try */ + } + return 0; +} + +/* + * Translate the I/O Address into a device node, bar, and bar offset. + * Note: Make sure the passed variable end up on the stack to avoid + * the exposure of being device global. + */ +static inline struct device_node *xlate_iomm_address( + const volatile void __iomem *IoAddress, + u64 *dsaptr, u64 *BarOffsetPtr) +{ + unsigned long OrigIoAddr; + unsigned long BaseIoAddr; + unsigned long TableIndex; + struct device_node *DevNode; + + OrigIoAddr = (unsigned long __force)IoAddress; + if ((OrigIoAddr < BASE_IO_MEMORY) || (OrigIoAddr >= max_io_memory)) + return NULL; + BaseIoAddr = OrigIoAddr - BASE_IO_MEMORY; + TableIndex = BaseIoAddr / IOMM_TABLE_ENTRY_SIZE; + DevNode = iomm_table[TableIndex]; + + if (DevNode != NULL) { + int barnum = iobar_table[TableIndex]; + *dsaptr = PCI_DN(DevNode)->DsaAddr.DsaAddr | (barnum << 24); + *BarOffsetPtr = BaseIoAddr % IOMM_TABLE_ENTRY_SIZE; + } else + panic("PCI: Invalid PCI IoAddress detected!\n"); + return DevNode; +} + +/* + * Read MM I/O Instructions for the iSeries + * On MM I/O error, all ones are returned and iSeries_pci_IoError is cal + * else, data is returned in big Endian format. + * + * iSeries_Read_Byte = Read Byte ( 8 bit) + * iSeries_Read_Word = Read Word (16 bit) + * iSeries_Read_Long = Read Long (32 bit) + */ +u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Byte: invalid access at IO address %p\n", IoAddress); + return 0xff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad8, &ret, dsa, BarOffset, 0); + } while (CheckReturnCode("RDB", DevNode, &retry, ret.rc) != 0); + + return (u8)ret.value; +} +EXPORT_SYMBOL(iSeries_Read_Byte); + +u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Word: invalid access at IO address %p\n", IoAddress); + return 0xffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad16, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDW", DevNode, &retry, ret.rc) != 0); + + return swab16((u16)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Word); + +u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + struct HvCallPci_LoadReturn ret; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Read_Long: invalid access at IO address %p\n", IoAddress); + return 0xffffffff; + } + do { + ++Pci_Io_Read_Count; + HvCall3Ret16(HvCallPciBarLoad32, &ret, dsa, + BarOffset, 0); + } while (CheckReturnCode("RDL", DevNode, &retry, ret.rc) != 0); + + return swab32((u32)ret.value); +} +EXPORT_SYMBOL(iSeries_Read_Long); + +/* + * Write MM I/O Instructions for the iSeries + * + * iSeries_Write_Byte = Write Byte (8 bit) + * iSeries_Write_Word = Write Word(16 bit) + * iSeries_Write_Long = Write Long(32 bit) + */ +void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Byte: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0); + } while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Byte); + +void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Word: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0); + } while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Word); + +void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) +{ + u64 BarOffset; + u64 dsa; + int retry = 0; + u64 rc; + struct device_node *DevNode = + xlate_iomm_address(IoAddress, &dsa, &BarOffset); + + if (DevNode == NULL) { + static unsigned long last_jiffies; + static int num_printed; + + if ((jiffies - last_jiffies) > 60 * HZ) { + last_jiffies = jiffies; + num_printed = 0; + } + if (num_printed++ < 10) + printk(KERN_ERR "iSeries_Write_Long: invalid access at IO address %p\n", IoAddress); + return; + } + do { + ++Pci_Io_Write_Count; + rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0); + } while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0); +} +EXPORT_SYMBOL(iSeries_Write_Long); diff --git a/arch/powerpc/platforms/iseries/proc.c b/arch/powerpc/platforms/iseries/proc.c new file mode 100644 index 00000000000..d46b473ce4d --- /dev/null +++ b/arch/powerpc/platforms/iseries/proc.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2001 Kyle A. Lucke IBM Corporation + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/param.h> /* for HZ */ +#include <asm/paca.h> +#include <asm/processor.h> +#include <asm/time.h> +#include <asm/lppaca.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/IoHriMainStore.h> +#include <asm/iSeries/IoHriProcessorVpd.h> + +static int __init iseries_proc_create(void) +{ + struct proc_dir_entry *e = proc_mkdir("iSeries", 0); + if (!e) + return 1; + + return 0; +} +core_initcall(iseries_proc_create); + +static unsigned long startTitan = 0; +static unsigned long startTb = 0; + +static int proc_titantod_show(struct seq_file *m, void *v) +{ + unsigned long tb0, titan_tod; + + tb0 = get_tb(); + titan_tod = HvCallXm_loadTod(); + + seq_printf(m, "Titan\n" ); + seq_printf(m, " time base = %016lx\n", tb0); + seq_printf(m, " titan tod = %016lx\n", titan_tod); + seq_printf(m, " xProcFreq = %016x\n", + xIoHriProcessorVpd[0].xProcFreq); + seq_printf(m, " xTimeBaseFreq = %016x\n", + xIoHriProcessorVpd[0].xTimeBaseFreq); + seq_printf(m, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy); + seq_printf(m, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec); + + if (!startTitan) { + startTitan = titan_tod; + startTb = tb0; + } else { + unsigned long titan_usec = (titan_tod - startTitan) >> 12; + unsigned long tb_ticks = (tb0 - startTb); + unsigned long titan_jiffies = titan_usec / (1000000/HZ); + unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ); + unsigned long titan_jiff_rem_usec = + titan_usec - titan_jiff_usec; + unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy; + unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy; + unsigned long tb_jiff_rem_ticks = tb_ticks - tb_jiff_ticks; + unsigned long tb_jiff_rem_usec = + tb_jiff_rem_ticks / tb_ticks_per_usec; + unsigned long new_tb_ticks_per_jiffy = + (tb_ticks * (1000000/HZ))/titan_usec; + + seq_printf(m, " titan elapsed = %lu uSec\n", titan_usec); + seq_printf(m, " tb elapsed = %lu ticks\n", tb_ticks); + seq_printf(m, " titan jiffies = %lu.%04lu \n", titan_jiffies, + titan_jiff_rem_usec); + seq_printf(m, " tb jiffies = %lu.%04lu\n", tb_jiffies, + tb_jiff_rem_usec); + seq_printf(m, " new tb_ticks_per_jiffy = %lu\n", + new_tb_ticks_per_jiffy); + } + + return 0; +} + +static int proc_titantod_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_titantod_show, NULL); +} + +static struct file_operations proc_titantod_operations = { + .open = proc_titantod_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init iseries_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_titantod_operations; + + return 0; +} +__initcall(iseries_proc_init); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c new file mode 100644 index 00000000000..ad78c8581a5 --- /dev/null +++ b/arch/powerpc/platforms/iseries/setup.c @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek + * <dan@net4x.com>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/initrd.h> +#include <linux/seq_file.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/root_dev.h> + +#include <asm/processor.h> +#include <asm/machdep.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/iommu.h> +#include <asm/firmware.h> + +#include <asm/time.h> +#include <asm/naca.h> +#include <asm/paca.h> +#include <asm/cache.h> +#include <asm/sections.h> +#include <asm/abs_addr.h> +#include <asm/iSeries/HvCallHpt.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/HvCallEvent.h> +#include <asm/iSeries/HvCallSm.h> +#include <asm/iSeries/HvCallXm.h> +#include <asm/iSeries/ItLpQueue.h> +#include <asm/iSeries/IoHriMainStore.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/iSeries_irq.h> +#include <asm/iSeries/IoHriProcessorVpd.h> +#include <asm/iSeries/ItVpdAreas.h> +#include <asm/iSeries/LparMap.h> + +#include "setup.h" + +extern void hvlog(char *fmt, ...); + +#ifdef DEBUG +#define DBG(fmt...) hvlog(fmt) +#else +#define DBG(fmt...) +#endif + +/* Function Prototypes */ +extern void ppcdbg_initialize(void); + +static void build_iSeries_Memory_Map(void); +static int iseries_shared_idle(void); +static int iseries_dedicated_idle(void); +#ifdef CONFIG_PCI +extern void iSeries_pci_final_fixup(void); +#else +static void iSeries_pci_final_fixup(void) { } +#endif + +/* Global Variables */ +int piranha_simulator; + +extern int rd_size; /* Defined in drivers/block/rd.c */ +extern unsigned long klimit; +extern unsigned long embedded_sysmap_start; +extern unsigned long embedded_sysmap_end; + +extern unsigned long iSeries_recal_tb; +extern unsigned long iSeries_recal_titan; + +static int mf_initialized; + +struct MemoryBlock { + unsigned long absStart; + unsigned long absEnd; + unsigned long logicalStart; + unsigned long logicalEnd; +}; + +/* + * Process the main store vpd to determine where the holes in memory are + * and return the number of physical blocks and fill in the array of + * block data. + */ +static unsigned long iSeries_process_Condor_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + unsigned long holeFirstChunk, holeSizeChunks; + unsigned long numMemoryBlocks = 1; + struct IoHriMainStoreSegment4 *msVpd = + (struct IoHriMainStoreSegment4 *)xMsVpd; + unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr; + unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr; + unsigned long holeSize = holeEnd - holeStart; + + printk("Mainstore_VPD: Condor\n"); + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + mb_array[0].logicalStart = 0; + mb_array[0].logicalEnd = 0x100000000; + mb_array[0].absStart = 0; + mb_array[0].absEnd = 0x100000000; + + if (holeSize) { + numMemoryBlocks = 2; + holeStart = holeStart & 0x000fffffffffffff; + holeStart = addr_to_chunk(holeStart); + holeFirstChunk = holeStart; + holeSize = addr_to_chunk(holeSize); + holeSizeChunks = holeSize; + printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n", + holeFirstChunk, holeSizeChunks ); + mb_array[0].logicalEnd = holeFirstChunk; + mb_array[0].absEnd = holeFirstChunk; + mb_array[1].logicalStart = holeFirstChunk; + mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks; + mb_array[1].absStart = holeFirstChunk + holeSizeChunks; + mb_array[1].absEnd = 0x100000000; + } + return numMemoryBlocks; +} + +#define MaxSegmentAreas 32 +#define MaxSegmentAdrRangeBlocks 128 +#define MaxAreaRangeBlocks 4 + +static unsigned long iSeries_process_Regatta_mainstore_vpd( + struct MemoryBlock *mb_array, unsigned long max_entries) +{ + struct IoHriMainStoreSegment5 *msVpdP = + (struct IoHriMainStoreSegment5 *)xMsVpd; + unsigned long numSegmentBlocks = 0; + u32 existsBits = msVpdP->msAreaExists; + unsigned long area_num; + + printk("Mainstore_VPD: Regatta\n"); + + for (area_num = 0; area_num < MaxSegmentAreas; ++area_num ) { + unsigned long numAreaBlocks; + struct IoHriMainStoreArea4 *currentArea; + + if (existsBits & 0x80000000) { + unsigned long block_num; + + currentArea = &msVpdP->msAreaArray[area_num]; + numAreaBlocks = currentArea->numAdrRangeBlocks; + printk("ms_vpd: processing area %2ld blocks=%ld", + area_num, numAreaBlocks); + for (block_num = 0; block_num < numAreaBlocks; + ++block_num ) { + /* Process an address range block */ + struct MemoryBlock tempBlock; + unsigned long i; + + tempBlock.absStart = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart; + tempBlock.absEnd = + (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd; + tempBlock.logicalStart = 0; + tempBlock.logicalEnd = 0; + printk("\n block %ld absStart=%016lx absEnd=%016lx", + block_num, tempBlock.absStart, + tempBlock.absEnd); + + for (i = 0; i < numSegmentBlocks; ++i) { + if (mb_array[i].absStart == + tempBlock.absStart) + break; + } + if (i == numSegmentBlocks) { + if (numSegmentBlocks == max_entries) + panic("iSeries_process_mainstore_vpd: too many memory blocks"); + mb_array[numSegmentBlocks] = tempBlock; + ++numSegmentBlocks; + } else + printk(" (duplicate)"); + } + printk("\n"); + } + existsBits <<= 1; + } + /* Now sort the blocks found into ascending sequence */ + if (numSegmentBlocks > 1) { + unsigned long m, n; + + for (m = 0; m < numSegmentBlocks - 1; ++m) { + for (n = numSegmentBlocks - 1; m < n; --n) { + if (mb_array[n].absStart < + mb_array[n-1].absStart) { + struct MemoryBlock tempBlock; + + tempBlock = mb_array[n]; + mb_array[n] = mb_array[n-1]; + mb_array[n-1] = tempBlock; + } + } + } + } + /* + * Assign "logical" addresses to each block. These + * addresses correspond to the hypervisor "bitmap" space. + * Convert all addresses into units of 256K chunks. + */ + { + unsigned long i, nextBitmapAddress; + + printk("ms_vpd: %ld sorted memory blocks\n", numSegmentBlocks); + nextBitmapAddress = 0; + for (i = 0; i < numSegmentBlocks; ++i) { + unsigned long length = mb_array[i].absEnd - + mb_array[i].absStart; + + mb_array[i].logicalStart = nextBitmapAddress; + mb_array[i].logicalEnd = nextBitmapAddress + length; + nextBitmapAddress += length; + printk(" Bitmap range: %016lx - %016lx\n" + " Absolute range: %016lx - %016lx\n", + mb_array[i].logicalStart, + mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + mb_array[i].absStart = addr_to_chunk(mb_array[i].absStart & + 0x000fffffffffffff); + mb_array[i].absEnd = addr_to_chunk(mb_array[i].absEnd & + 0x000fffffffffffff); + mb_array[i].logicalStart = + addr_to_chunk(mb_array[i].logicalStart); + mb_array[i].logicalEnd = addr_to_chunk(mb_array[i].logicalEnd); + } + } + + return numSegmentBlocks; +} + +static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array, + unsigned long max_entries) +{ + unsigned long i; + unsigned long mem_blocks = 0; + + if (cpu_has_feature(CPU_FTR_SLB)) + mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array, + max_entries); + else + mem_blocks = iSeries_process_Condor_mainstore_vpd(mb_array, + max_entries); + + printk("Mainstore_VPD: numMemoryBlocks = %ld \n", mem_blocks); + for (i = 0; i < mem_blocks; ++i) { + printk("Mainstore_VPD: block %3ld logical chunks %016lx - %016lx\n" + " abs chunks %016lx - %016lx\n", + i, mb_array[i].logicalStart, mb_array[i].logicalEnd, + mb_array[i].absStart, mb_array[i].absEnd); + } + return mem_blocks; +} + +static void __init iSeries_get_cmdline(void) +{ + char *p, *q; + + /* copy the command line parameter from the primary VSP */ + HvCallEvent_dmaToSp(cmd_line, 2 * 64* 1024, 256, + HvLpDma_Direction_RemoteToLocal); + + p = cmd_line; + q = cmd_line + 255; + while(p < q) { + if (!*p || *p == '\n') + break; + ++p; + } + *p = 0; +} + +static void __init iSeries_init_early(void) +{ + extern unsigned long memory_limit; + + DBG(" -> iSeries_init_early()\n"); + + ppc64_firmware_features = FW_FEATURE_ISERIES; + + ppcdbg_initialize(); + + ppc64_interrupt_controller = IC_ISERIES; + +#if defined(CONFIG_BLK_DEV_INITRD) + /* + * If the init RAM disk has been configured and there is + * a non-zero starting address for it, set it up + */ + if (naca.xRamDisk) { + initrd_start = (unsigned long)__va(naca.xRamDisk); + initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; + initrd_below_start_ok = 1; // ramdisk in kernel space + ROOT_DEV = Root_RAM0; + if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) + rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; + } else +#endif /* CONFIG_BLK_DEV_INITRD */ + { + /* ROOT_DEV = MKDEV(VIODASD_MAJOR, 1); */ + } + + iSeries_recal_tb = get_tb(); + iSeries_recal_titan = HvCallXm_loadTod(); + + /* + * Initialize the hash table management pointers + */ + hpte_init_iSeries(); + + /* + * Initialize the DMA/TCE management + */ + iommu_init_early_iSeries(); + + iSeries_get_cmdline(); + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + + /* Parse early parameters, in particular mem=x */ + parse_early_param(); + + if (memory_limit) { + if (memory_limit < systemcfg->physicalMemorySize) + systemcfg->physicalMemorySize = memory_limit; + else { + printk("Ignoring mem=%lu >= ram_top.\n", memory_limit); + memory_limit = 0; + } + } + + /* Initialize machine-dependency vectors */ +#ifdef CONFIG_SMP + smp_init_iSeries(); +#endif + if (itLpNaca.xPirEnvironMode == 0) + piranha_simulator = 1; + + /* Associate Lp Event Queue 0 with processor 0 */ + HvCallEvent_setLpEventQueueInterruptProc(0, 0); + + mf_init(); + mf_initialized = 1; + mb(); + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else + initrd_start = initrd_end = 0; +#endif /* CONFIG_BLK_DEV_INITRD */ + + DBG(" <- iSeries_init_early()\n"); +} + +struct mschunks_map mschunks_map = { + /* XXX We don't use these, but Piranha might need them. */ + .chunk_size = MSCHUNKS_CHUNK_SIZE, + .chunk_shift = MSCHUNKS_CHUNK_SHIFT, + .chunk_mask = MSCHUNKS_OFFSET_MASK, +}; +EXPORT_SYMBOL(mschunks_map); + +void mschunks_alloc(unsigned long num_chunks) +{ + klimit = _ALIGN(klimit, sizeof(u32)); + mschunks_map.mapping = (u32 *)klimit; + klimit += num_chunks * sizeof(u32); + mschunks_map.num_chunks = num_chunks; +} + +/* + * The iSeries may have very large memories ( > 128 GB ) and a partition + * may get memory in "chunks" that may be anywhere in the 2**52 real + * address space. The chunks are 256K in size. To map this to the + * memory model Linux expects, the AS/400 specific code builds a + * translation table to translate what Linux thinks are "physical" + * addresses to the actual real addresses. This allows us to make + * it appear to Linux that we have contiguous memory starting at + * physical address zero while in fact this could be far from the truth. + * To avoid confusion, I'll let the words physical and/or real address + * apply to the Linux addresses while I'll use "absolute address" to + * refer to the actual hardware real address. + * + * build_iSeries_Memory_Map gets information from the Hypervisor and + * looks at the Main Store VPD to determine the absolute addresses + * of the memory that has been assigned to our partition and builds + * a table used to translate Linux's physical addresses to these + * absolute addresses. Absolute addresses are needed when + * communicating with the hypervisor (e.g. to build HPT entries) + */ + +static void __init build_iSeries_Memory_Map(void) +{ + u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; + u32 nextPhysChunk; + u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages; + u32 num_ptegs; + u32 totalChunks,moreChunks; + u32 currChunk, thisChunk, absChunk; + u32 currDword; + u32 chunkBit; + u64 map; + struct MemoryBlock mb[32]; + unsigned long numMemoryBlocks, curBlock; + + /* Chunk size on iSeries is 256K bytes */ + totalChunks = (u32)HvLpConfig_getMsChunks(); + mschunks_alloc(totalChunks); + + /* + * Get absolute address of our load area + * and map it to physical address 0 + * This guarantees that the loadarea ends up at physical 0 + * otherwise, it might not be returned by PLIC as the first + * chunks + */ + + loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr); + loadAreaSize = itLpNaca.xLoadAreaChunks; + + /* + * Only add the pages already mapped here. + * Otherwise we might add the hpt pages + * The rest of the pages of the load area + * aren't in the HPT yet and can still + * be assigned an arbitrary physical address + */ + if ((loadAreaSize * 64) > HvPagesToMap) + loadAreaSize = HvPagesToMap / 64; + + loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1; + + /* + * TODO Do we need to do something if the HPT is in the 64MB load area? + * This would be required if the itLpNaca.xLoadAreaChunks includes + * the HPT size + */ + + printk("Mapping load area - physical addr = 0000000000000000\n" + " absolute addr = %016lx\n", + chunk_to_addr(loadAreaFirstChunk)); + printk("Load area size %dK\n", loadAreaSize * 256); + + for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) + mschunks_map.mapping[nextPhysChunk] = + loadAreaFirstChunk + nextPhysChunk; + + /* + * Get absolute address of our HPT and remember it so + * we won't map it to any physical address + */ + hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); + hptSizePages = (u32)HvCallHpt_getHptPages(); + hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); + hptLastChunk = hptFirstChunk + hptSizeChunks - 1; + + printk("HPT absolute addr = %016lx, size = %dK\n", + chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); + + /* Fill in the hashed page table hash mask */ + num_ptegs = hptSizePages * + (PAGE_SIZE / (sizeof(hpte_t) * HPTES_PER_GROUP)); + htab_hash_mask = num_ptegs - 1; + + /* + * The actual hashed page table is in the hypervisor, + * we have no direct access + */ + htab_address = NULL; + + /* + * Determine if absolute memory has any + * holes so that we can interpret the + * access map we get back from the hypervisor + * correctly. + */ + numMemoryBlocks = iSeries_process_mainstore_vpd(mb, 32); + + /* + * Process the main store access map from the hypervisor + * to build up our physical -> absolute translation table + */ + curBlock = 0; + currChunk = 0; + currDword = 0; + moreChunks = totalChunks; + + while (moreChunks) { + map = HvCallSm_get64BitsOfAccessMap(itLpNaca.xLpIndex, + currDword); + thisChunk = currChunk; + while (map) { + chunkBit = map >> 63; + map <<= 1; + if (chunkBit) { + --moreChunks; + while (thisChunk >= mb[curBlock].logicalEnd) { + ++curBlock; + if (curBlock >= numMemoryBlocks) + panic("out of memory blocks"); + } + if (thisChunk < mb[curBlock].logicalStart) + panic("memory block error"); + + absChunk = mb[curBlock].absStart + + (thisChunk - mb[curBlock].logicalStart); + if (((absChunk < hptFirstChunk) || + (absChunk > hptLastChunk)) && + ((absChunk < loadAreaFirstChunk) || + (absChunk > loadAreaLastChunk))) { + mschunks_map.mapping[nextPhysChunk] = + absChunk; + ++nextPhysChunk; + } + } + ++thisChunk; + } + ++currDword; + currChunk += 64; + } + + /* + * main store size (in chunks) is + * totalChunks - hptSizeChunks + * which should be equal to + * nextPhysChunk + */ + systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk); +} + +/* + * Document me. + */ +static void __init iSeries_setup_arch(void) +{ + unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; + + if (get_paca()->lppaca.shared_proc) { + ppc_md.idle_loop = iseries_shared_idle; + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { + ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } + + /* Setup the Lp Event Queue */ + setup_hvlpevent_queue(); + + printk("Max logical processors = %d\n", + itVpdAreas.xSlicMaxLogicalProcs); + printk("Max physical processors = %d\n", + itVpdAreas.xSlicMaxPhysicalProcs); + + systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR; + printk("Processor version = %x\n", systemcfg->processor); +} + +static void iSeries_get_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "machine\t\t: 64-bit iSeries Logical Partition\n"); +} + +/* + * Document me. + * and Implement me. + */ +static int iSeries_get_irq(struct pt_regs *regs) +{ + /* -2 means ignore this interrupt */ + return -2; +} + +/* + * Document me. + */ +static void iSeries_restart(char *cmd) +{ + mf_reboot(); +} + +/* + * Document me. + */ +static void iSeries_power_off(void) +{ + mf_power_off(); +} + +/* + * Document me. + */ +static void iSeries_halt(void) +{ + mf_power_off(); +} + +static void __init iSeries_progress(char * st, unsigned short code) +{ + printk("Progress: [%04x] - %s\n", (unsigned)code, st); + if (!piranha_simulator && mf_initialized) { + if (code != 0xffff) + mf_display_progress(code); + else + mf_clear_src(); + } +} + +static void __init iSeries_fixup_klimit(void) +{ + /* + * Change klimit to take into account any ram disk + * that may be included + */ + if (naca.xRamDisk) + klimit = KERNELBASE + (u64)naca.xRamDisk + + (naca.xRamDiskSize * PAGE_SIZE); + else { + /* + * No ram disk was included - check and see if there + * was an embedded system map. Change klimit to take + * into account any embedded system map + */ + if (embedded_sysmap_end) + klimit = KERNELBASE + ((embedded_sysmap_end + 4095) & + 0xfffffffffffff000); + } +} + +static int __init iSeries_src_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + return 0; +} + +late_initcall(iSeries_src_init); + +static inline void process_iSeries_events(void) +{ + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); + + /* + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. + */ + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} + +static int iseries_shared_idle(void) +{ + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); + + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); + + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); + } + + return 0; +} + +static int iseries_dedicated_idle(void) +{ + long oldval; + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { + HMT_medium(); + ppc64_runlatch_on(); + process_iSeries_events(); + } + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + ppc64_runlatch_on(); + schedule(); + } + + return 0; +} + +#ifndef CONFIG_PCI +void __init iSeries_init_IRQ(void) { } +#endif + +static int __init iseries_probe(int platform) +{ + return PLATFORM_ISERIES_LPAR == platform; +} + +struct machdep_calls __initdata iseries_md = { + .setup_arch = iSeries_setup_arch, + .get_cpuinfo = iSeries_get_cpuinfo, + .init_IRQ = iSeries_init_IRQ, + .get_irq = iSeries_get_irq, + .init_early = iSeries_init_early, + .pcibios_fixup = iSeries_pci_final_fixup, + .restart = iSeries_restart, + .power_off = iSeries_power_off, + .halt = iSeries_halt, + .get_boot_time = iSeries_get_boot_time, + .set_rtc_time = iSeries_set_rtc_time, + .get_rtc_time = iSeries_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = iSeries_progress, + .probe = iseries_probe, + /* XXX Implement enable_pmcs for iSeries */ +}; + +struct blob { + unsigned char data[PAGE_SIZE]; + unsigned long next; +}; + +struct iseries_flat_dt { + struct boot_param_header header; + u64 reserve_map[2]; + struct blob dt; + struct blob strings; +}; + +struct iseries_flat_dt iseries_dt; + +void dt_init(struct iseries_flat_dt *dt) +{ + dt->header.off_mem_rsvmap = + offsetof(struct iseries_flat_dt, reserve_map); + dt->header.off_dt_struct = offsetof(struct iseries_flat_dt, dt); + dt->header.off_dt_strings = offsetof(struct iseries_flat_dt, strings); + dt->header.totalsize = sizeof(struct iseries_flat_dt); + dt->header.dt_strings_size = sizeof(struct blob); + + /* There is no notion of hardware cpu id on iSeries */ + dt->header.boot_cpuid_phys = smp_processor_id(); + + dt->dt.next = (unsigned long)&dt->dt.data; + dt->strings.next = (unsigned long)&dt->strings.data; + + dt->header.magic = OF_DT_HEADER; + dt->header.version = 0x10; + dt->header.last_comp_version = 0x10; + + dt->reserve_map[0] = 0; + dt->reserve_map[1] = 0; +} + +void dt_check_blob(struct blob *b) +{ + if (b->next >= (unsigned long)&b->next) { + DBG("Ran out of space in flat device tree blob!\n"); + BUG(); + } +} + +void dt_push_u32(struct iseries_flat_dt *dt, u32 value) +{ + *((u32*)dt->dt.next) = value; + dt->dt.next += sizeof(u32); + + dt_check_blob(&dt->dt); +} + +void dt_push_u64(struct iseries_flat_dt *dt, u64 value) +{ + *((u64*)dt->dt.next) = value; + dt->dt.next += sizeof(u64); + + dt_check_blob(&dt->dt); +} + +unsigned long dt_push_bytes(struct blob *blob, char *data, int len) +{ + unsigned long start = blob->next - (unsigned long)blob->data; + + memcpy((char *)blob->next, data, len); + blob->next = _ALIGN(blob->next + len, 4); + + dt_check_blob(blob); + + return start; +} + +void dt_start_node(struct iseries_flat_dt *dt, char *name) +{ + dt_push_u32(dt, OF_DT_BEGIN_NODE); + dt_push_bytes(&dt->dt, name, strlen(name) + 1); +} + +#define dt_end_node(dt) dt_push_u32(dt, OF_DT_END_NODE) + +void dt_prop(struct iseries_flat_dt *dt, char *name, char *data, int len) +{ + unsigned long offset; + + dt_push_u32(dt, OF_DT_PROP); + + /* Length of the data */ + dt_push_u32(dt, len); + + /* Put the property name in the string blob. */ + offset = dt_push_bytes(&dt->strings, name, strlen(name) + 1); + + /* The offset of the properties name in the string blob. */ + dt_push_u32(dt, (u32)offset); + + /* The actual data. */ + dt_push_bytes(&dt->dt, data, len); +} + +void dt_prop_str(struct iseries_flat_dt *dt, char *name, char *data) +{ + dt_prop(dt, name, data, strlen(data) + 1); /* + 1 for NULL */ +} + +void dt_prop_u32(struct iseries_flat_dt *dt, char *name, u32 data) +{ + dt_prop(dt, name, (char *)&data, sizeof(u32)); +} + +void dt_prop_u64(struct iseries_flat_dt *dt, char *name, u64 data) +{ + dt_prop(dt, name, (char *)&data, sizeof(u64)); +} + +void dt_prop_u64_list(struct iseries_flat_dt *dt, char *name, u64 *data, int n) +{ + dt_prop(dt, name, (char *)data, sizeof(u64) * n); +} + +void dt_prop_empty(struct iseries_flat_dt *dt, char *name) +{ + dt_prop(dt, name, NULL, 0); +} + +void dt_cpus(struct iseries_flat_dt *dt) +{ + unsigned char buf[32]; + unsigned char *p; + unsigned int i, index; + struct IoHriProcessorVpd *d; + + /* yuck */ + snprintf(buf, 32, "PowerPC,%s", cur_cpu_spec->cpu_name); + p = strchr(buf, ' '); + if (!p) p = buf + strlen(buf); + + dt_start_node(dt, "cpus"); + dt_prop_u32(dt, "#address-cells", 1); + dt_prop_u32(dt, "#size-cells", 0); + + for (i = 0; i < NR_CPUS; i++) { + if (paca[i].lppaca.dyn_proc_status >= 2) + continue; + + snprintf(p, 32 - (p - buf), "@%d", i); + dt_start_node(dt, buf); + + dt_prop_str(dt, "device_type", "cpu"); + + index = paca[i].lppaca.dyn_hv_phys_proc_index; + d = &xIoHriProcessorVpd[index]; + + dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); + dt_prop_u32(dt, "i-cache-line-size", d->xInstCacheOperandSize); + + dt_prop_u32(dt, "d-cache-size", d->xDataL1CacheSizeKB * 1024); + dt_prop_u32(dt, "d-cache-line-size", d->xDataCacheOperandSize); + + /* magic conversions to Hz copied from old code */ + dt_prop_u32(dt, "clock-frequency", + ((1UL << 34) * 1000000) / d->xProcFreq); + dt_prop_u32(dt, "timebase-frequency", + ((1UL << 32) * 1000000) / d->xTimeBaseFreq); + + dt_prop_u32(dt, "reg", i); + + dt_end_node(dt); + } + + dt_end_node(dt); +} + +void build_flat_dt(struct iseries_flat_dt *dt) +{ + u64 tmp[2]; + + dt_init(dt); + + dt_start_node(dt, ""); + + dt_prop_u32(dt, "#address-cells", 2); + dt_prop_u32(dt, "#size-cells", 2); + + /* /memory */ + dt_start_node(dt, "memory@0"); + dt_prop_str(dt, "name", "memory"); + dt_prop_str(dt, "device_type", "memory"); + tmp[0] = 0; + tmp[1] = systemcfg->physicalMemorySize; + dt_prop_u64_list(dt, "reg", tmp, 2); + dt_end_node(dt); + + /* /chosen */ + dt_start_node(dt, "chosen"); + dt_prop_u32(dt, "linux,platform", PLATFORM_ISERIES_LPAR); + dt_end_node(dt); + + dt_cpus(dt); + + dt_end_node(dt); + + dt_push_u32(dt, OF_DT_END); +} + +void * __init iSeries_early_setup(void) +{ + iSeries_fixup_klimit(); + + /* + * Initialize the table which translate Linux physical addresses to + * AS/400 absolute addresses + */ + build_iSeries_Memory_Map(); + + build_flat_dt(&iseries_dt); + + return (void *) __pa(&iseries_dt); +} diff --git a/arch/powerpc/platforms/iseries/setup.h b/arch/powerpc/platforms/iseries/setup.h new file mode 100644 index 00000000000..6da89ae991c --- /dev/null +++ b/arch/powerpc/platforms/iseries/setup.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> + * + * Description: + * Architecture- / platform-specific boot-time initialization code for + * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and + * code by Gary Thomas, Cort Dougan <cort@cs.nmt.edu>, and Dan Malek + * <dan@netx4.com>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ISERIES_SETUP_H__ +#define __ISERIES_SETUP_H__ + +extern void iSeries_get_boot_time(struct rtc_time *tm); +extern int iSeries_set_rtc_time(struct rtc_time *tm); +extern void iSeries_get_rtc_time(struct rtc_time *tm); + +#endif /* __ISERIES_SETUP_H__ */ diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c new file mode 100644 index 00000000000..f720916682f --- /dev/null +++ b/arch/powerpc/platforms/iseries/smp.c @@ -0,0 +1,121 @@ +/* + * SMP support for iSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/iSeries/HvCall.h> +#include <asm/time.h> +#include <asm/ppcdebug.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/system.h> + +static unsigned long iSeries_smp_message[NR_CPUS]; + +void iSeries_smp_message_recv(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int msg; + + if (num_online_cpus() < 2) + return; + + for (msg = 0; msg < 4; msg++) + if (test_and_clear_bit(msg, &iSeries_smp_message[cpu])) + smp_message_recv(msg, regs); +} + +static inline void smp_iSeries_do_message(int cpu, int msg) +{ + set_bit(msg, &iSeries_smp_message[cpu]); + HvCall_sendIPI(&(paca[cpu])); +} + +static void smp_iSeries_message_pass(int target, int msg) +{ + int i; + + if (target < NR_CPUS) + smp_iSeries_do_message(target, msg); + else { + for_each_online_cpu(i) { + if ((target == MSG_ALL_BUT_SELF) && + (i == smp_processor_id())) + continue; + smp_iSeries_do_message(i, msg); + } + } +} + +static int smp_iSeries_probe(void) +{ + return cpus_weight(cpu_possible_map); +} + +static void smp_iSeries_kick_cpu(int nr) +{ + BUG_ON((nr < 0) || (nr >= NR_CPUS)); + + /* Verify that our partition has a processor nr */ + if (paca[nr].lppaca.dyn_proc_status >= 2) + return; + + /* The processor is currently spinning, waiting + * for the cpu_start field to become non-zero + * After we set cpu_start, the processor will + * continue on to secondary_start in iSeries_head.S + */ + paca[nr].cpu_start = 1; +} + +static void __devinit smp_iSeries_setup_cpu(int nr) +{ +} + +static struct smp_ops_t iSeries_smp_ops = { + .message_pass = smp_iSeries_message_pass, + .probe = smp_iSeries_probe, + .kick_cpu = smp_iSeries_kick_cpu, + .setup_cpu = smp_iSeries_setup_cpu, +}; + +/* This is called very early. */ +void __init smp_init_iSeries(void) +{ + smp_ops = &iSeries_smp_ops; +} diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c new file mode 100644 index 00000000000..c0f7d2e9153 --- /dev/null +++ b/arch/powerpc/platforms/iseries/vio.c @@ -0,0 +1,156 @@ +/* + * IBM PowerPC iSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2005 Stephen Rothwell, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/types.h> +#include <linux/device.h> +#include <linux/init.h> + +#include <asm/vio.h> +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/abs_addr.h> +#include <asm/page.h> +#include <asm/iSeries/vio.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/HvCallXm.h> + +struct device *iSeries_vio_dev = &vio_bus_device.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; + +static void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} + +/** + * vio_register_device_iseries: - Register a new iSeries vio device. + * @voidev: The device to register. + */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + /* allocate a vio_dev for this device */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + viodev->name = viodev->dev.bus_id; + viodev->type = type; + viodev->unit_address = unit_num; + viodev->iommu_table = &vio_iommu_table; + if (vio_register_device(viodev) == NULL) { + kfree(viodev); + return NULL; + } + return viodev; +} + +void __init probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map; + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} + +/** + * vio_match_device_iseries: - Tell if a iSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_iseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return strncmp(dev->type, id->type, strlen(id->type)) == 0; +} + +static struct vio_bus_ops vio_bus_ops_iseries = { + .match = vio_match_device_iseries, +}; + +/** + * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus + */ +static int __init vio_bus_init_iseries(void) +{ + int err; + + err = vio_bus_init(&vio_bus_ops_iseries); + if (err == 0) { + iommu_vio_init(); + vio_bus_device.iommu_table = &vio_iommu_table; + iSeries_vio_dev = &vio_bus_device.dev; + probe_bus_iseries(); + } + return err; +} + +__initcall(vio_bus_init_iseries); diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c new file mode 100644 index 00000000000..c0c767bd37f --- /dev/null +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -0,0 +1,672 @@ +/* -*- linux-c -*- + * + * iSeries Virtual I/O Message Path code + * + * Authors: Dave Boutcher <boutcher@us.ibm.com> + * Ryan Arnold <ryanarn@us.ibm.com> + * Colin Devilbiss <devilbis@us.ibm.com> + * + * (C) Copyright 2000-2005 IBM Corporation + * + * This code is used by the iSeries virtual disk, cd, + * tape, and console to communicate with OS/400 in another + * partition. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/vmalloc.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/dma-mapping.h> +#include <linux/wait.h> +#include <linux/seq_file.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/iSeries/HvLpConfig.h> +#include <asm/iSeries/mf.h> +#include <asm/iSeries/vio.h> + +/* Status of the path to each other partition in the system. + * This is overkill, since we will only ever establish connections + * to our hosting partition and the primary partition on the system. + * But this allows for other support in the future. + */ +static struct viopathStatus { + int isOpen; /* Did we open the path? */ + int isActive; /* Do we have a mon msg outstanding */ + int users[VIO_MAX_SUBTYPES]; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + int numberAllocated; +} viopathStatus[HVMAXARCHITECTEDLPS]; + +static DEFINE_SPINLOCK(statuslock); + +/* + * For each kind of event we allocate a buffer that is + * guaranteed not to cross a page boundary + */ +static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; +static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; +static int event_buffer_initialised; + +static void handleMonitorEvent(struct HvLpEvent *event); + +/* + * We use this structure to handle asynchronous responses. The caller + * blocks on the semaphore and the handler posts the semaphore. However, + * if system_state is not SYSTEM_RUNNING, then wait_atomic is used ... + */ +struct alloc_parms { + struct semaphore sem; + int number; + atomic_t wait_atomic; + int used_wait_atomic; +}; + +/* Put a sequence number in each mon msg. The value is not + * important. Start at something other than 0 just for + * readability. wrapping this is ok. + */ +static u8 viomonseq = 22; + +/* Our hosting logical partition. We get this at startup + * time, and different modules access this variable directly. + */ +HvLpIndex viopath_hostLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_hostLp); +HvLpIndex viopath_ourLp = HvLpIndexInvalid; +EXPORT_SYMBOL(viopath_ourLp); + +/* For each kind of incoming event we set a pointer to a + * routine to call. + */ +static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES]; + +#define VIOPATH_KERN_WARN KERN_WARNING "viopath: " +#define VIOPATH_KERN_INFO KERN_INFO "viopath: " + +static int proc_viopath_show(struct seq_file *m, void *v) +{ + char *buf; + u16 vlanMap; + dma_addr_t handle; + HvLpEvent_Rc hvrc; + DECLARE_MUTEX_LOCKED(Semaphore); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return 0; + memset(buf, 0, PAGE_SIZE); + + handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, + DMA_FROM_DEVICE); + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_config | vioconfigget, + HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64)(unsigned long)&Semaphore, VIOVERSION << 16, + ((u64)handle) << 32, PAGE_SIZE, 0, 0); + + if (hvrc != HvLpEvent_Rc_Good) + printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); + + down(&Semaphore); + + vlanMap = HvLpConfig_getVirtualLanIndexMap(); + + buf[PAGE_SIZE-1] = '\0'; + seq_printf(m, "%s", buf); + seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); + seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", + e2a(xItExtVpdPanel.mfgID[2]), + e2a(xItExtVpdPanel.mfgID[3]), + e2a(xItExtVpdPanel.systemSerial[1]), + e2a(xItExtVpdPanel.systemSerial[2]), + e2a(xItExtVpdPanel.systemSerial[3]), + e2a(xItExtVpdPanel.systemSerial[4]), + e2a(xItExtVpdPanel.systemSerial[5])); + + dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); + kfree(buf); + + return 0; +} + +static int proc_viopath_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_viopath_show, NULL); +} + +static struct file_operations proc_viopath_operations = { + .open = proc_viopath_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vio_proc_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/config", 0, NULL); + if (e) + e->proc_fops = &proc_viopath_operations; + + return 0; +} +__initcall(vio_proc_init); + +/* See if a given LP is active. Allow for invalid lps to be passed in + * and just return invalid + */ +int viopath_isactive(HvLpIndex lp) +{ + if (lp == HvLpIndexInvalid) + return 0; + if (lp < HVMAXARCHITECTEDLPS) + return viopathStatus[lp].isActive; + else + return 0; +} +EXPORT_SYMBOL(viopath_isactive); + +/* + * We cache the source and target instance ids for each + * partition. + */ +HvLpInstanceId viopath_sourceinst(HvLpIndex lp) +{ + return viopathStatus[lp].mSourceInst; +} +EXPORT_SYMBOL(viopath_sourceinst); + +HvLpInstanceId viopath_targetinst(HvLpIndex lp) +{ + return viopathStatus[lp].mTargetInst; +} +EXPORT_SYMBOL(viopath_targetinst); + +/* + * Send a monitor message. This is a message with the acknowledge + * bit on that the other side will NOT explicitly acknowledge. When + * the other side goes down, the hypervisor will acknowledge any + * outstanding messages....so we will know when the other side dies. + */ +static void sendMonMsg(HvLpIndex remoteLp) +{ + HvLpEvent_Rc hvrc; + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + + /* + * Deliberately ignore the return code here. if we call this + * more than once, we don't care. + */ + vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent); + + hvrc = HvCallEvent_signalLpEventFast(remoteLp, HvLpEvent_Type_VirtualIo, + viomajorsubtype_monitor, HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst, + viomonseq++, 0, 0, 0, 0, 0); + + if (hvrc == HvLpEvent_Rc_Good) + viopathStatus[remoteLp].isActive = 1; + else { + printk(VIOPATH_KERN_WARN "could not connect to partition %d\n", + remoteLp); + viopathStatus[remoteLp].isActive = 0; + } +} + +static void handleMonitorEvent(struct HvLpEvent *event) +{ + HvLpIndex remoteLp; + int i; + + /* + * This handler is _also_ called as part of the loop + * at the end of this routine, so it must be able to + * ignore NULL events... + */ + if (!event) + return; + + /* + * First see if this is just a normal monitor message from the + * other partition + */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + if (!viopathStatus[remoteLp].isActive) + sendMonMsg(remoteLp); + return; + } + + /* + * This path is for an acknowledgement; the other partition + * died + */ + remoteLp = event->xTargetLp; + if ((event->xSourceInstanceId != viopathStatus[remoteLp].mSourceInst) || + (event->xTargetInstanceId != viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN "ignoring ack....mismatched instances\n"); + return; + } + + printk(VIOPATH_KERN_WARN "partition %d ended\n", remoteLp); + + viopathStatus[remoteLp].isActive = 0; + + /* + * For each active handler, pass them a NULL + * message to indicate that the other partition + * died + */ + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + if (vio_handler[i] != NULL) + (*vio_handler[i])(NULL); + } +} + +int vio_setHandler(int subtype, vio_event_handler_t *beh) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] != NULL) + return -EBUSY; + vio_handler[subtype] = beh; + return 0; +} +EXPORT_SYMBOL(vio_setHandler); + +int vio_clearHandler(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + if (vio_handler[subtype] == NULL) + return -EAGAIN; + vio_handler[subtype] = NULL; + return 0; +} +EXPORT_SYMBOL(vio_clearHandler); + +static void handleConfig(struct HvLpEvent *event) +{ + if (!event) + return; + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(VIOPATH_KERN_WARN + "unexpected config request from partition %d", + event->xSourceLp); + + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + up((struct semaphore *)event->xCorrelationToken); +} + +/* + * Initialization of the hosting partition + */ +void vio_set_hostlp(void) +{ + /* + * If this has already been set then we DON'T want to either change + * it or re-register the proc file system + */ + if (viopath_hostLp != HvLpIndexInvalid) + return; + + /* + * Figure out our hosting partition. This isn't allowed to change + * while we're active + */ + viopath_ourLp = HvLpConfig_getLpIndex(); + viopath_hostLp = HvLpConfig_getHostingLpIndex(viopath_ourLp); + + if (viopath_hostLp != HvLpIndexInvalid) + vio_setHandler(viomajorsubtype_config, handleConfig); +} +EXPORT_SYMBOL(vio_set_hostlp); + +static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +{ + HvLpIndex remoteLp; + int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK) + >> VIOMAJOR_SUBTYPE_SHIFT; + + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + /* + * The isActive is checked because if the hosting partition + * went down and came back up it would not be active but it + * would have different source and target instances, in which + * case we'd want to reset them. This case really protects + * against an unauthorized active partition sending interrupts + * or acks to this linux partition. + */ + if (viopathStatus[remoteLp].isActive + && (event->xSourceInstanceId != + viopathStatus[remoteLp].mTargetInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xSourceInstanceId); + return; + } + + if (viopathStatus[remoteLp].isActive + && (event->xTargetInstanceId != + viopathStatus[remoteLp].mSourceInst)) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "int msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xTargetInstanceId); + return; + } + } else { + remoteLp = event->xTargetLp; + if (event->xSourceInstanceId != + viopathStatus[remoteLp].mSourceInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "ack msg rcvd, source inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mSourceInst, + event->xSourceInstanceId); + return; + } + + if (event->xTargetInstanceId != + viopathStatus[remoteLp].mTargetInst) { + printk(VIOPATH_KERN_WARN + "message from invalid partition. " + "viopath: ack msg rcvd, target inst (%d) doesnt match (%d)\n", + viopathStatus[remoteLp].mTargetInst, + event->xTargetInstanceId); + return; + } + } + + if (vio_handler[subtype] == NULL) { + printk(VIOPATH_KERN_WARN + "unexpected virtual io event subtype %d from partition %d\n", + event->xSubtype, remoteLp); + /* No handler. Ack if necessary */ + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + /* This innocuous little line is where all the real work happens */ + (*vio_handler[subtype])(event); +} + +static void viopath_donealloc(void *parm, int number) +{ + struct alloc_parms *parmsp = parm; + + parmsp->number = number; + if (parmsp->used_wait_atomic) + atomic_set(&parmsp->wait_atomic, 0); + else + up(&parmsp->sem); +} + +static int allocateEvents(HvLpIndex remoteLp, int numEvents) +{ + struct alloc_parms parms; + + if (system_state != SYSTEM_RUNNING) { + parms.used_wait_atomic = 1; + atomic_set(&parms.wait_atomic, 1); + } else { + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + } + mf_allocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */ + numEvents, &viopath_donealloc, &parms); + if (system_state != SYSTEM_RUNNING) { + while (atomic_read(&parms.wait_atomic)) + mb(); + } else + down(&parms.sem); + return parms.number; +} + +int viopath_open(HvLpIndex remoteLp, int subtype, int numReq) +{ + int i; + unsigned long flags; + int tempNumAllocated; + + if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + + if (!event_buffer_initialised) { + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 1); + event_buffer_initialised = 1; + } + + viopathStatus[remoteLp].users[subtype]++; + + if (!viopathStatus[remoteLp].isOpen) { + viopathStatus[remoteLp].isOpen = 1; + HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualIo); + + /* + * Don't hold the spinlock during an operation that + * can sleep. + */ + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, 1); + spin_lock_irqsave(&statuslock, flags); + + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + + if (viopathStatus[remoteLp].numberAllocated == 0) { + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + + spin_unlock_irqrestore(&statuslock, flags); + return -ENOMEM; + } + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo, + &vio_handleEvent); + sendMonMsg(remoteLp); + printk(VIOPATH_KERN_INFO "opening connection to partition %d, " + "setting sinst %d, tinst %d\n", + remoteLp, viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst); + } + + spin_unlock_irqrestore(&statuslock, flags); + tempNumAllocated = allocateEvents(remoteLp, numReq); + spin_lock_irqsave(&statuslock, flags); + viopathStatus[remoteLp].numberAllocated += tempNumAllocated; + spin_unlock_irqrestore(&statuslock, flags); + + return 0; +} +EXPORT_SYMBOL(viopath_open); + +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq) +{ + unsigned long flags; + int i; + int numOpen; + struct alloc_parms parms; + + if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + /* + * If the viopath_close somehow gets called before a + * viopath_open it could decrement to -1 which is a non + * recoverable state so we'll prevent this from + * happening. + */ + if (viopathStatus[remoteLp].users[subtype] > 0) + viopathStatus[remoteLp].users[subtype]--; + + spin_unlock_irqrestore(&statuslock, flags); + + parms.used_wait_atomic = 0; + init_MUTEX_LOCKED(&parms.sem); + mf_deallocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, + numReq, &viopath_donealloc, &parms); + down(&parms.sem); + + spin_lock_irqsave(&statuslock, flags); + for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++) + numOpen += viopathStatus[remoteLp].users[i]; + + if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) { + printk(VIOPATH_KERN_INFO "closing connection to partition %d", + remoteLp); + + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].isOpen = 0; + viopathStatus[remoteLp].isActive = 0; + + for (i = 0; i < VIO_MAX_SUBTYPES; i++) + atomic_set(&event_buffer_available[i], 0); + event_buffer_initialised = 0; + } + spin_unlock_irqrestore(&statuslock, flags); + return 0; +} +EXPORT_SYMBOL(viopath_close); + +void *vio_get_event_buffer(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return NULL; + + if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0) + return &event_buffer[subtype * 256]; + else + return NULL; +} +EXPORT_SYMBOL(vio_get_event_buffer); + +void vio_free_event_buffer(int subtype, void *buffer) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) { + printk(VIOPATH_KERN_WARN + "unexpected subtype %d freeing event buffer\n", subtype); + return; + } + + if (atomic_read(&event_buffer_available[subtype]) != 0) { + printk(VIOPATH_KERN_WARN + "freeing unallocated event buffer, subtype %d\n", + subtype); + return; + } + + if (buffer != &event_buffer[subtype * 256]) { + printk(VIOPATH_KERN_WARN + "freeing invalid event buffer, subtype %d\n", subtype); + } + + atomic_set(&event_buffer_available[subtype], 1); +} +EXPORT_SYMBOL(vio_free_event_buffer); + +static const struct vio_error_entry vio_no_error = + { 0, 0, "Non-VIO Error" }; +static const struct vio_error_entry vio_unknown_error = + { 0, EIO, "Unknown Error" }; + +static const struct vio_error_entry vio_default_errors[] = { + {0x0001, EIO, "No Connection"}, + {0x0002, EIO, "No Receiver"}, + {0x0003, EIO, "No Buffer Available"}, + {0x0004, EBADRQC, "Invalid Message Type"}, + {0x0000, 0, NULL}, +}; + +const struct vio_error_entry *vio_lookup_rc( + const struct vio_error_entry *local_table, u16 rc) +{ + const struct vio_error_entry *cur; + + if (!rc) + return &vio_no_error; + if (local_table) + for (cur = local_table; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + for (cur = vio_default_errors; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + return &vio_unknown_error; +} +EXPORT_SYMBOL(vio_lookup_rc); diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c new file mode 100644 index 00000000000..d8a6796924e --- /dev/null +++ b/arch/powerpc/platforms/iseries/vpdinfo.c @@ -0,0 +1,266 @@ +/* + * This code gets the card location of the hardware + * Copyright (C) 2001 <Allan H Trautman> <IBM Corp> + * Copyright (C) 2005 Stephen Rothwel, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA + * + * Change Activity: + * Created, Feb 2, 2001 + * Ported to ppc64, August 20, 2001 + * End Change Activity + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <asm/types.h> +#include <asm/resource.h> + +#include <asm/iSeries/HvCallPci.h> +#include <asm/iSeries/HvTypes.h> +#include <asm/iSeries/iSeries_pci.h> + +/* + * Size of Bus VPD data + */ +#define BUS_VPDSIZE 1024 + +/* + * Bus Vpd Tags + */ +#define VpdEndOfAreaTag 0x79 +#define VpdIdStringTag 0x82 +#define VpdVendorAreaTag 0x84 + +/* + * Mfg Area Tags + */ +#define VpdFruFrameId 0x4649 // "FI" +#define VpdSlotMapFormat 0x4D46 // "MF" +#define VpdSlotMap 0x534D // "SM" + +/* + * Structures of the areas + */ +struct MfgVpdAreaStruct { + u16 Tag; + u8 TagLength; + u8 AreaData1; + u8 AreaData2; +}; +typedef struct MfgVpdAreaStruct MfgArea; +#define MFG_ENTRY_SIZE 3 + +struct SlotMapStruct { + u8 AgentId; + u8 SecondaryAgentId; + u8 PhbId; + char CardLocation[3]; + char Parms[8]; + char Reserved[2]; +}; +typedef struct SlotMapStruct SlotMap; +#define SLOT_ENTRY_SIZE 16 + +/* + * Parse the Slot Area + */ +static void __init iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen, + HvAgentId agent, u8 *PhbId, char card[4]) +{ + int SlotMapLen = MapLen; + SlotMap *SlotMapPtr = MapPtr; + + /* + * Parse Slot label until we find the one requested + */ + while (SlotMapLen > 0) { + if (SlotMapPtr->AgentId == agent) { + /* + * If Phb wasn't found, grab the entry first one found. + */ + if (*PhbId == 0xff) + *PhbId = SlotMapPtr->PhbId; + /* Found it, extract the data. */ + if (SlotMapPtr->PhbId == *PhbId) { + memcpy(card, &SlotMapPtr->CardLocation, 3); + card[3] = 0; + break; + } + } + /* Point to the next Slot */ + SlotMapPtr = (SlotMap *)((char *)SlotMapPtr + SLOT_ENTRY_SIZE); + SlotMapLen -= SLOT_ENTRY_SIZE; + } +} + +/* + * Parse the Mfg Area + */ +static void __init iSeries_Parse_MfgArea(u8 *AreaData, int AreaLen, + HvAgentId agent, u8 *PhbId, + u8 *frame, char card[4]) +{ + MfgArea *MfgAreaPtr = (MfgArea *)AreaData; + int MfgAreaLen = AreaLen; + u16 SlotMapFmt = 0; + + /* Parse Mfg Data */ + while (MfgAreaLen > 0) { + int MfgTagLen = MfgAreaPtr->TagLength; + /* Frame ID (FI 4649020310 ) */ + if (MfgAreaPtr->Tag == VpdFruFrameId) /* FI */ + *frame = MfgAreaPtr->AreaData1; + /* Slot Map Format (MF 4D46020004 ) */ + else if (MfgAreaPtr->Tag == VpdSlotMapFormat) /* MF */ + SlotMapFmt = (MfgAreaPtr->AreaData1 * 256) + + MfgAreaPtr->AreaData2; + /* Slot Map (SM 534D90 */ + else if (MfgAreaPtr->Tag == VpdSlotMap) { /* SM */ + SlotMap *SlotMapPtr; + + if (SlotMapFmt == 0x1004) + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE + 1); + else + SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr + + MFG_ENTRY_SIZE); + iSeries_Parse_SlotArea(SlotMapPtr, MfgTagLen, + agent, PhbId, card); + } + /* + * Point to the next Mfg Area + * Use defined size, sizeof give wrong answer + */ + MfgAreaPtr = (MfgArea *)((char *)MfgAreaPtr + MfgTagLen + + MFG_ENTRY_SIZE); + MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE); + } +} + +/* + * Look for "BUS".. Data is not Null terminated. + * PHBID of 0xFF indicates PHB was not found in VPD Data. + */ +static int __init iSeries_Parse_PhbId(u8 *AreaPtr, int AreaLength) +{ + u8 *PhbPtr = AreaPtr; + int DataLen = AreaLength; + char PhbId = 0xFF; + + while (DataLen > 0) { + if ((*PhbPtr == 'B') && (*(PhbPtr + 1) == 'U') + && (*(PhbPtr + 2) == 'S')) { + PhbPtr += 3; + while (*PhbPtr == ' ') + ++PhbPtr; + PhbId = (*PhbPtr & 0x0F); + break; + } + ++PhbPtr; + --DataLen; + } + return PhbId; +} + +/* + * Parse out the VPD Areas + */ +static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen, + HvAgentId agent, u8 *frame, char card[4]) +{ + u8 *TagPtr = VpdData; + int DataLen = VpdDataLen - 3; + u8 PhbId; + + while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) { + int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256); + u8 *AreaData = TagPtr + 3; + + if (*TagPtr == VpdIdStringTag) + PhbId = iSeries_Parse_PhbId(AreaData, AreaLen); + else if (*TagPtr == VpdVendorAreaTag) + iSeries_Parse_MfgArea(AreaData, AreaLen, + agent, &PhbId, frame, card); + /* Point to next Area. */ + TagPtr = AreaData + AreaLen; + DataLen -= AreaLen; + } +} + +static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent, + u8 *frame, char card[4]) +{ + int BusVpdLen = 0; + u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL); + + if (BusVpdPtr == NULL) { + printk("PCI: Bus VPD Buffer allocation failure.\n"); + return; + } + BusVpdLen = HvCallPci_getBusVpd(bus, ISERIES_HV_ADDR(BusVpdPtr), + BUS_VPDSIZE); + if (BusVpdLen == 0) { + printk("PCI: Bus VPD Buffer zero length.\n"); + goto out_free; + } + /* printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); */ + /* Make sure this is what I think it is */ + if (*BusVpdPtr != VpdIdStringTag) { /* 0x82 */ + printk("PCI: Bus VPD Buffer missing starting tag.\n"); + goto out_free; + } + iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card); +out_free: + kfree(BusVpdPtr); +} + +/* + * Prints the device information. + * - Pass in pci_dev* pointer to the device. + * - Pass in the device count + * + * Format: + * PCI: Bus 0, Device 26, Vendor 0x12AE Frame 1, Card C10 Ethernet + * controller + */ +void __init iSeries_Device_Information(struct pci_dev *PciDev, int count) +{ + struct device_node *DevNode = PciDev->sysdata; + u16 bus; + u8 frame; + char card[4]; + HvSubBusNumber subbus; + HvAgentId agent; + + if (DevNode == NULL) { + printk("%d. PCI: iSeries_Device_Information DevNode is NULL\n", + count); + return; + } + + bus = ISERIES_BUS(DevNode); + subbus = ISERIES_SUBBUS(DevNode); + agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus), + ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus)); + iSeries_Get_Location_Code(bus, agent, &frame, card); + + printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s ", + count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor, + frame, card); + printk("0x%04X\n", (int)(PciDev->class >> 8)); +} |