aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile40
-rw-r--r--arch/powerpc/kernel/align.c530
-rw-r--r--arch/powerpc/kernel/asm-offsets.c48
-rw-r--r--arch/powerpc/kernel/cpu_setup_power4.S233
-rw-r--r--arch/powerpc/kernel/cputable.c19
-rw-r--r--arch/powerpc/kernel/dma_64.c151
-rw-r--r--arch/powerpc/kernel/entry_64.S4
-rw-r--r--arch/powerpc/kernel/firmware.c45
-rw-r--r--arch/powerpc/kernel/fpu.S24
-rw-r--r--arch/powerpc/kernel/head_32.S1
-rw-r--r--arch/powerpc/kernel/head_64.S91
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/idle_64.c121
-rw-r--r--arch/powerpc/kernel/ioctl32.c45
-rw-r--r--arch/powerpc/kernel/iomap.c146
-rw-r--r--arch/powerpc/kernel/iommu.c572
-rw-r--r--arch/powerpc/kernel/irq.c479
-rw-r--r--arch/powerpc/kernel/kprobes.c459
-rw-r--r--arch/powerpc/kernel/lparcfg.c608
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c358
-rw-r--r--arch/powerpc/kernel/misc_32.S31
-rw-r--r--arch/powerpc/kernel/misc_64.S8
-rw-r--r--arch/powerpc/kernel/module_64.c455
-rw-r--r--arch/powerpc/kernel/nvram_64.c742
-rw-r--r--arch/powerpc/kernel/paca.c135
-rw-r--r--arch/powerpc/kernel/pci_64.c1365
-rw-r--r--arch/powerpc/kernel/pci_direct_iommu.c94
-rw-r--r--arch/powerpc/kernel/pci_dn.c230
-rw-r--r--arch/powerpc/kernel/pci_iommu.c128
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c21
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c126
-rw-r--r--arch/powerpc/kernel/process.c63
-rw-r--r--arch/powerpc/kernel/prom.c37
-rw-r--r--arch/powerpc/kernel/prom_init.c189
-rw-r--r--arch/powerpc/kernel/ptrace-common.h164
-rw-r--r--arch/powerpc/kernel/ptrace.c3
-rw-r--r--arch/powerpc/kernel/ptrace32.c3
-rw-r--r--arch/powerpc/kernel/rtas-proc.c3
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c105
-rw-r--r--arch/powerpc/kernel/rtas.c5
-rw-r--r--arch/powerpc/kernel/rtas_pci.c455
-rw-r--r--arch/powerpc/kernel/setup-common.c42
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c22
-rw-r--r--arch/powerpc/kernel/setup_64.c138
-rw-r--r--arch/powerpc/kernel/signal_32.c20
-rw-r--r--arch/powerpc/kernel/signal_64.c6
-rw-r--r--arch/powerpc/kernel/smp.c16
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/syscalls.c2
-rw-r--r--arch/powerpc/kernel/sysfs.c383
-rw-r--r--arch/powerpc/kernel/time.c69
-rw-r--r--arch/powerpc/kernel/traps.c3
-rw-r--r--arch/powerpc/kernel/udbg.c125
-rw-r--r--arch/powerpc/kernel/udbg_16550.c123
-rw-r--r--arch/powerpc/kernel/udbg_scc.c135
-rw-r--r--arch/powerpc/kernel/vdso.c743
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile40
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S69
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S86
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S323
-rw-r--r--arch/powerpc/kernel/vdso32/note.S25
-rw-r--r--arch/powerpc/kernel/vdso32/sigtramp.S300
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S117
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S13
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile35
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S68
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S86
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S253
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S295
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S116
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S13
73 files changed, 11570 insertions, 442 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b3ae2993efb..9ed551b6c17 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,7 @@
ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
+CFLAGS_ioctl32.o += -Ifs/
endif
ifeq ($(CONFIG_PPC32),y)
CFLAGS_prom_init.o += -fPIC
@@ -11,17 +12,30 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
- signal_32.o pmc.o
+ irq.o align.o signal_32.o pmc.o vdso.o
+obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
- signal_64.o ptrace32.o systbl.o
+ signal_64.o ptrace32.o systbl.o \
+ paca.o ioctl32.o cpu_setup_power4.o \
+ firmware.o sysfs.o udbg.o idle_64.o
+obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o
+procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
+obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
+obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
+udbgscc-$(CONFIG_PPC64) := udbg_scc.o
+obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
+obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o
ifeq ($(CONFIG_PPC_MERGE),y)
@@ -36,12 +50,23 @@ extra-y += vmlinux.lds
obj-y += process.o init_task.o time.o \
prom.o traps.o setup-common.o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o
-obj-$(CONFIG_PPC64) += misc_64.o
+obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
obj-$(CONFIG_PPC_OF) += prom_init.o
obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_6xx) += idle_6xx.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+module-$(CONFIG_PPC64) += module_64.o
+obj-$(CONFIG_MODULES) += $(module-y)
+
+pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
+ pci_direct_iommu.o iomap.o
+obj-$(CONFIG_PCI) += $(pci64-y)
+
+kexec64-$(CONFIG_PPC64) += machine_kexec_64.o
+obj-$(CONFIG_KEXEC) += $(kexec64-y)
ifeq ($(CONFIG_PPC_ISERIES),y)
$(obj)/head_64.o: $(obj)/lparmap.s
@@ -49,13 +74,12 @@ AFLAGS_head_64.o += -I$(obj)
endif
else
-# stuff used from here for ARCH=ppc or ARCH=ppc64
+# stuff used from here for ARCH=ppc
smpobj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \
- setup-common.o $(smpobj-y)
-
endif
+obj-$(CONFIG_PPC64) += $(obj64-y)
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
new file mode 100644
index 00000000000..faaec9c6f78
--- /dev/null
+++ b/arch/powerpc/kernel/align.c
@@ -0,0 +1,530 @@
+/* align.c - handle alignment exceptions for the Power PC.
+ *
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright (c) 2001-2002 PPC64 team, IBM Corp
+ * 64-bit and Power4 support
+ * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * Merge ppc32 and ppc64 implementations
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+
+struct aligninfo {
+ unsigned char len;
+ unsigned char flags;
+};
+
+#define IS_XFORM(inst) (((inst) >> 26) == 31)
+#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
+
+#define INVALID { 0, 0 }
+
+#define LD 1 /* load */
+#define ST 2 /* store */
+#define SE 4 /* sign-extend value */
+#define F 8 /* to/from fp regs */
+#define U 0x10 /* update index register */
+#define M 0x20 /* multiple load/store */
+#define SW 0x40 /* byte swap int or ... */
+#define S 0x40 /* ... single-precision fp */
+#define SX 0x40 /* byte count in XER */
+#define HARD 0x80 /* string, stwcx. */
+
+#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
+
+#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
+
+/*
+ * The PowerPC stores certain bits of the instruction that caused the
+ * alignment exception in the DSISR register. This array maps those
+ * bits to information about the operand length and what the
+ * instruction would do.
+ */
+static struct aligninfo aligninfo[128] = {
+ { 4, LD }, /* 00 0 0000: lwz / lwarx */
+ INVALID, /* 00 0 0001 */
+ { 4, ST }, /* 00 0 0010: stw */
+ INVALID, /* 00 0 0011 */
+ { 2, LD }, /* 00 0 0100: lhz */
+ { 2, LD+SE }, /* 00 0 0101: lha */
+ { 2, ST }, /* 00 0 0110: sth */
+ { 4, LD+M }, /* 00 0 0111: lmw */
+ { 4, LD+F+S }, /* 00 0 1000: lfs */
+ { 8, LD+F }, /* 00 0 1001: lfd */
+ { 4, ST+F+S }, /* 00 0 1010: stfs */
+ { 8, ST+F }, /* 00 0 1011: stfd */
+ INVALID, /* 00 0 1100 */
+ { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
+ INVALID, /* 00 0 1110 */
+ { 8, ST }, /* 00 0 1111: std/stdu */
+ { 4, LD+U }, /* 00 1 0000: lwzu */
+ INVALID, /* 00 1 0001 */
+ { 4, ST+U }, /* 00 1 0010: stwu */
+ INVALID, /* 00 1 0011 */
+ { 2, LD+U }, /* 00 1 0100: lhzu */
+ { 2, LD+SE+U }, /* 00 1 0101: lhau */
+ { 2, ST+U }, /* 00 1 0110: sthu */
+ { 4, ST+M }, /* 00 1 0111: stmw */
+ { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
+ { 8, LD+F+U }, /* 00 1 1001: lfdu */
+ { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
+ { 8, ST+F+U }, /* 00 1 1011: stfdu */
+ INVALID, /* 00 1 1100 */
+ INVALID, /* 00 1 1101 */
+ INVALID, /* 00 1 1110 */
+ INVALID, /* 00 1 1111 */
+ { 8, LD }, /* 01 0 0000: ldx */
+ INVALID, /* 01 0 0001 */
+ { 8, ST }, /* 01 0 0010: stdx */
+ INVALID, /* 01 0 0011 */
+ INVALID, /* 01 0 0100 */
+ { 4, LD+SE }, /* 01 0 0101: lwax */
+ INVALID, /* 01 0 0110 */
+ INVALID, /* 01 0 0111 */
+ { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
+ { 4, LD+M+HARD }, /* 01 0 1001: lswi */
+ { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
+ { 4, ST+M+HARD }, /* 01 0 1011: stswi */
+ INVALID, /* 01 0 1100 */
+ { 8, LD+U }, /* 01 0 1101: ldu */
+ INVALID, /* 01 0 1110 */
+ { 8, ST+U }, /* 01 0 1111: stdu */
+ { 8, LD+U }, /* 01 1 0000: ldux */
+ INVALID, /* 01 1 0001 */
+ { 8, ST+U }, /* 01 1 0010: stdux */
+ INVALID, /* 01 1 0011 */
+ INVALID, /* 01 1 0100 */
+ { 4, LD+SE+U }, /* 01 1 0101: lwaux */
+ INVALID, /* 01 1 0110 */
+ INVALID, /* 01 1 0111 */
+ INVALID, /* 01 1 1000 */
+ INVALID, /* 01 1 1001 */
+ INVALID, /* 01 1 1010 */
+ INVALID, /* 01 1 1011 */
+ INVALID, /* 01 1 1100 */
+ INVALID, /* 01 1 1101 */
+ INVALID, /* 01 1 1110 */
+ INVALID, /* 01 1 1111 */
+ INVALID, /* 10 0 0000 */
+ INVALID, /* 10 0 0001 */
+ INVALID, /* 10 0 0010: stwcx. */
+ INVALID, /* 10 0 0011 */
+ INVALID, /* 10 0 0100 */
+ INVALID, /* 10 0 0101 */
+ INVALID, /* 10 0 0110 */
+ INVALID, /* 10 0 0111 */
+ { 4, LD+SW }, /* 10 0 1000: lwbrx */
+ INVALID, /* 10 0 1001 */
+ { 4, ST+SW }, /* 10 0 1010: stwbrx */
+ INVALID, /* 10 0 1011 */
+ { 2, LD+SW }, /* 10 0 1100: lhbrx */
+ { 4, LD+SE }, /* 10 0 1101 lwa */
+ { 2, ST+SW }, /* 10 0 1110: sthbrx */
+ INVALID, /* 10 0 1111 */
+ INVALID, /* 10 1 0000 */
+ INVALID, /* 10 1 0001 */
+ INVALID, /* 10 1 0010 */
+ INVALID, /* 10 1 0011 */
+ INVALID, /* 10 1 0100 */
+ INVALID, /* 10 1 0101 */
+ INVALID, /* 10 1 0110 */
+ INVALID, /* 10 1 0111 */
+ INVALID, /* 10 1 1000 */
+ INVALID, /* 10 1 1001 */
+ INVALID, /* 10 1 1010 */
+ INVALID, /* 10 1 1011 */
+ INVALID, /* 10 1 1100 */
+ INVALID, /* 10 1 1101 */
+ INVALID, /* 10 1 1110 */
+ { 0, ST+HARD }, /* 10 1 1111: dcbz */
+ { 4, LD }, /* 11 0 0000: lwzx */
+ INVALID, /* 11 0 0001 */
+ { 4, ST }, /* 11 0 0010: stwx */
+ INVALID, /* 11 0 0011 */
+ { 2, LD }, /* 11 0 0100: lhzx */
+ { 2, LD+SE }, /* 11 0 0101: lhax */
+ { 2, ST }, /* 11 0 0110: sthx */
+ INVALID, /* 11 0 0111 */
+ { 4, LD+F+S }, /* 11 0 1000: lfsx */
+ { 8, LD+F }, /* 11 0 1001: lfdx */
+ { 4, ST+F+S }, /* 11 0 1010: stfsx */
+ { 8, ST+F }, /* 11 0 1011: stfdx */
+ INVALID, /* 11 0 1100 */
+ { 8, LD+M }, /* 11 0 1101: lmd */
+ INVALID, /* 11 0 1110 */
+ { 8, ST+M }, /* 11 0 1111: stmd */
+ { 4, LD+U }, /* 11 1 0000: lwzux */
+ INVALID, /* 11 1 0001 */
+ { 4, ST+U }, /* 11 1 0010: stwux */
+ INVALID, /* 11 1 0011 */
+ { 2, LD+U }, /* 11 1 0100: lhzux */
+ { 2, LD+SE+U }, /* 11 1 0101: lhaux */
+ { 2, ST+U }, /* 11 1 0110: sthux */
+ INVALID, /* 11 1 0111 */
+ { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
+ { 8, LD+F+U }, /* 11 1 1001: lfdux */
+ { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
+ { 8, ST+F+U }, /* 11 1 1011: stfdux */
+ INVALID, /* 11 1 1100 */
+ INVALID, /* 11 1 1101 */
+ INVALID, /* 11 1 1110 */
+ INVALID, /* 11 1 1111 */
+};
+
+/*
+ * Create a DSISR value from the instruction
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+ unsigned dsisr;
+
+
+ /* bits 6:15 --> 22:31 */
+ dsisr = (instr & 0x03ff0000) >> 16;
+
+ if (IS_XFORM(instr)) {
+ /* bits 29:30 --> 15:16 */
+ dsisr |= (instr & 0x00000006) << 14;
+ /* bit 25 --> 17 */
+ dsisr |= (instr & 0x00000040) << 8;
+ /* bits 21:24 --> 18:21 */
+ dsisr |= (instr & 0x00000780) << 3;
+ } else {
+ /* bit 5 --> 17 */
+ dsisr |= (instr & 0x04000000) >> 12;
+ /* bits 1: 4 --> 18:21 */
+ dsisr |= (instr & 0x78000000) >> 17;
+ /* bits 30:31 --> 12:13 */
+ if (IS_DSFORM(instr))
+ dsisr |= (instr & 0x00000003) << 18;
+ }
+
+ return dsisr;
+}
+
+/*
+ * The dcbz (data cache block zero) instruction
+ * gives an alignment fault if used on non-cacheable
+ * memory. We handle the fault mainly for the
+ * case when we are running with the cache disabled
+ * for debugging.
+ */
+static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
+{
+ long __user *p;
+ int i, size;
+
+#ifdef __powerpc64__
+ size = ppc64_caches.dline_size;
+#else
+ size = L1_CACHE_BYTES;
+#endif
+ p = (long __user *) (regs->dar & -size);
+ if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
+ return -EFAULT;
+ for (i = 0; i < size / sizeof(long); ++i)
+ if (__put_user(0, p+i))
+ return -EFAULT;
+ return 1;
+}
+
+/*
+ * Emulate load & store multiple instructions
+ * On 64-bit machines, these instructions only affect/use the
+ * bottom 4 bytes of each register, and the loads clear the
+ * top 4 bytes of the affected register.
+ */
+#ifdef CONFIG_PPC64
+#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
+#else
+#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
+#endif
+
+static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int nb,
+ unsigned int flags, unsigned int instr)
+{
+ unsigned long *rptr;
+ unsigned int nb0, i;
+
+ /*
+ * We do not try to emulate 8 bytes multiple as they aren't really
+ * available in our operating environments and we don't try to
+ * emulate multiples operations in kernel land as they should never
+ * be used/generated there at least not on unaligned boundaries
+ */
+ if (unlikely((nb > 4) || !user_mode(regs)))
+ return 0;
+
+ /* lmw, stmw, lswi/x, stswi/x */
+ nb0 = 0;
+ if (flags & HARD) {
+ if (flags & SX) {
+ nb = regs->xer & 127;
+ if (nb == 0)
+ return 1;
+ } else {
+ if (__get_user(instr,
+ (unsigned int __user *)regs->nip))
+ return -EFAULT;
+ nb = (instr >> 11) & 0x1f;
+ if (nb == 0)
+ nb = 32;
+ }
+ if (nb + reg * 4 > 128) {
+ nb0 = nb + reg * 4 - 128;
+ nb = 128 - reg * 4;
+ }
+ } else {
+ /* lwm, stmw */
+ nb = (32 - reg) * 4;
+ }
+
+ if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
+ return -EFAULT; /* bad address */
+
+ rptr = &regs->gpr[reg];
+ if (flags & LD) {
+ /*
+ * This zeroes the top 4 bytes of the affected registers
+ * in 64-bit mode, and also zeroes out any remaining
+ * bytes of the last register for lsw*.
+ */
+ memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
+ if (nb0 > 0)
+ memset(&regs->gpr[0], 0,
+ ((nb0 + 3) / 4) * sizeof(unsigned long));
+
+ for (i = 0; i < nb; ++i)
+ if (__get_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i)
+ if (__get_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ }
+
+ } else {
+ for (i = 0; i < nb; ++i)
+ if (__put_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i)
+ if (__put_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ }
+ }
+ return 1;
+}
+
+
+/*
+ * Called on alignment exception. Attempts to fixup
+ *
+ * Return 1 on success
+ * Return 0 if unable to handle the interrupt
+ * Return -EFAULT if data address is bad
+ */
+
+int fix_alignment(struct pt_regs *regs)
+{
+ unsigned int instr, nb, flags;
+ unsigned int reg, areg;
+ unsigned int dsisr;
+ unsigned char __user *addr;
+ unsigned char __user *p;
+ int ret, t;
+ union {
+ u64 ll;
+ double dd;
+ unsigned char v[8];
+ struct {
+ unsigned hi32;
+ int low32;
+ } x32;
+ struct {
+ unsigned char hi48[6];
+ short low16;
+ } x16;
+ } data;
+
+ /*
+ * We require a complete register set, if not, then our assembly
+ * is broken
+ */
+ CHECK_FULL_REGS(regs);
+
+ dsisr = regs->dsisr;
+
+ /* Some processors don't provide us with a DSISR we can use here,
+ * let's make one up from the instruction
+ */
+ if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
+ unsigned int real_instr;
+ if (unlikely(__get_user(real_instr,
+ (unsigned int __user *)regs->nip)))
+ return -EFAULT;
+ dsisr = make_dsisr(real_instr);
+ }
+
+ /* extract the operation and registers from the dsisr */
+ reg = (dsisr >> 5) & 0x1f; /* source/dest register */
+ areg = dsisr & 0x1f; /* register to update */
+ instr = (dsisr >> 10) & 0x7f;
+ instr |= (dsisr >> 13) & 0x60;
+
+ /* Lookup the operation in our table */
+ nb = aligninfo[instr].len;
+ flags = aligninfo[instr].flags;
+
+ /* DAR has the operand effective address */
+ addr = (unsigned char __user *)regs->dar;
+
+ /* A size of 0 indicates an instruction we don't support, with
+ * the exception of DCBZ which is handled as a special case here
+ */
+ if (instr == DCBZ)
+ return emulate_dcbz(regs, addr);
+ if (unlikely(nb == 0))
+ return 0;
+
+ /* Load/Store Multiple instructions are handled in their own
+ * function
+ */
+ if (flags & M)
+ return emulate_multiple(regs, addr, reg, nb, flags, instr);
+
+ /* Verify the address of the operand */
+ if (unlikely(user_mode(regs) &&
+ !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+ addr, nb)))
+ return -EFAULT;
+
+ /* Force the fprs into the save area so we can reference them */
+ if (flags & F) {
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+ flush_fp_to_thread(current);
+ }
+
+ /* If we are loading, get the data from user space, else
+ * get it from register values
+ */
+ if (flags & LD) {
+ data.ll = 0;
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __get_user(data.v[0], p++);
+ ret |= __get_user(data.v[1], p++);
+ ret |= __get_user(data.v[2], p++);
+ ret |= __get_user(data.v[3], p++);
+ case 4:
+ ret |= __get_user(data.v[4], p++);
+ ret |= __get_user(data.v[5], p++);
+ case 2:
+ ret |= __get_user(data.v[6], p++);
+ ret |= __get_user(data.v[7], p++);
+ if (unlikely(ret))
+ return -EFAULT;
+ }
+ } else if (flags & F)
+ data.dd = current->thread.fpr[reg];
+ else
+ data.ll = regs->gpr[reg];
+
+ /* Perform other misc operations like sign extension, byteswap,
+ * or floating point single precision conversion
+ */
+ switch (flags & ~U) {
+ case LD+SE: /* sign extend */
+ if ( nb == 2 )
+ data.ll = data.x16.low16;
+ else /* nb must be 4 */
+ data.ll = data.x32.low32;
+ break;
+ case LD+S: /* byte-swap */
+ case ST+S:
+ if (nb == 2) {
+ SWAP(data.v[6], data.v[7]);
+ } else {
+ SWAP(data.v[4], data.v[7]);
+ SWAP(data.v[5], data.v[6]);
+ }
+ break;
+
+ /* Single-precision FP load and store require conversions... */
+ case LD+F+S:
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ break;
+ case ST+F+S:
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ break;
+ }
+
+ /* Store result to memory or update registers */
+ if (flags & ST) {
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __put_user(data.v[0], p++);
+ ret |= __put_user(data.v[1], p++);
+ ret |= __put_user(data.v[2], p++);
+ ret |= __put_user(data.v[3], p++);
+ case 4:
+ ret |= __put_user(data.v[4], p++);
+ ret |= __put_user(data.v[5], p++);
+ case 2:
+ ret |= __put_user(data.v[6], p++);
+ ret |= __put_user(data.v[7], p++);
+ }
+ if (unlikely(ret))
+ return -EFAULT;
+ } else if (flags & F)
+ current->thread.fpr[reg] = data.dd;
+ else
+ regs->gpr[reg] = data.ll;
+
+ /* Update RA as needed */
+ if (flags & U)
+ regs->gpr[areg] = regs->dar;
+
+ return 1;
+}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b7575725199..91538d2445b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -37,12 +37,12 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/cache.h>
-#include <asm/systemcfg.h>
#include <asm/compat.h>
#endif
@@ -106,7 +106,6 @@ int main(void)
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
- DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
/* paca */
@@ -252,25 +251,44 @@ int main(void)
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
-#else /* CONFIG_PPC64 */
- /* systemcfg offsets for use by vdso */
- DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
- DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
- DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
- DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
- DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
- DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
- DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
- DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
- DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
+#endif /* ! CONFIG_PPC64 */
- /* timeval/timezone offsets for use by vdso */
+ /* datapage offsets for use by vdso */
+ DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
+ DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
+ DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
+ DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
+ DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
+ DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
+ DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+#ifdef CONFIG_PPC64
+ DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+ DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
+#else
+ DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
+ DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+#endif
+ /* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-#endif /* CONFIG_PPC64 */
+
+ /* Other bits used by the vdso */
+ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
+
return 0;
}
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S
new file mode 100644
index 00000000000..cca942fe611
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power4.S
@@ -0,0 +1,233 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__970_cpu_preinit)
+ /*
+ * Do nothing if not running in HV mode
+ */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ /*
+ * Deal only with PPC970 and PPC970FX.
+ */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+1:
+
+ /* Make sure HID4:rm_ci is off before MMU is turned off, that large
+ * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+ * HID5:DCBZ32_ill
+ */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ mfspr r3,SPRN_HID5
+ rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
+ sync
+ mtspr SPRN_HID5,r3
+ isync
+ sync
+
+ /* Setup some basic HID1 features */
+ mfspr r0,SPRN_HID1
+ li r3,0x1200 /* enable i-fetch cacheability */
+ sldi r3,r3,44 /* and prefetch */
+ or r0,r0,r3
+ mtspr SPRN_HID1,r0
+ mtspr SPRN_HID1,r0
+ isync
+
+ /* Clear HIOR */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
+ isync
+ blr
+
+_GLOBAL(__setup_cpu_power4)
+ blr
+
+_GLOBAL(__setup_cpu_be)
+ /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
+ addi r3, 0, 0
+ ori r3, r3, HID6_LB
+ sldi r3, r3, 32
+ nor r3, r3, r3
+ mfspr r4, SPRN_HID6
+ and r4, r4, r3
+ addi r3, 0, 0x02000
+ sldi r3, r3, 32
+ or r4, r4, r3
+ mtspr SPRN_HID6, r4
+ blr
+
+_GLOBAL(__setup_cpu_ppc970)
+ mfspr r0,SPRN_HID0
+ li r11,5 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,8 /* set NAP and DPM */
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ sync
+ isync
+ blr
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 8
+#define CS_HID4 16
+#define CS_HID5 24
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES,0
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, HID4, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bne 2f
+
+1: /* Save HID0,1,4 and 5 */
+ mfspr r3,SPRN_HID0
+ std r3,CS_HID0(r5)
+ mfspr r3,SPRN_HID1
+ std r3,CS_HID1(r5)
+ mfspr r3,SPRN_HID4
+ std r3,CS_HID4(r5)
+ mfspr r3,SPRN_HID5
+ std r3,CS_HID5(r5)
+
+2:
+ mtcr r7
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+ /* Get storage ptr (FIXME when using anton reloc as we
+ * are running with translation disabled here
+ */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+
+1: /* Before accessing memory, we make sure rm_ci is clear */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+
+ /* Clear interrupt prefix */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0
+ isync
+
+ /* Restore HID0 */
+ ld r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ sync
+ isync
+
+ /* Restore HID1 */
+ ld r3,CS_HID1(r5)
+ sync
+ isync
+ mtspr SPRN_HID1,r3
+ mtspr SPRN_HID1,r3
+ sync
+ isync
+
+ /* Restore HID4 */
+ ld r3,CS_HID4(r5)
+ sync
+ isync
+ mtspr SPRN_HID4,r3
+ sync
+ isync
+
+ /* Restore HID5 */
+ ld r3,CS_HID5(r5)
+ sync
+ isync
+ mtspr SPRN_HID5,r3
+ sync
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index cc4e9eb1c13..1d85cedbbb7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -52,6 +52,9 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
PPC_FEATURE_HAS_MMU)
#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS)
/* We only set the spe features if the kernel was compiled with
@@ -160,7 +163,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00350000,
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -175,7 +178,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00380000,
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -190,7 +193,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00390000,
.cpu_name = "PPC970",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -212,7 +215,7 @@ struct cpu_spec cpu_specs[] = {
#else
.cpu_features = CPU_FTRS_PPC970,
#endif
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -230,7 +233,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00440000,
.cpu_name = "PPC970MP",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -245,7 +248,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -260,7 +263,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -276,7 +279,7 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "Cell Broadband Engine",
.cpu_features = CPU_FTRS_CELL,
.cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
+ PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
.cpu_setup = __setup_cpu_be,
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 00000000000..7c3419656cc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Implements the generic device dma API for ppc64. Handles
+ * the pci and vio busses
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+/* Include the busses we support */
+#include <linux/pci.h>
+#include <asm/vio.h>
+#include <asm/scatterlist.h>
+#include <asm/bug.h>
+
+static struct dma_mapping_ops *get_dma_ops(struct device *dev)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return &pci_dma_ops;
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return &vio_dma_ops;
+#endif
+ return NULL;
+}
+
+int dma_supported(struct device *dev, u64 mask)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->dma_supported(dev, mask);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return -EIO;
+#endif /* CONFIG_IBMVIO */
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+ BUG();
+ return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev, cpu_addr, size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_addr, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev,
+ (page_address(page) + offset), size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_address, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_sg(dev, sg, nents, direction);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2d22bf03484..bce33a38399 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -183,8 +183,8 @@ syscall_exit_trace_cont:
ld r13,GPR13(r1) /* returning to usermode */
1: ld r2,GPR2(r1)
li r12,MSR_RI
- andc r10,r10,r12
- mtmsrd r10,1 /* clear MSR.RI */
+ andc r11,r10,r12
+ mtmsrd r11,1 /* clear MSR.RI */
ld r1,GPR1(r1)
mtlr r4
mtcr r5
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 00000000000..65eae752a52
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,45 @@
+/*
+ * Extracted from cputable.c
+ *
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
+};
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4d6001fa1cf..b780b42c95f 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -41,20 +41,20 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
LOADBASE(r3, last_task_used_math)
toreal(r3)
- LDL r4,OFF(last_task_used_math)(r3)
- CMPI 0,r4,0
+ PPC_LL r4,OFF(last_task_used_math)(r3)
+ PPC_LCMPI 0,r4,0
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
SAVE_32FPRS(0, r4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
- LDL r5,PT_REGS(r4)
+ PPC_LL r5,PT_REGS(r4)
toreal(r5)
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r10,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r10 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
/* enable use of FP after return */
@@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
- STL r4,OFF(last_task_used_math)(r3)
+ PPC_STL r4,OFF(last_task_used_math)(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
@@ -97,24 +97,24 @@ _GLOBAL(giveup_fpu)
MTMSRD(r5) /* enable use of fpu now */
SYNC_601
isync
- CMPI 0,r3,0
+ PPC_LCMPI 0,r3,0
beqlr- /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
- LDL r5,PT_REGS(r3)
- CMPI 0,r5,0
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
SAVE_32FPRS(0, r3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r3,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r3 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
li r5,0
LOADBASE(r4,last_task_used_math)
- STL r5,OFF(last_task_used_math)(r4)
+ PPC_STL r5,OFF(last_task_used_math)(r4)
#endif /* CONFIG_SMP */
blr
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index b102e3a2415..ccdf94731e3 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -1100,6 +1100,7 @@ start_here:
mr r3,r31
mr r4,r30
bl machine_init
+ bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_APUS
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 16ab40daa73..8a8bf79ef04 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -28,7 +28,6 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/systemcfg.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
@@ -1697,25 +1696,14 @@ _GLOBAL(pmac_secondary_start)
* SPRG3 = paca virtual address
*/
_GLOBAL(__secondary_start)
+ /* Set thread priority to MEDIUM */
+ HMT_MEDIUM
- HMT_MEDIUM /* Set thread priority to MEDIUM */
-
+ /* Load TOC */
ld r2,PACATOC(r13)
- li r6,0
- stb r6,PACAPROCENABLED(r13)
-
-#ifndef CONFIG_PPC_ISERIES
- /* Initialize the page table pointer register. */
- LOADADDR(r6,_SDR1)
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-#endif
- /* Initialize the first segment table (or SLB) entry */
- ld r3,PACASTABVIRT(r13) /* get addr of segment table */
-BEGIN_FTR_SECTION
- bl .stab_initialize
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
- bl .slb_initialize
+
+ /* Do early setup for that CPU (stab, slb, hash table pointer) */
+ bl .early_setup_secondary
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
@@ -1724,37 +1712,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
std r1,PACAKSAVE(r13)
- ld r3,PACASTABREAL(r13) /* get raddr of segment table */
- ori r4,r3,1 /* turn on valid bit */
-
-#ifdef CONFIG_PPC_ISERIES
- li r0,-1 /* hypervisor call */
- li r3,1
- sldi r3,r3,63 /* 0x8000000000000000 */
- ori r3,r3,4 /* 0x8000000000000004 */
- sc /* HvCall_setASR */
-#else
- /* set the ASR */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
-#endif
+ /* Clear backchain so we get nice backtraces */
li r7,0
mtlr r7
@@ -1777,6 +1735,7 @@ _GLOBAL(start_secondary_prolog)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
+ b .
#endif
/*
@@ -1896,40 +1855,6 @@ _STATIC(start_here_multiplatform)
mr r3,r31
bl .early_setup
- /* set the ASR */
- ld r3,PACASTABREAL(r13)
- ori r4,r3,1 /* turn on valid bit */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
- /* Set SDR1 (hash table pointer) */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- /* Test if bit 0 is set (LPAR bit) */
- andi. r3,r3,PLATFORM_LPAR
- bne 98f /* branch if result is !0 */
- LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
- add r6,r6,r26
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-98:
LOADADDR(r3,.start_here_common)
SET_REG_TO_CONST(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 5063c603fad..8d60fa99fc4 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
* Copyright 2002-2004 MontaVista Software, Inc.
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
- * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle_64.c
new file mode 100644
index 00000000000..b879d3057ef
--- /dev/null
+++ b/arch/powerpc/kernel/idle_64.c
@@ -0,0 +1,121 @@
+/*
+ * Idle daemon for PowerPC. Idle daemon will handle any action
+ * that needs to be taken when the system becomes idle.
+ *
+ * Originally Written by Cort Dougan (cort@cs.nmt.edu)
+ *
+ * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * Additional shared processor, SMT, and firmware support
+ * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/sysctl.h>
+
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+extern void power4_idle(void);
+
+void default_idle(void)
+{
+ unsigned int cpu = smp_processor_id();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while (1) {
+ if (!need_resched()) {
+ while (!need_resched() && !cpu_is_offline(cpu)) {
+ ppc64_runlatch_off();
+
+ /*
+ * Go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
+ }
+
+ HMT_medium();
+ }
+
+ ppc64_runlatch_on();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+}
+
+void native_idle(void)
+{
+ while (1) {
+ ppc64_runlatch_off();
+
+ if (!need_resched())
+ power4_idle();
+
+ if (need_resched()) {
+ ppc64_runlatch_on();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ }
+
+ if (cpu_is_offline(smp_processor_id()) &&
+ system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+}
+
+void cpu_idle(void)
+{
+ BUG_ON(NULL == ppc_md.idle_loop);
+ ppc_md.idle_loop();
+}
+
+int powersave_nap;
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register the sysctl to set/clear powersave_nap.
+ */
+static ctl_table powersave_nap_ctl_table[]={
+ {
+ .ctl_name = KERN_PPC_POWERSAVE_NAP,
+ .procname = "powersave-nap",
+ .data = &powersave_nap,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { 0, },
+};
+static ctl_table powersave_nap_sysctl_root[] = {
+ { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
+ { 0,},
+};
+
+static int __init
+register_powersave_nap_sysctl(void)
+{
+ register_sysctl_table(powersave_nap_sysctl_root, 0);
+
+ return 0;
+}
+__initcall(register_powersave_nap_sysctl);
+#endif
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
new file mode 100644
index 00000000000..0fa3d27fef0
--- /dev/null
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -0,0 +1,45 @@
+/*
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Based on sparc64 ioctl32.c by:
+ *
+ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * ppc64 changes:
+ *
+ * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
+ * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * ioctls.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define INCLUDES
+#include "compat_ioctl.c"
+#include <linux/syscalls.h>
+
+#define CODE
+#include "compat_ioctl.c"
+
+#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+
+#define IOCTL_TABLE_START \
+ struct ioctl_trans ioctl_start[] = {
+#define IOCTL_TABLE_END \
+ };
+
+IOCTL_TABLE_START
+#include <linux/compat_ioctl.h>
+#define DECLARES
+#include "compat_ioctl.c"
+
+IOCTL_TABLE_END
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 00000000000..6160c8dbb7c
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
+/*
+ * arch/ppc64/kernel/iomap.c
+ *
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+/*
+ * Here comes the ppc64 implementation of the IOMAP
+ * interfaces.
+ */
+unsigned int fastcall ioread8(void __iomem *addr)
+{
+ return readb(addr);
+}
+unsigned int fastcall ioread16(void __iomem *addr)
+{
+ return readw(addr);
+}
+unsigned int fastcall ioread16be(void __iomem *addr)
+{
+ return in_be16(addr);
+}
+unsigned int fastcall ioread32(void __iomem *addr)
+{
+ return readl(addr);
+}
+unsigned int fastcall ioread32be(void __iomem *addr)
+{
+ return in_be32(addr);
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+void fastcall iowrite8(u8 val, void __iomem *addr)
+{
+ writeb(val, addr);
+}
+void fastcall iowrite16(u16 val, void __iomem *addr)
+{
+ writew(val, addr);
+}
+void fastcall iowrite16be(u16 val, void __iomem *addr)
+{
+ out_be16(addr, val);
+}
+void fastcall iowrite32(u32 val, void __iomem *addr)
+{
+ writel(val, addr);
+}
+void fastcall iowrite32be(u32 val, void __iomem *addr)
+{
+ out_be32(addr, val);
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat read/write" functions. Note the
+ * non-CPU byte order. We do things in "IO byteorder"
+ * here.
+ *
+ * FIXME! We could make these do EEH handling if we really
+ * wanted. Not clear if we do.
+ */
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insb((u8 __iomem *) addr, dst, count);
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insw_ns((u16 __iomem *) addr, dst, count);
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insl_ns((u32 __iomem *) addr, dst, count);
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsb((u8 __iomem *) addr, src, count);
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsw_ns((u16 __iomem *) addr, src, count);
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsl_ns((u32 __iomem *) addr, src, count);
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+ if (!_IO_IS_VALID(port))
+ return NULL;
+ return (void __iomem *) (port+pci_io_base);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+ unsigned long start = pci_resource_start(dev, bar);
+ unsigned long len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len)
+ return NULL;
+ if (max && len > max)
+ len = max;
+ if (flags & IORESOURCE_IO)
+ return ioport_map(start, len);
+ if (flags & IORESOURCE_MEM)
+ return ioremap(start, len);
+ /* What? */
+ return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 00000000000..4d9b4388918
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
+/*
+ * arch/ppc64/kernel/iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes, virtual merging:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ * and Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#define DBG(...)
+
+#ifdef CONFIG_IOMMU_VMERGE
+static int novmerge = 0;
+#else
+static int novmerge = 1;
+#endif
+
+static int __init setup_iommu(char *str)
+{
+ if (!strcmp(str, "novmerge"))
+ novmerge = 1;
+ else if (!strcmp(str, "vmerge"))
+ novmerge = 0;
+ return 1;
+}
+
+__setup("iommu=", setup_iommu);
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+ unsigned long npages,
+ unsigned long *handle,
+ unsigned int align_order)
+{
+ unsigned long n, end, i, start;
+ unsigned long limit;
+ int largealloc = npages > 15;
+ int pass = 0;
+ unsigned long align_mask;
+
+ align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages) == 0) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ /* Use only half of the table for small allocs (15 pages or less) */
+ limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+
+ if (largealloc && start < tbl->it_halfpoint)
+ start = tbl->it_halfpoint;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the initial start.
+ */
+ if (start >= limit)
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ again:
+
+ n = find_next_zero_bit(tbl->it_map, limit, start);
+
+ /* Align allocation */
+ n = (n + align_mask) & ~align_mask;
+
+ end = n + npages;
+
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 2)) {
+ /* First failure, just rescan the half of the table.
+ * Second failure, rescan the other half of the table.
+ */
+ start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+ limit = pass ? tbl->it_size : limit;
+ pass++;
+ goto again;
+ } else {
+ /* Third failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ if (test_bit(i, tbl->it_map)) {
+ start = i+1;
+ goto again;
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, tbl->it_map);
+
+ /* Bump the hint to a new block for small allocs. */
+ if (largealloc) {
+ /* Don't bump to new block to avoid fragmentation */
+ tbl->it_largehint = end;
+ } else {
+ /* Overflow will be taken care of at the next allocation */
+ tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ ~(tbl->it_blocksize - 1);
+ }
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
+ unsigned int npages, enum dma_data_direction direction,
+ unsigned int align_order)
+{
+ unsigned long entry, flags;
+ dma_addr_t ret = DMA_ERROR_CODE;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ entry = iommu_range_alloc(tbl, npages, NULL, align_order);
+
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
+
+ entry += tbl->it_offset; /* Offset into real TCE table */
+ ret = entry << PAGE_SHIFT; /* Set the return dma address */
+
+ /* Put the TCEs in the HW table */
+ ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
+ direction);
+
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ return ret;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long i;
+
+ entry = dma_addr >> PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ if (((free_entry + npages) > tbl->it_size) ||
+ (entry < tbl->it_offset)) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_free: invalid entry\n");
+ printk(KERN_INFO "\tentry = 0x%lx\n", entry);
+ printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ ppc_md.tce_free(tbl, entry, npages);
+
+ for (i = 0; i < npages; i++)
+ __clear_bit(free_entry+i, tbl->it_map);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ __iommu_free(tbl, dma_addr, npages);
+
+ /* Make sure TLB cache is flushed if the HW needs it. We do
+ * not do an mb() here on purpose, it is not needed on any of
+ * the current platforms.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned long flags;
+ struct scatterlist *s, *outs, *segstart;
+ int outcount, incount;
+ unsigned long handle;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if ((nelems == 0) || !tbl)
+ return 0;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ DBG("mapping %d elements:\n", nelems);
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ for (s = outs; nelems; nelems--, s++) {
+ unsigned long vaddr, npages, entry, slen;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ vaddr = (unsigned long)page_address(s->page) + s->offset;
+ npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+ entry = iommu_range_alloc(tbl, npages, &handle, 0);
+
+ DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
+ " npages %lx\n", tbl, vaddr, npages);
+ goto failure;
+ }
+
+ /* Convert entry to a dma_addr_t */
+ entry += tbl->it_offset;
+ dma_addr = entry << PAGE_SHIFT;
+ dma_addr |= s->offset;
+
+ DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
+ npages, entry, dma_addr);
+
+ /* Insert into HW table */
+ ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ DBG(" - trying merge...\n");
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if (novmerge || (dma_addr != dma_next)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++; outs++;
+ DBG(" can't merge, new segment.\n");
+ } else {
+ outs->dma_length += s->length;
+ DBG(" merged, new len: %lx\n", outs->dma_length);
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ DBG(" - filling new segment.\n");
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+
+ DBG(" - dma next is: %lx\n", dma_next);
+ }
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ DBG("mapped %d elements:\n", outcount);
+
+ /* For the sake of iommu_unmap_sg, we clear out the length in the
+ * next entry of the sglist if we didn't fill the list completely
+ */
+ if (outcount < incount) {
+ outs++;
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+ return outcount;
+
+ failure:
+ for (s = &sglist[0]; s <= outs; s++) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & PAGE_MASK;
+ npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
+ >> PAGE_SHIFT;
+ __iommu_free(tbl, vaddr, npages);
+ }
+ }
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ unsigned long flags;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (!tbl)
+ return;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ while (nelems--) {
+ unsigned int npages;
+ dma_addr_t dma_handle = sglist->dma_address;
+
+ if (sglist->dma_length == 0)
+ break;
+ npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
+ - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
+ __iommu_free(tbl, dma_handle, npages);
+ sglist++;
+ }
+
+ /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+ * do not do an mb() here, the affected platforms do not need it
+ * when freeing.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl)
+{
+ unsigned long sz;
+ static int welcomed = 0;
+
+ /* Set aside 1/4 of the table for large allocations. */
+ tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+ /* number of bytes needed for the bitmap */
+ sz = (tbl->it_size + 7) >> 3;
+
+ tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
+ if (!tbl->it_map)
+ panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+
+ memset(tbl->it_map, 0, sz);
+
+ tbl->it_hint = 0;
+ tbl->it_largehint = tbl->it_halfpoint;
+ spin_lock_init(&tbl->it_lock);
+
+ /* Clear the hardware table in case firmware left allocations in it */
+ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+
+ if (!welcomed) {
+ printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+ novmerge ? "disabled" : "enabled");
+ welcomed = 1;
+ }
+
+ return tbl;
+}
+
+void iommu_free_table(struct device_node *dn)
+{
+ struct pci_dn *pdn = dn->data;
+ struct iommu_table *tbl = pdn->iommu_table;
+ unsigned long bitmap_sz, i;
+ unsigned int order;
+
+ if (!tbl || !tbl->it_map) {
+ printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
+ dn->full_name);
+ return;
+ }
+
+ /* verify that table contains no entries */
+ /* it_size is in entries, and we're examining 64 at a time */
+ for (i = 0; i < (tbl->it_size/64); i++) {
+ if (tbl->it_map[i] != 0) {
+ printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
+ __FUNCTION__, dn->full_name);
+ break;
+ }
+ }
+
+ /* calculate bitmap size in bytes */
+ bitmap_sz = (tbl->it_size + 7) / 8;
+
+ /* free bitmap */
+ order = get_order(bitmap_sz);
+ free_pages((unsigned long) tbl->it_map, order);
+
+ /* free table */
+ kfree(tbl);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_addr_t dma_handle = DMA_ERROR_CODE;
+ unsigned long uaddr;
+ unsigned int npages;
+
+ BUG_ON(direction == DMA_NONE);
+
+ uaddr = (unsigned long)vaddr;
+ npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+
+ if (tbl) {
+ dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
+ if (dma_handle == DMA_ERROR_CODE) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_alloc failed, "
+ "tbl %p vaddr %p npages %d\n",
+ tbl, vaddr, npages);
+ }
+ } else
+ dma_handle |= (uaddr & ~PAGE_MASK);
+ }
+
+ return dma_handle;
+}
+
+void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+
+ if (tbl)
+ iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
+ (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret = NULL;
+ dma_addr_t mapping;
+ unsigned int npages, order;
+
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ order = get_order(size);
+
+ /*
+ * Client asked for way too much space. This is checked later
+ * anyway. It is easier to debug here for the drivers than in
+ * the tce tables.
+ */
+ if (order >= IOMAP_MAX_ORDER) {
+ printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+ return NULL;
+ }
+
+ if (!tbl)
+ return NULL;
+
+ /* Alloc enough pages (and possibly more) */
+ ret = (void *)__get_free_pages(flag, order);
+ if (!ret)
+ return NULL;
+ memset(ret, 0, size);
+
+ /* Set up tces to cover the allocated range */
+ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
+ if (mapping == DMA_ERROR_CODE) {
+ free_pages((unsigned long)ret, order);
+ ret = NULL;
+ } else
+ *dma_handle = mapping;
+ return ret;
+}
+
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ unsigned int npages;
+
+ if (tbl) {
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ iommu_free(tbl, dma_handle, npages);
+ free_pages((unsigned long)vaddr, get_order(size));
+ }
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 00000000000..5a71ed9612f
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,479 @@
+/*
+ * arch/ppc/kernel/irq.c
+ *
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
+ * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask. I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#ifdef CONFIG_PPC64
+#include <linux/kallsyms.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#ifdef CONFIG_PPC64
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+#endif
+
+int __irq_offset_value;
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(__irq_offset_value);
+#endif
+
+static int ppc_spurious_interrupts;
+
+#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
+extern void iSeries_smp_message_recv(struct pt_regs *);
+#endif
+
+#ifdef CONFIG_PPC32
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+
+unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+extern int tau_interrupts(int);
+#endif
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+extern atomic_t ipi_recv;
+extern atomic_t ipi_sent;
+#endif
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(irq_desc);
+
+int distribute_irqs = 1;
+u64 ppc64_interrupt_controller;
+#endif /* CONFIG_PPC64 */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *)v, j;
+ struct irqaction *action;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ", j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ desc = get_irq_desc(i);
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
+ if (!action || !action->handler)
+ goto skip;
+ seq_printf(p, "%3d: ", i);
+#ifdef CONFIG_SMP
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#else
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+ if (desc->handler)
+ seq_printf(p, " %s ", desc->handler->typename);
+ else
+ seq_puts(p, " None ");
+ seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
+ seq_printf(p, " %s", action->name);
+ for (action = action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ } else if (i == NR_IRQS) {
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_TAU_INT
+ if (tau_initialized){
+ seq_puts(p, "TAU: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+ /* should this be per processor send/receive? */
+ seq_printf(p, "IPI (recv/sent): %10u/%u\n",
+ atomic_read(&ipi_recv), atomic_read(&ipi_sent));
+#endif
+#endif /* CONFIG_PPC32 */
+ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for_each_irq(irq) {
+ cpumask_t mask;
+
+ if (irq_desc[irq].status & IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+}
+#endif
+
+#ifdef CONFIG_PPC_ISERIES
+void do_IRQ(struct pt_regs *regs)
+{
+ struct paca_struct *lpaca;
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ lpaca = get_paca();
+#ifdef CONFIG_SMP
+ if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
+ lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
+ iSeries_smp_message_recv(regs);
+ }
+#endif /* CONFIG_SMP */
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
+
+ irq_exit();
+
+ if (lpaca->lppaca.int_dword.fields.decr_int) {
+ lpaca->lppaca.int_dword.fields.decr_int = 0;
+ /* Signal a fake decrementer interrupt */
+ timer_interrupt(regs);
+ }
+}
+
+#else /* CONFIG_PPC_ISERIES */
+
+void do_IRQ(struct pt_regs *regs)
+{
+ int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ /*
+ * Every platform is required to implement ppc_md.get_irq.
+ * This function will either return an irq number or -1 to
+ * indicate there are no more pending.
+ * The value -2 is for buggy hardware and means that this IRQ
+ * has already been handled. -- Tom
+ */
+ irq = ppc_md.get_irq(regs);
+
+ if (irq >= 0) {
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regs, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regs);
+ } else
+#ifdef CONFIG_PPC32
+ if (irq != -2)
+#endif
+ /* That's not SMP safe ... but who cares ? */
+ ppc_spurious_interrupts++;
+ irq_exit();
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+void __init init_IRQ(void)
+{
+#ifdef CONFIG_PPC64
+ static int once = 0;
+
+ if (once)
+ return;
+
+ once++;
+
+#endif
+ ppc_md.init_IRQ();
+#ifdef CONFIG_PPC64
+ irq_ctx_init();
+#endif
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
+ */
+
+#define UNDEFINED_IRQ 0xffffffff
+unsigned int virt_irq_to_real_map[NR_IRQS];
+
+/*
+ * Don't use virtual irqs 0, 1, 2 for devices.
+ * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
+ * and 2 is the XICS IPI interrupt.
+ * We limit virtual irqs to 17 less than NR_IRQS so that when we
+ * offset them by 16 (to reserve the first 16 for ISA interrupts)
+ * we don't end up with an interrupt number >= NR_IRQS.
+ */
+#define MIN_VIRT_IRQ 3
+#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
+#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
+
+void
+virt_irq_init(void)
+{
+ int i;
+ for (i = 0; i < NR_IRQS; i++)
+ virt_irq_to_real_map[i] = UNDEFINED_IRQ;
+}
+
+/* Create a mapping for a real_irq if it doesn't already exist.
+ * Return the virtual irq as a convenience.
+ */
+int virt_irq_create_mapping(unsigned int real_irq)
+{
+ unsigned int virq, first_virq;
+ static int warned;
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC)
+ return real_irq; /* no mapping for openpic (for now) */
+
+ if (ppc64_interrupt_controller == IC_CELL_PIC)
+ return real_irq; /* no mapping for iic either */
+
+ /* don't map interrupts < MIN_VIRT_IRQ */
+ if (real_irq < MIN_VIRT_IRQ) {
+ virt_irq_to_real_map[real_irq] = real_irq;
+ return real_irq;
+ }
+
+ /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
+ virq = real_irq;
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ /* search for this number or a free slot */
+ first_virq = virq;
+ while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+ if (++virq > MAX_VIRT_IRQ)
+ virq = MIN_VIRT_IRQ;
+ if (virq == first_virq)
+ goto nospace; /* oops, no free slots */
+ }
+
+ virt_irq_to_real_map[virq] = real_irq;
+ return virq;
+
+ nospace:
+ if (!warned) {
+ printk(KERN_CRIT "Interrupt table is full\n");
+ printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
+ "in your kernel sources and rebuild.\n", NR_IRQS);
+ warned = 1;
+ }
+ return NO_IRQ;
+}
+
+/*
+ * In most cases will get a hit on the very first slot checked in the
+ * virt_irq_to_real_map. Only when there are a large number of
+ * IRQs will this be expensive.
+ */
+unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
+{
+ unsigned int virq;
+ unsigned int first_virq;
+
+ virq = real_irq;
+
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ first_virq = virq;
+
+ do {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+
+ virq++;
+
+ if (virq >= MAX_VIRT_IRQ)
+ virq = 0;
+
+ } while (first_virq != virq);
+
+ return NO_IRQ;
+
+}
+
+#ifdef CONFIG_IRQSTACKS
+struct thread_info *softirq_ctx[NR_CPUS];
+struct thread_info *hardirq_ctx[NR_CPUS];
+
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = SOFTIRQ_OFFSET;
+
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+ }
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curtp, *irqtp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+ }
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
+
+#endif /* CONFIG_IRQSTACKS */
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 00000000000..5368f9c2e6b
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
+/*
+ * Kernel Probes (KProbes)
+ * arch/ppc64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes contributions from
+ * Rusty Russell).
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ * for PPC64
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sstep.h>
+
+static DECLARE_MUTEX(kprobe_mutex);
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ int ret = 0;
+ kprobe_opcode_t insn = *p->addr;
+
+ if ((unsigned long)p->addr & 0x03) {
+ printk("Attempt to register kprobe at an unaligned address\n");
+ ret = -EINVAL;
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+ printk("Cannot register a kprobe on rfid or mtmsrd\n");
+ ret = -EINVAL;
+ }
+
+ /* insn must be on a special executable page on ppc64 */
+ if (!ret) {
+ down(&kprobe_mutex);
+ p->ainsn.insn = get_insn_slot();
+ up(&kprobe_mutex);
+ if (!p->ainsn.insn)
+ ret = -ENOMEM;
+ }
+ return ret;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ *p->addr = p->opcode;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ down(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn);
+ up(&kprobe_mutex);
+}
+
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ kprobe_opcode_t insn = *p->ainsn.insn;
+
+ regs->msr |= MSR_SE;
+
+ /* single step inline if it is a trap variant */
+ if (is_trap(insn))
+ regs->nip = (unsigned long)p->addr;
+ else
+ regs->nip = (unsigned long)p->ainsn.insn;
+}
+
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_msr = regs->msr;
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
+}
+
+static inline int kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ unsigned int *addr = (unsigned int *)regs->nip;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ is_trap(insn)) {
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_saved_msr = regs->msr;
+ kprobes_inc_nmissed_count(p);
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ goto ss_probe;
+ }
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * PowerPC has multiple variants of the "trap"
+ * instruction. If the current instruction is a
+ * trap variant, it could belong to someone else
+ */
+ kprobe_opcode_t cur_insn = *addr;
+ if (is_trap(cur_insn))
+ goto no_kprobe;
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ set_current_kprobe(p, regs, kcb);
+ if (p->pre_handler && p->pre_handler(p, regs))
+ /* handler has already set things up, so skip ss setup */
+ return 1;
+
+ss_probe:
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->nip = orig_ret_address;
+
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ int ret;
+ unsigned int insn = *p->ainsn.insn;
+
+ regs->nip = (unsigned long)p->addr;
+ ret = emulate_step(regs, insn);
+ if (ret == 0)
+ regs->nip = (unsigned long)p->addr + 4;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs);
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, msr
+ * will have SE set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (regs->msr & MSR_SE)
+ return 0;
+
+ return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(cur, regs);
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id() */
+ preempt_disable();
+ if (kprobe_running() &&
+ kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+ /* setup return addr to the jprobe handler routine */
+ regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+ regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ asm volatile("trap" ::: "memory");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+};
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+ * FIXME - we should ideally be validating that we got here 'cos
+ * of the "trap" in jprobe_return() above, before restoring the
+ * saved regs...
+ */
+ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ preempt_enable_no_resched();
+ return 1;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000000..9dda16ccde7
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,608 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/vdso_datapage.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct paca_struct *lpaca;
+
+ for_each_cpu(cpu) {
+ lpaca = paca + cpu;
+ sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+ printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+ cpu, lpaca->lppaca.emulated_time_base);
+#endif
+ }
+ return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ unsigned long pool_id, lp_index;
+ int shared, entitled_capacity, max_entitled_capacity;
+ int processors, max_processors;
+ struct paca_struct *lpaca = get_paca();
+ unsigned long purr = get_purr();
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ shared = (int)(lpaca->lppaca_ptr->shared_proc);
+ seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+ e2a(xItExtVpdPanel.mfgID[2]),
+ e2a(xItExtVpdPanel.mfgID[3]),
+ e2a(xItExtVpdPanel.systemSerial[1]),
+ e2a(xItExtVpdPanel.systemSerial[2]),
+ e2a(xItExtVpdPanel.systemSerial[3]),
+ e2a(xItExtVpdPanel.systemSerial[4]),
+ e2a(xItExtVpdPanel.systemSerial[5]));
+
+ seq_printf(m, "system_type=%c%c%c%c\n",
+ e2a(xItExtVpdPanel.machineType[0]),
+ e2a(xItExtVpdPanel.machineType[1]),
+ e2a(xItExtVpdPanel.machineType[2]),
+ e2a(xItExtVpdPanel.machineType[3]));
+
+ lp_index = HvLpConfig_getLpIndex();
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ seq_printf(m, "system_active_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ processors = (int)HvLpConfig_getPhysicalProcessors();
+ seq_printf(m, "partition_active_processors=%d\n", processors);
+
+ max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+ seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+ if (shared) {
+ entitled_capacity = HvLpConfig_getSharedProcUnits();
+ max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+ } else {
+ entitled_capacity = processors * 100;
+ max_entitled_capacity = max_processors * 100;
+ }
+ seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ max_entitled_capacity);
+
+ if (shared) {
+ pool_id = HvLpConfig_getSharedPoolIndex();
+ seq_printf(m, "pool=%d\n", (int)pool_id);
+ seq_printf(m, "pool_capacity=%d\n",
+ (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+ 100));
+ seq_printf(m, "purr=%ld\n", purr);
+ }
+
+ seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+/* find a better place for this function... */
+static void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+ unsigned long *unallocated,
+ unsigned long *aggregation,
+ unsigned long *resource)
+{
+ unsigned long rc;
+ rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+ aggregation, resource);
+
+ log_plpar_hcall_return(rc, "H_GET_PPP");
+
+ return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long dummy;
+ rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+ if (rc != H_Authority)
+ log_plpar_hcall_return(rc, "H_PIC");
+}
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications */
+
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct cpu_usage *cu;
+
+ for_each_cpu(cpu) {
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf));
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __FUNCTION__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+ splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ unsigned int *lp_index_ptr, lp_index = 0;
+ struct device_node *rtas_node;
+ int *lrdrp;
+
+ rootdn = find_path_device("/");
+ if (rootdn) {
+ model = get_property(rootdn, "model", NULL);
+ system_id = get_property(rootdn, "system-id", NULL);
+ lp_index_ptr = (unsigned int *)
+ get_property(rootdn, "ibm,partition-no", NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ }
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ seq_printf(m, "serial_number=%s\n", system_id);
+
+ seq_printf(m, "system_type=%s\n", model);
+
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ rtas_node = find_path_device("/rtas");
+ lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = vdso_data->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ unsigned long h_entitled, h_unallocated;
+ unsigned long h_aggregation, h_resource;
+ unsigned long pool_idle_time, pool_procs;
+ unsigned long purr;
+
+ h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+ &h_resource);
+
+ seq_printf(m, "R4=0x%lx\n", h_entitled);
+ seq_printf(m, "R5=0x%lx\n", h_unallocated);
+ seq_printf(m, "R6=0x%lx\n", h_aggregation);
+ seq_printf(m, "R7=0x%lx\n", h_resource);
+
+ purr = get_purr();
+
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+ seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+ seq_printf(m, "system_active_processors=%ld\n",
+ (h_resource >> 0 * 8) & 0xffff);
+
+ /* pool related entries are apropriate for shared configs */
+ if (paca[0].lppaca.shared_proc) {
+
+ h_pic(&pool_idle_time, &pool_procs);
+
+ seq_printf(m, "pool=%ld\n",
+ (h_aggregation >> 0 * 8) & 0xffff);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%ld\n",
+ ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%ld\n",
+ (h_resource >> 4 * 8) & 0xFF);
+
+ seq_printf(m, "capacity_weight=%ld\n",
+ (h_resource >> 5 * 8) & 0xFF);
+
+ seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+ seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+ seq_printf(m, "purr=%ld\n", purr);
+
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+ return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ char *kbuf;
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+
+ unsigned long current_entitled; /* parameters for h_get_ppp */
+ unsigned long dummy;
+ unsigned long resource;
+ u8 current_weight;
+
+ ssize_t retval = -ENOMEM;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
+
+ retval = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+
+ retval = -EINVAL;
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ goto out;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_weight_ptr = &current_weight;
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_entitled_ptr = &current_entitled;
+ } else
+ goto out;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+ if (retval) {
+ retval = -EIO;
+ goto out;
+ }
+
+ current_weight = (resource >> 5 * 8) & 0xFF;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+ __FUNCTION__, current_entitled, current_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+ __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+ retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+ *new_weight_ptr);
+
+ if (retval == H_Success || retval == H_Constrained) {
+ retval = count;
+ } else if (retval == H_Busy) {
+ retval = -EBUSY;
+ } else if (retval == H_Hardware) {
+ retval = -EIO;
+ } else if (retval == H_Parameter) {
+ retval = -EINVAL;
+ } else {
+ printk(KERN_WARNING "%s: received unknown hv return code %ld",
+ __FUNCTION__, retval);
+ retval = -EIO;
+ }
+
+out:
+ kfree(kbuf);
+ return retval;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .open = lparcfg_open,
+ .release = single_release,
+};
+
+int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ lparcfg_fops.write = lparcfg_write;
+ mode |= S_IWUSR;
+ }
+
+ ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+ if (ent) {
+ ent->proc_fops = &lparcfg_fops;
+ ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR
+ "Failed to allocate buffer for lparcfg\n");
+ remove_proc_entry("lparcfg", ent->parent);
+ return -ENOMEM;
+ }
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg) {
+ kfree(proc_ppc64_lparcfg->data);
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+ }
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 00000000000..97c51e452be
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h> /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+
+#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need to have a crash shutdown in here
+ * and if what it will achieve. Letting it be now to compile the code
+ * in generic kexec environment
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* do nothing right now */
+ /* smp_relase_cpus() if we want smp on panic kernel */
+ /* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ int i;
+ unsigned long begin, end; /* limits of segment */
+ unsigned long low, high; /* limits of blocked memory range */
+ struct device_node *node;
+ unsigned long *basep;
+ unsigned int *sizep;
+
+ if (!ppc_md.hpte_clear_all)
+ return -ENOENT;
+
+ /*
+ * Since we use the kernel fault handlers and paging code to
+ * handle the virtual mode, we must make sure no destination
+ * overlaps kernel static data or bss.
+ */
+ for (i = 0; i < image->nr_segments; i++)
+ if (image->segment[i].mem < __pa(_end))
+ return -ETXTBSY;
+
+ /*
+ * For non-LPAR, we absolutely can not overwrite the mmu hash
+ * table, since we are still using the bolted entries in it to
+ * do the copy. Check that here.
+ *
+ * It is safe if the end is below the start of the blocked
+ * region (end <= low), or if the beginning is after the
+ * end of the blocked region (begin >= high). Use the
+ * boolean identity !(a || b) === (!a && !b).
+ */
+ if (htab_address) {
+ low = __pa(htab_address);
+ high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ /* We also should not overwrite the tce tables */
+ for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+ node = of_find_node_by_type(node, "pci")) {
+ basep = (unsigned long *)get_property(node, "linux,tce-base",
+ NULL);
+ sizep = (unsigned int *)get_property(node, "linux,tce-size",
+ NULL);
+ if (basep == NULL || sizep == NULL)
+ continue;
+
+ low = *basep;
+ high = low + (*sizep);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+ /* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+ unsigned long entry;
+ unsigned long *ptr;
+ void *dest;
+ void *addr;
+
+ /*
+ * We rely on kexec_load to create a lists that properly
+ * initializes these pointers before they are used.
+ * We will still crash if the list is wrong, but at least
+ * the compiler will be quiet.
+ */
+ ptr = NULL;
+ dest = NULL;
+
+ for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+ addr = __va(entry & PAGE_MASK);
+
+ switch (entry & IND_FLAGS) {
+ case IND_DESTINATION:
+ dest = addr;
+ break;
+ case IND_INDIRECTION:
+ ptr = addr;
+ break;
+ case IND_SOURCE:
+ copy_page(dest, addr);
+ dest += PAGE_SIZE;
+ }
+ }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+ long i, nr_segments = image->nr_segments;
+ struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+ /* save the ranges on the stack to efficiently flush the icache */
+ memcpy(ranges, image->segment, sizeof(ranges));
+
+ /*
+ * After this call we may not use anything allocated in dynamic
+ * memory, including *image.
+ *
+ * Only globals and the stack are allowed.
+ */
+ copy_segments(image->head);
+
+ /*
+ * we need to clear the icache for all dest pages sometime,
+ * including ones that were in place on the original copy
+ */
+ for (i = 0; i < nr_segments; i++)
+ flush_icache_range(ranges[i].mem + KERNELBASE,
+ ranges[i].mem + KERNELBASE +
+ ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 1);
+
+ local_irq_disable();
+ kexec_smp_wait();
+ /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+ int my_cpu, i, notified=-1;
+
+ smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ my_cpu = get_cpu();
+
+ /* check the others cpus are now down (via paca hw cpu id == -1) */
+ for (i=0; i < NR_CPUS; i++) {
+ if (i == my_cpu)
+ continue;
+
+ while (paca[i].hw_cpu_id != -1) {
+ barrier();
+ if (!cpu_possible(i)) {
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " possible, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (!cpu_online(i)) {
+ /* Fixme: this can be spinning in
+ * pSeries_secondary_wait with a paca
+ * waiting for it to go online.
+ */
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " online, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (i != notified) {
+ printk( "kexec: waiting for cpu %d (physical"
+ " %d) to go down\n",
+ i, paca[i].hw_cpu_id);
+ notified = i;
+ }
+ }
+ }
+
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
+ put_cpu();
+
+ local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+ /*
+ * move the secondarys to us so that we can copy
+ * the new kernel 0-0x100 safely
+ *
+ * do this if kexec in setup.c ?
+ *
+ * We need to release the cpus if we are ever going from an
+ * UP to an SMP kernel.
+ */
+ smp_release_cpus();
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+ local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image. We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+ __attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+ /* prepare control code if any */
+
+ /* shutdown other cpus into our wait loop and quiesce interrupts */
+ kexec_prepare_cpus();
+
+ /* switch to a staticly allocated stack. Based on irq stack code.
+ * XXX: the task struct will likely be invalid once we do the copy!
+ */
+ kexec_stack.thread_info.task = current_thread_info()->task;
+ kexec_stack.thread_info.flags = 0;
+
+ /* Some things are best done in assembly. Finding globals with
+ * a toc is easier in C, so pass in what we can.
+ */
+ kexec_sequence(&kexec_stack, image->start, image,
+ page_address(image->control_code_page),
+ ppc_md.hpte_clear_all);
+ /* NOTREACHED */
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base, htab_size, kernel_end;
+
+static struct property htab_base_prop = {
+ .name = "linux,htab-base",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_base,
+};
+
+static struct property htab_size_prop = {
+ .name = "linux,htab-size",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_size,
+};
+
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&kernel_end,
+};
+
+static void __init export_htab_values(void)
+{
+ struct device_node *node;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return;
+
+ kernel_end = __pa(_end);
+ prom_add_property(node, &kernel_end_prop);
+
+ /* On machines with no htab htab_address is NULL */
+ if (NULL == htab_address)
+ goto out;
+
+ htab_base = __pa(htab_address);
+ prom_add_property(node, &htab_base_prop);
+
+ htab_size = 1UL << ppc64_pft_size;
+ prom_add_property(node, &htab_size_prop);
+
+ out:
+ of_node_put(node);
+}
+
+void __init kexec_setup(void)
+{
+ export_htab_values();
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 3bedb532aed..624a983a967 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -27,14 +27,6 @@
.text
- .align 5
-_GLOBAL(__delay)
- cmpwi 0,r3,0
- mtctr r3
- beqlr
-1: bdnz 1b
- blr
-
/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
@@ -519,7 +511,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(flush_icache_range)
+_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
@@ -607,27 +599,6 @@ _GLOBAL(invalidate_dcache_range)
sync /* wait for dcbi's to get to ram */
blr
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * 40x cores have 8K or 16K dcache and 32 byte line size.
- * 44x has a 32K dcache and 32 byte line size.
- * 8xx has 1, 2, 4, 8K variants.
- * For now, cover the worst case of the 44x.
- * Must be called with external interrupts disabled.
- */
-#define CACHE_NWAYS 64
-#define CACHE_NLINES 16
-
-_GLOBAL(flush_dcache_all)
- li r4, (2 * CACHE_NWAYS * CACHE_NLINES)
- mtctr r4
- lis r5, KERNELBASE@h
-1: lwz r3, 0(r5) /* Load one word from every line */
- addi r5, r5, L1_CACHE_BYTES
- bdnz 1b
- blr
-#endif /* CONFIG_NOT_COHERENT_CACHE */
-
/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index ae1433da09b..ae48a002f81 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -89,12 +89,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_IRQ_event)
+_GLOBAL(call___do_IRQ)
mflr r0
std r0,16(r1)
- stdu r1,THREAD_SIZE-112(r6)
- mr r1,r6
- bl .handle_IRQ_event
+ stdu r1,THREAD_SIZE-112(r5)
+ mr r1,r5
+ bl .__do_IRQ
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 00000000000..928b8581fcb
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
+/* Kernel module help for PPC64.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+
+/* FIXME: We don't do .init separately. To do this, we'd need to have
+ a separate r2 value in the init and core section, and stub between
+ them, too.
+
+ Using a magic allocator which places modules within 32MB solves
+ this, and makes other things simpler. Anton?
+ --RR. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* There's actually a third entry here, but it's unused */
+struct ppc64_opd_entry
+{
+ unsigned long funcaddr;
+ unsigned long r2;
+};
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+ the kernel itself). But on PPC64, these need to be used for every
+ jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry
+{
+ /* 28 byte jump instruction sequence (7 instructions) */
+ unsigned char jump[28];
+ unsigned char unused[4];
+ /* Data for the above code */
+ struct ppc64_opd_entry opd;
+};
+
+/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
+ function which may be more than 24-bits away. We could simply
+ patch the new r2 value and function pointer into the stub, but it's
+ significantly shorter to put these values at the end of the stub
+ code, and patch the stub address (32-bits relative to the TOC ptr,
+ r2) into the stub. */
+static struct ppc64_stub_entry ppc64_stub =
+{ .jump = {
+ 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
+ 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
+ /* Save current r2 value in magic place on the stack. */
+ 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
+ 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
+ 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
+ 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
+ 0x4e, 0x80, 0x04, 0x20 /* bctr */
+} };
+
+/* Count how many different 24-bit relocations (different symbol,
+ different addend) */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, j, ret = 0;
+
+ /* FIXME: Only count external ones --RR */
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
+ continue;
+ for (j = 0; j < i; j++) {
+ /* If this addend appeared before, it's
+ already been counted */
+ if (rela[i].r_info == rela[j].r_info
+ && rela[i].r_addend == rela[j].r_addend)
+ break;
+ }
+ if (j == i) ret++;
+ }
+ return ret;
+}
+
+void *module_alloc(unsigned long size)
+{
+ if (size == 0)
+ return NULL;
+
+ return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+ /* FIXME: If module_region == mod->init_region, trim exception
+ table entries. */
+}
+
+/* Get size of potential trampolines required. */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs)
+{
+ /* One extra reloc so it's always 0-funcaddr terminated */
+ unsigned long relocs = 1;
+ unsigned i;
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ DEBUGP("Found relocations in section %u\n", i);
+ DEBUGP("Ptr: %p. Number: %lu\n",
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela));
+ }
+ }
+
+ DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+ return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+ unsigned long size)
+{
+ struct modversion_info *end;
+
+ for (end = (void *)vers + size; vers < end; vers++)
+ if (vers->name[0] == '.')
+ memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+ unsigned int i;
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF) {
+ char *name = strtab + syms[i].st_name;
+ if (name[0] == '.')
+ memmove(name, name+1, strlen(name));
+ }
+ }
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+ Elf64_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
+{
+ unsigned int i;
+
+ /* Find .toc and .stubs sections, symtab and strtab */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ char *p;
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+ me->arch.stubs_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ me->arch.toc_section = i;
+ else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+ dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size);
+
+ /* We don't handle .init for the moment: rename to _init */
+ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+ p[0] = '_';
+
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
+ dedotify((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf64_Sym),
+ (void *)hdr
+ + sechdrs[sechdrs[i].sh_link].sh_offset);
+ }
+ if (!me->arch.stubs_section || !me->arch.toc_section) {
+ printk("%s: doesn't contain .toc or .stubs.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the stubs size */
+ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+ return 0;
+}
+
+int apply_relocate(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
+ return -ENOEXEC;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+ gives the value maximum span in an instruction which uses a signed
+ offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+ return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if low
+ 16 bits has high bit set, high 16 bits must be adjusted. These
+ macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value. */
+static inline int create_stub(Elf64_Shdr *sechdrs,
+ struct ppc64_stub_entry *entry,
+ struct ppc64_opd_entry *opd,
+ struct module *me)
+{
+ Elf64_Half *loc1, *loc2;
+ long reladdr;
+
+ *entry = ppc64_stub;
+
+ loc1 = (Elf64_Half *)&entry->jump[2];
+ loc2 = (Elf64_Half *)&entry->jump[6];
+
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ printk("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ *loc1 = PPC_HA(reladdr);
+ *loc2 = PPC_LO(reladdr);
+ entry->opd.funcaddr = opd->funcaddr;
+ entry->opd.r2 = opd->r2;
+ return 1;
+}
+
+/* Create stub to jump to function described in this OPD: we need the
+ stub to set up the TOC ptr (r2) for the function. */
+static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+ unsigned long opdaddr,
+ struct module *me)
+{
+ struct ppc64_stub_entry *stubs;
+ struct ppc64_opd_entry *opd = (void *)opdaddr;
+ unsigned int i, num_stubs;
+
+ num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+ /* Find this stub, or if that fails, the next avail. entry */
+ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ for (i = 0; stubs[i].opd.funcaddr; i++) {
+ BUG_ON(i >= num_stubs);
+
+ if (stubs[i].opd.funcaddr == opd->funcaddr)
+ return (unsigned long)&stubs[i];
+ }
+
+ if (!create_stub(sechdrs, &stubs[i], opd, me))
+ return 0;
+
+ return (unsigned long)&stubs[i];
+}
+
+/* We expect a noop next: if it is, replace it with instruction to
+ restore r2. */
+static int restore_r2(u32 *instruction, struct module *me)
+{
+ if (*instruction != 0x60000000) {
+ printk("%s: Expect noop after relocate, got %08x\n",
+ me->name, *instruction);
+ return 0;
+ }
+ *instruction = 0xe8410028; /* ld r2,40(r1) */
+ return 1;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+ Elf64_Sym *sym;
+ unsigned long *location;
+ unsigned long value;
+
+ DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rela[i].r_offset;
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+ location, (long)ELF64_R_TYPE(rela[i].r_info),
+ strtab + sym->st_name, (unsigned long)sym->st_value,
+ (long)rela[i].r_addend);
+
+ /* `Everything is relative'. */
+ value = sym->st_value + rela[i].r_addend;
+
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_PPC64_ADDR32:
+ /* Simply set it */
+ *(u32 *)location = value;
+ break;
+
+ case R_PPC64_ADDR64:
+ /* Simply set it */
+ *(unsigned long *)location = value;
+ break;
+
+ case R_PPC64_TOC:
+ *(unsigned long *)location = my_r2(sechdrs, me);
+ break;
+
+ case R_PPC64_TOC16:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if (value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16 relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_DS:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16_DS relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC_REL24:
+ /* FIXME: Handle weak symbols here --RR */
+ if (sym->st_shndx == SHN_UNDEF) {
+ /* External: go via stub */
+ value = stub_for_addr(sechdrs, value, me);
+ if (!value)
+ return -ENOENT;
+ if (!restore_r2((u32 *)location + 1, me))
+ return -ENOEXEC;
+ }
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+ printk("%s: REL24 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 26 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
+ break;
+
+ default:
+ printk("%s: Unknown ADD relocation: %lu\n",
+ me->name,
+ (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+
+ return 0;
+}
+
+LIST_HEAD(module_bug_list);
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ char *secstrings;
+ unsigned int i;
+
+ me->arch.bug_table = NULL;
+ me->arch.num_bugs = 0;
+
+ /* Find the __bug_table section, if present */
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+ continue;
+ me->arch.bug_table = (void *) sechdrs[i].sh_addr;
+ me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+ break;
+ }
+
+ /*
+ * Strictly speaking this should have a spinlock to protect against
+ * traversals, but since we only traverse on BUG()s, a spinlock
+ * could potentially lead to deadlock and thus be counter-productive.
+ */
+ list_add(&me->arch.bug_list, &module_bug_list);
+
+ return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+ list_del(&mod->arch.bug_list);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ struct mod_arch_specific *mod;
+ unsigned int i;
+ struct bug_entry *bug;
+
+ list_for_each_entry(mod, &module_bug_list, bug_list) {
+ bug = mod->bug_table;
+ for (i = 0; i < mod->num_bugs; ++i, ++bug)
+ if (bugaddr == bug->bug_addr)
+ return bug;
+ }
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
new file mode 100644
index 00000000000..c0fcd29918c
--- /dev/null
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -0,0 +1,742 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * /dev/nvram driver for PPC64
+ *
+ * This perhaps should live in drivers/char
+ *
+ * TODO: Split the /dev/nvram part (that one can use
+ * drivers/char/generic_nvram.c) from the arch & partition
+ * parsing code.
+ */
+
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/fcntl.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+
+#undef DEBUG_NVRAM
+
+static int nvram_scan_partitions(void);
+static int nvram_setup_partition(void);
+static int nvram_create_os_partition(void);
+static int nvram_remove_os_partition(void);
+
+static struct nvram_partition * nvram_part;
+static long nvram_error_log_index = -1;
+static long nvram_error_log_size = 0;
+
+int no_logging = 1; /* Until we initialize everything,
+ * make sure we don't try logging
+ * anything */
+
+extern volatile int error_log_cnt;
+
+struct err_log_info {
+ int error_type;
+ unsigned int seq_num;
+};
+
+static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
+{
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+ break;
+ case 2:
+ offset += size;
+ break;
+ }
+ if (offset < 0)
+ return -EINVAL;
+ file->f_pos = offset;
+ return file->f_pos;
+}
+
+
+static ssize_t dev_nvram_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t len;
+ char *tmp_buffer;
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+ if (*ppos >= size)
+ return 0;
+ if (count > size)
+ count = size;
+
+ tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+ if (!tmp_buffer) {
+ printk(KERN_ERR "dev_read_nvram: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ len = ppc_md.nvram_read(tmp_buffer, count, ppos);
+ if ((long)len <= 0) {
+ kfree(tmp_buffer);
+ return len;
+ }
+
+ if (copy_to_user(buf, tmp_buffer, len)) {
+ kfree(tmp_buffer);
+ return -EFAULT;
+ }
+
+ kfree(tmp_buffer);
+ return len;
+
+}
+
+static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t len;
+ char * tmp_buffer;
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+ if (*ppos >= size)
+ return 0;
+ if (count > size)
+ count = size;
+
+ tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+ if (!tmp_buffer) {
+ printk(KERN_ERR "dev_nvram_write: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(tmp_buffer, buf, count)) {
+ kfree(tmp_buffer);
+ return -EFAULT;
+ }
+
+ len = ppc_md.nvram_write(tmp_buffer, count, ppos);
+ if ((long)len <= 0) {
+ kfree(tmp_buffer);
+ return len;
+ }
+
+ kfree(tmp_buffer);
+ return len;
+}
+
+static int dev_nvram_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ switch(cmd) {
+#ifdef CONFIG_PPC_PMAC
+ case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
+ printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
+ case IOC_NVRAM_GET_OFFSET: {
+ int part, offset;
+
+ if (_machine != PLATFORM_POWERMAC)
+ return -EINVAL;
+ if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
+ return -EFAULT;
+ if (part < pmac_nvram_OF || part > pmac_nvram_NR)
+ return -EINVAL;
+ offset = pmac_get_partition(part);
+ if (offset < 0)
+ return offset;
+ if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+#endif /* CONFIG_PPC_PMAC */
+ }
+ return -EINVAL;
+}
+
+struct file_operations nvram_fops = {
+ .owner = THIS_MODULE,
+ .llseek = dev_nvram_llseek,
+ .read = dev_nvram_read,
+ .write = dev_nvram_write,
+ .ioctl = dev_nvram_ioctl,
+};
+
+static struct miscdevice nvram_dev = {
+ NVRAM_MINOR,
+ "nvram",
+ &nvram_fops
+};
+
+
+#ifdef DEBUG_NVRAM
+static void nvram_print_partitions(char * label)
+{
+ struct list_head * p;
+ struct nvram_partition * tmp_part;
+
+ printk(KERN_WARNING "--------%s---------\n", label);
+ printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
+ list_for_each(p, &nvram_part->partition) {
+ tmp_part = list_entry(p, struct nvram_partition, partition);
+ printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n",
+ tmp_part->index, tmp_part->header.signature,
+ tmp_part->header.checksum, tmp_part->header.length,
+ tmp_part->header.name);
+ }
+}
+#endif
+
+
+static int nvram_write_header(struct nvram_partition * part)
+{
+ loff_t tmp_index;
+ int rc;
+
+ tmp_index = part->index;
+ rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
+
+ return rc;
+}
+
+
+static unsigned char nvram_checksum(struct nvram_header *p)
+{
+ unsigned int c_sum, c_sum2;
+ unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
+ c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
+
+ /* The sum may have spilled into the 3rd byte. Fold it back. */
+ c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
+ /* The sum cannot exceed 2 bytes. Fold it into a checksum */
+ c_sum2 = (c_sum >> 8) + (c_sum << 8);
+ c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
+ return c_sum;
+}
+
+
+/*
+ * Find an nvram partition, sig can be 0 for any
+ * partition or name can be NULL for any name, else
+ * tries to match both
+ */
+struct nvram_partition *nvram_find_partition(int sig, const char *name)
+{
+ struct nvram_partition * part;
+ struct list_head * p;
+
+ list_for_each(p, &nvram_part->partition) {
+ part = list_entry(p, struct nvram_partition, partition);
+
+ if (sig && part->header.signature != sig)
+ continue;
+ if (name && 0 != strncmp(name, part->header.name, 12))
+ continue;
+ return part;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(nvram_find_partition);
+
+
+static int nvram_remove_os_partition(void)
+{
+ struct list_head *i;
+ struct list_head *j;
+ struct nvram_partition * part;
+ struct nvram_partition * cur_part;
+ int rc;
+
+ list_for_each(i, &nvram_part->partition) {
+ part = list_entry(i, struct nvram_partition, partition);
+ if (part->header.signature != NVRAM_SIG_OS)
+ continue;
+
+ /* Make os partition a free partition */
+ part->header.signature = NVRAM_SIG_FREE;
+ sprintf(part->header.name, "wwwwwwwwwwww");
+ part->header.checksum = nvram_checksum(&part->header);
+
+ /* Merge contiguous free partitions backwards */
+ list_for_each_prev(j, &part->partition) {
+ cur_part = list_entry(j, struct nvram_partition, partition);
+ if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+ break;
+ }
+
+ part->header.length += cur_part->header.length;
+ part->header.checksum = nvram_checksum(&part->header);
+ part->index = cur_part->index;
+
+ list_del(&cur_part->partition);
+ kfree(cur_part);
+ j = &part->partition; /* fixup our loop */
+ }
+
+ /* Merge contiguous free partitions forwards */
+ list_for_each(j, &part->partition) {
+ cur_part = list_entry(j, struct nvram_partition, partition);
+ if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+ break;
+ }
+
+ part->header.length += cur_part->header.length;
+ part->header.checksum = nvram_checksum(&part->header);
+
+ list_del(&cur_part->partition);
+ kfree(cur_part);
+ j = &part->partition; /* fixup our loop */
+ }
+
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+
+ }
+
+ return 0;
+}
+
+/* nvram_create_os_partition
+ *
+ * Create a OS linux partition to buffer error logs.
+ * Will create a partition starting at the first free
+ * space found if space has enough room.
+ */
+static int nvram_create_os_partition(void)
+{
+ struct nvram_partition *part;
+ struct nvram_partition *new_part;
+ struct nvram_partition *free_part = NULL;
+ int seq_init[2] = { 0, 0 };
+ loff_t tmp_index;
+ long size = 0;
+ int rc;
+
+ /* Find a free partition that will give us the maximum needed size
+ If can't find one that will give us the minimum size needed */
+ list_for_each_entry(part, &nvram_part->partition, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE)
+ continue;
+
+ if (part->header.length >= NVRAM_MAX_REQ) {
+ size = NVRAM_MAX_REQ;
+ free_part = part;
+ break;
+ }
+ if (!size && part->header.length >= NVRAM_MIN_REQ) {
+ size = NVRAM_MIN_REQ;
+ free_part = part;
+ }
+ }
+ if (!size)
+ return -ENOSPC;
+
+ /* Create our OS partition */
+ new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
+ if (!new_part) {
+ printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ new_part->index = free_part->index;
+ new_part->header.signature = NVRAM_SIG_OS;
+ new_part->header.length = size;
+ strcpy(new_part->header.name, "ppc64,linux");
+ new_part->header.checksum = nvram_checksum(&new_part->header);
+
+ rc = nvram_write_header(new_part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \
+ failed (%d)\n", rc);
+ return rc;
+ }
+
+ /* make sure and initialize to zero the sequence number and the error
+ type logged */
+ tmp_index = new_part->index + NVRAM_HEADER_LEN;
+ rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+
+ nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
+ nvram_error_log_size = ((part->header.length - 1) *
+ NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+
+ list_add_tail(&new_part->partition, &free_part->partition);
+
+ if (free_part->header.length <= size) {
+ list_del(&free_part->partition);
+ kfree(free_part);
+ return 0;
+ }
+
+ /* Adjust the partition we stole the space from */
+ free_part->index += size * NVRAM_BLOCK_LEN;
+ free_part->header.length -= size;
+ free_part->header.checksum = nvram_checksum(&free_part->header);
+
+ rc = nvram_write_header(free_part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+
+/* nvram_setup_partition
+ *
+ * This will setup the partition we need for buffering the
+ * error logs and cleanup partitions if needed.
+ *
+ * The general strategy is the following:
+ * 1.) If there is ppc64,linux partition large enough then use it.
+ * 2.) If there is not a ppc64,linux partition large enough, search
+ * for a free partition that is large enough.
+ * 3.) If there is not a free partition large enough remove
+ * _all_ OS partitions and consolidate the space.
+ * 4.) Will first try getting a chunk that will satisfy the maximum
+ * error log size (NVRAM_MAX_REQ).
+ * 5.) If the max chunk cannot be allocated then try finding a chunk
+ * that will satisfy the minum needed (NVRAM_MIN_REQ).
+ */
+static int nvram_setup_partition(void)
+{
+ struct list_head * p;
+ struct nvram_partition * part;
+ int rc;
+
+ /* For now, we don't do any of this on pmac, until I
+ * have figured out if it's worth killing some unused stuffs
+ * in our nvram, as Apple defined partitions use pretty much
+ * all of the space
+ */
+ if (_machine == PLATFORM_POWERMAC)
+ return -ENOSPC;
+
+ /* see if we have an OS partition that meets our needs.
+ will try getting the max we need. If not we'll delete
+ partitions and try again. */
+ list_for_each(p, &nvram_part->partition) {
+ part = list_entry(p, struct nvram_partition, partition);
+ if (part->header.signature != NVRAM_SIG_OS)
+ continue;
+
+ if (strcmp(part->header.name, "ppc64,linux"))
+ continue;
+
+ if (part->header.length >= NVRAM_MIN_REQ) {
+ /* found our partition */
+ nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
+ nvram_error_log_size = ((part->header.length - 1) *
+ NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+ return 0;
+ }
+ }
+
+ /* try creating a partition with the free space we have */
+ rc = nvram_create_os_partition();
+ if (!rc) {
+ return 0;
+ }
+
+ /* need to free up some space */
+ rc = nvram_remove_os_partition();
+ if (rc) {
+ return rc;
+ }
+
+ /* create a partition in this new space */
+ rc = nvram_create_os_partition();
+ if (rc) {
+ printk(KERN_ERR "nvram_create_os_partition: Could not find a "
+ "NVRAM partition large enough\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+
+static int nvram_scan_partitions(void)
+{
+ loff_t cur_index = 0;
+ struct nvram_header phead;
+ struct nvram_partition * tmp_part;
+ unsigned char c_sum;
+ char * header;
+ int total_size;
+ int err;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ total_size = ppc_md.nvram_size();
+
+ header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+ if (!header) {
+ printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
+ return -ENOMEM;
+ }
+
+ while (cur_index < total_size) {
+
+ err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
+ if (err != NVRAM_HEADER_LEN) {
+ printk(KERN_ERR "nvram_scan_partitions: Error parsing "
+ "nvram partitions\n");
+ goto out;
+ }
+
+ cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
+
+ memcpy(&phead, header, NVRAM_HEADER_LEN);
+
+ err = 0;
+ c_sum = nvram_checksum(&phead);
+ if (c_sum != phead.checksum) {
+ printk(KERN_WARNING "WARNING: nvram partition checksum"
+ " was %02x, should be %02x!\n",
+ phead.checksum, c_sum);
+ printk(KERN_WARNING "Terminating nvram partition scan\n");
+ goto out;
+ }
+ if (!phead.length) {
+ printk(KERN_WARNING "WARNING: nvram corruption "
+ "detected: 0-length partition\n");
+ goto out;
+ }
+ tmp_part = (struct nvram_partition *)
+ kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!tmp_part) {
+ printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+ goto out;
+ }
+
+ memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
+ tmp_part->index = cur_index;
+ list_add_tail(&tmp_part->partition, &nvram_part->partition);
+
+ cur_index += phead.length * NVRAM_BLOCK_LEN;
+ }
+ err = 0;
+
+ out:
+ kfree(header);
+ return err;
+}
+
+static int __init nvram_init(void)
+{
+ int error;
+ int rc;
+
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
+ return -ENODEV;
+
+ rc = misc_register(&nvram_dev);
+ if (rc != 0) {
+ printk(KERN_ERR "nvram_init: failed to register device\n");
+ return rc;
+ }
+
+ /* initialize our anchor for the nvram partition list */
+ nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ if (!nvram_part) {
+ printk(KERN_ERR "nvram_init: Failed kmalloc\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&nvram_part->partition);
+
+ /* Get all the NVRAM partitions */
+ error = nvram_scan_partitions();
+ if (error) {
+ printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
+ return error;
+ }
+
+ if(nvram_setup_partition())
+ printk(KERN_WARNING "nvram_init: Could not find nvram partition"
+ " for nvram buffered error logging.\n");
+
+#ifdef DEBUG_NVRAM
+ nvram_print_partitions("NVRAM Partitions");
+#endif
+
+ return rc;
+}
+
+void __exit nvram_cleanup(void)
+{
+ misc_deregister( &nvram_dev );
+}
+
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* nvram_write_error_log
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode. If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk. For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that is would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will looks like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name | data |
+ * |0 |1 |2 3|4 15|16 length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log |
+ * |0 3|4 7|8 nvram_error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_error_log(char * buff, int length, unsigned int err_type)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (no_logging) {
+ return -EPERM;
+ }
+
+ if (nvram_error_log_index == -1) {
+ return -ESPIPE;
+ }
+
+ if (length > nvram_error_log_size) {
+ length = nvram_error_log_size;
+ }
+
+ info.error_type = err_type;
+ info.seq_num = error_log_cnt;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_write(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ if (length > nvram_error_log_size)
+ length = nvram_error_log_size;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_read(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ error_log_cnt = info.seq_num;
+ *err_type = info.error_type;
+
+ return 0;
+}
+
+/* This doesn't actually zero anything, but it sets the event_logged
+ * word to tell that this event is safely in syslog.
+ */
+int nvram_clear_error_log(void)
+{
+ loff_t tmp_index;
+ int clear_word = ERR_FLAG_ALREADY_LOGGED;
+ int rc;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+module_init(nvram_init);
+module_exit(nvram_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 00000000000..a7b68f911eb
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,135 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/lppaca.h>
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+
+
+/* This symbol is provided by the linker - let it fill in the paca
+ * field correctly */
+extern unsigned long __toc_start;
+
+/* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux. Each also contains an ItLpRegSave area which
+ * is used by the hypervisor to save registers.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor. The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors. The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+#define PACA_INIT_COMMON(number, start, asrr, asrv) \
+ .lock_token = 0x8000, \
+ .paca_index = (number), /* Paca Index */ \
+ .default_decr = 0x00ff0000, /* Initial Decr */ \
+ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
+ .stab_real = (asrr), /* Real pointer to segment table */ \
+ .stab_addr = (asrv), /* Virt pointer to segment table */ \
+ .cpu_start = (start), /* Processor start */ \
+ .hw_cpu_id = 0xffff, \
+ .lppaca = { \
+ .desc = 0xd397d781, /* "LpPa" */ \
+ .size = sizeof(struct lppaca), \
+ .dyn_proc_status = 2, \
+ .decr_val = 0x00ff0000, \
+ .fpregs_in_use = 1, \
+ .end_of_quantum = 0xfffffffffffffffful, \
+ .slb_count = 64, \
+ .vmxregs_in_use = 0, \
+ }, \
+
+#ifdef CONFIG_PPC_ISERIES
+#define PACA_INIT_ISERIES(number) \
+ .lppaca_ptr = &paca[number].lppaca, \
+ .reg_save_ptr = &paca[number].reg_save, \
+ .reg_save = { \
+ .xDesc = 0xd397d9e2, /* "LpRS" */ \
+ .xSize = sizeof(struct ItLpRegSave) \
+ }
+
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#else
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \
+}
+#endif
+
+struct paca_struct paca[] = {
+ BOOTCPU_PACA_INIT(0),
+#if NR_CPUS > 1
+ PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
+#if NR_CPUS > 4
+ PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
+#if NR_CPUS > 8
+ PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
+ PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
+ PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
+ PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
+ PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
+ PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
+#if NR_CPUS > 32
+ PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
+ PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
+ PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
+ PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
+ PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
+ PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
+ PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
+ PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
+#if NR_CPUS > 64
+ PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
+ PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
+ PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
+ PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
+ PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
+ PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
+ PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
+ PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
+ PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
+ PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
+ PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
+ PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
+ PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
+ PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
+ PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
+ PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
+#endif
+#endif
+#endif
+#endif
+#endif
+};
+EXPORT_SYMBOL(paca);
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 00000000000..8b6008ab217
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1365 @@
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+unsigned long pci_probe_only = 1;
+int pci_assign_all_buses = 0;
+
+/*
+ * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch
+ * devices we don't have access to.
+ */
+unsigned long io_page_mask;
+
+EXPORT_SYMBOL(io_page_mask);
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static void fixup_resource(struct resource *res, struct pci_dev *dev);
+static void do_bus_setup(struct pci_bus *bus);
+#endif
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
+ * page is mapped and isa_io_limit prevents access to it.
+ */
+unsigned long isa_io_base; /* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+unsigned long pci_io_base;
+EXPORT_SYMBOL(pci_io_base);
+
+void iSeries_pcibios_init(void);
+
+LIST_HEAD(hose_list);
+
+struct dma_mapping_ops pci_dma_ops;
+EXPORT_SYMBOL(pci_dma_ops);
+
+int global_phb_number; /* Global phb counter */
+
+/* Cached ISA bridge dev. */
+struct pci_dev *ppc64_isabridge_dev = NULL;
+
+static void fixup_broken_pcnet32(struct pci_dev* dev)
+{
+ if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
+ dev->vendor = PCI_VENDOR_ID_AMD;
+ pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+ struct resource *res)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ region->start = res->start - offset;
+ region->end = res->end - offset;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+ struct pci_bus_region *region)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ res->start = region->start + offset;
+ res->end = region->end + offset;
+}
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+#endif
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ struct pci_dev *dev = data;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start = res->start;
+ unsigned long alignto;
+
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long offset = (unsigned long)hose->io_base_virt -
+ pci_io_base;
+ /* Make sure we start at our min on all hoses */
+ if (start - offset < PCIBIOS_MIN_IO)
+ start = PCIBIOS_MIN_IO + offset;
+
+ /*
+ * Put everything into 0x00-0xff region modulo 0x400
+ */
+ if (start & 0x300)
+ start = (start + 0x3ff) & ~0x3ff;
+
+ } else if (res->flags & IORESOURCE_MEM) {
+ /* Make sure we start at our min on all hoses */
+ if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
+ start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
+
+ /* Align to multiple of size of minimum base. */
+ alignto = max(0x1000UL, align);
+ start = ALIGN(start, alignto);
+ }
+
+ res->start = start;
+}
+
+static DEFINE_SPINLOCK(hose_spinlock);
+
+/*
+ * pci_controller(phb) initialized common variables.
+ */
+static void __devinit pci_setup_pci_controller(struct pci_controller *hose)
+{
+ memset(hose, 0, sizeof(struct pci_controller));
+
+ spin_lock(&hose_spinlock);
+ hose->global_number = global_phb_number++;
+ list_add_tail(&hose->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
+}
+
+static void add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ struct property *of_prop;
+ unsigned int size;
+
+ of_prop = (struct property *)
+ get_property(dev, "linux,pci-domain", &size);
+ if (of_prop != NULL)
+ return;
+ WARN_ON(of_prop && size < sizeof(int));
+ if (of_prop && size < sizeof(int))
+ of_prop = NULL;
+ size = sizeof(struct property) + sizeof(int);
+ if (of_prop == NULL) {
+ if (mem_init_done)
+ of_prop = kmalloc(size, GFP_KERNEL);
+ else
+ of_prop = alloc_bootmem(size);
+ }
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(int);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ *((int *)of_prop->value) = phb->global_number;
+ prom_add_property(dev, of_prop);
+}
+
+struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
+{
+ struct pci_controller *phb;
+
+ if (mem_init_done)
+ phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
+ else
+ phb = alloc_bootmem(sizeof (struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+ pci_setup_pci_controller(phb);
+ phb->arch_data = dev;
+ phb->is_dynamic = mem_init_done;
+ if (dev)
+ add_linux_pci_domain(dev, phb);
+ return phb;
+}
+
+void pcibios_free_controller(struct pci_controller *phb)
+{
+ if (phb->arch_data) {
+ struct device_node *np = phb->arch_data;
+ int *domain = (int *)get_property(np,
+ "linux,pci-domain", NULL);
+ if (domain)
+ *domain = -1;
+ }
+ if (phb->is_dynamic)
+ kfree(phb);
+}
+
+static void __init pcibios_claim_one_bus(struct pci_bus *b)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+
+ list_for_each_entry(dev, &b->devices, bus_list) {
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->parent || !r->start || !r->flags)
+ continue;
+ pci_claim_resource(dev, i);
+ }
+ }
+
+ list_for_each_entry(child_bus, &b->children, node)
+ pcibios_claim_one_bus(child_bus);
+}
+
+#ifndef CONFIG_PPC_ISERIES
+static void __init pcibios_claim_of_setup(void)
+{
+ struct pci_bus *b;
+
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_claim_one_bus(b);
+}
+#endif
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+ u32 *prop;
+ int len;
+
+ prop = (u32 *) get_property(np, name, &len);
+ if (prop && len >= 4)
+ return *prop;
+ return def;
+}
+
+static unsigned int pci_parse_of_flags(u32 addr0)
+{
+ unsigned int flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ return flags;
+}
+
+#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
+{
+ u64 base, size;
+ unsigned int flags;
+ struct resource *res;
+ u32 *addrs, i;
+ int proplen;
+
+ addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ for (; proplen >= 20; proplen -= 20, addrs += 5) {
+ flags = pci_parse_of_flags(addrs[0]);
+ if (!flags)
+ continue;
+ base = GET_64BIT(addrs, 1);
+ size = GET_64BIT(addrs, 3);
+ if (!size)
+ continue;
+ i = addrs[0] & 0xff;
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->start = base;
+ res->end = base + size - 1;
+ res->flags = flags;
+ res->name = pci_name(dev);
+ fixup_resource(res, dev);
+ }
+}
+
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct pci_dev *dev;
+ const char *type;
+
+ dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ type = get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ memset(dev, 0, sizeof(struct pci_dev));
+ dev->bus = bus;
+ dev->sysdata = node;
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+
+ dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+ dev->device = get_int_prop(node, "device-id", 0xffff);
+ dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+ dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+
+ sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dev->class = get_int_prop(node, "class-code", 0);
+
+ dev->current_state = 4; /* unknown power state */
+
+ if (!strcmp(type, "pci")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+ dev->irq = NO_IRQ;
+ if (node->n_intrs > 0) {
+ dev->irq = node->intrs[0].line;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+ dev->irq);
+ }
+ }
+
+ pci_parse_of_addrs(node, dev);
+
+ pci_device_add(dev, bus);
+
+ /* XXX pci_scan_msi_device(dev); */
+
+ return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+void __devinit of_scan_bus(struct device_node *node,
+ struct pci_bus *bus)
+{
+ struct device_node *child = NULL;
+ u32 *reg;
+ int reglen, devfn;
+ struct pci_dev *dev;
+
+ while ((child = of_get_next_child(node, child)) != NULL) {
+ reg = (u32 *) get_property(child, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ continue;
+ devfn = (reg[0] >> 8) & 0xff;
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(child, bus, devfn);
+ if (!dev)
+ continue;
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ of_scan_pci_bridge(child, dev);
+ }
+
+ do_bus_setup(bus);
+}
+EXPORT_SYMBOL(of_scan_bus);
+
+void __devinit of_scan_pci_bridge(struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct pci_bus *bus;
+ u32 *busrange, *ranges;
+ int len, i, mode;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ /* parse bus-range property */
+ busrange = (u32 *) get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = (u32 *) get_property(node, "ranges", &len);
+ if (ranges == NULL) {
+ printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+ bus->sysdata = node;
+
+ /* parse ranges property */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ flags = pci_parse_of_flags(ranges[0]);
+ size = GET_64BIT(ranges, 6);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ }
+ res->start = GET_64BIT(ranges, 1);
+ res->end = res->start + size - 1;
+ res->flags = flags;
+ fixup_resource(res, dev);
+ }
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+ else if (mode == PCI_PROBE_NORMAL)
+ pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void __devinit scan_phb(struct pci_controller *hose)
+{
+ struct pci_bus *bus;
+ struct device_node *node = hose->arch_data;
+ int i, mode;
+ struct resource *res;
+
+ bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
+ if (bus == NULL) {
+ printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
+ hose->global_number);
+ return;
+ }
+ bus->secondary = hose->first_busno;
+ hose->bus = bus;
+
+ bus->resource[0] = res = &hose->io_resource;
+ if (res->flags && request_resource(&ioport_resource, res))
+ printk(KERN_ERR "Failed to request PCI IO region "
+ "on PCI domain %04x\n", hose->global_number);
+
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+ bus->resource[i+1] = res;
+ if (res->flags && request_resource(&iomem_resource, res))
+ printk(KERN_ERR "Failed to request PCI memory region "
+ "on PCI domain %04x\n", hose->global_number);
+ }
+
+ mode = PCI_PROBE_NORMAL;
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE) {
+ bus->subordinate = hose->last_busno;
+ of_scan_bus(node, bus);
+ }
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+ if (mode == PCI_PROBE_NORMAL)
+ hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+ pci_bus_add_devices(bus);
+}
+
+static int __init pcibios_init(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ /* For now, override phys_mem_access_prot. If we need it,
+ * later, we may move that initialization to each ppc_md
+ */
+ ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+#ifdef CONFIG_PPC_ISERIES
+ iSeries_pcibios_init();
+#endif
+
+ printk("PCI: Probing PCI hardware\n");
+
+ /* Scan all of the recorded PCI controllers. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ scan_phb(hose);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (pci_probe_only)
+ pcibios_claim_of_setup();
+ else
+ /* FIXME: `else' will be removed when
+ pci_assign_unassigned_resources() is able to work
+ correctly with [partially] allocated PCI tree. */
+ pci_assign_unassigned_resources();
+#endif /* !CONFIG_PPC_ISERIES */
+
+ /* Call machine dependent final fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
+ /* Cache the location of the ISA bridge (if we have one) */
+ ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+ if (ppc64_isabridge_dev != NULL)
+ printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /* map in PCI I/O space */
+ phbs_remap_io();
+#endif
+
+ printk("PCI: Probing PCI hardware done\n");
+
+ return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+char __init *pcibios_setup(char *str)
+{
+ return str;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, oldcmd;
+ int i;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ oldcmd = cmd;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<i)))
+ continue;
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ if (cmd != oldcmd) {
+ printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+ pci_name(dev), cmd);
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ return hose->global_number;
+#endif
+}
+
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ return hose->buid;
+#endif
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ * -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+ unsigned long *offset,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long io_offset = 0;
+ int i, res_bit;
+
+ if (hose == 0)
+ return NULL; /* should never happen */
+
+ /* If memory, add on the PCI bridge address offset */
+ if (mmap_state == pci_mmap_mem) {
+ *offset += hose->pci_mem_offset;
+ res_bit = IORESOURCE_MEM;
+ } else {
+ io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+
+ /*
+ * Check that the offset requested corresponds to one of the
+ * resources of the device.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &dev->resource[i];
+ int flags = rp->flags;
+
+ /* treat ROM as memory (should be already) */
+ if (i == PCI_ROM_RESOURCE)
+ flags |= IORESOURCE_MEM;
+
+ /* Active and same type? */
+ if ((flags & res_bit) == 0)
+ continue;
+
+ /* In the range of this resource? */
+ if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+ continue;
+
+ /* found it! construct the final physical address */
+ if (mmap_state == pci_mmap_io)
+ *offset += hose->io_base_phys - io_offset;
+ return rp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+ pgprot_t protection,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long prot = pgprot_val(protection);
+
+ /* Write combine is always 0 on non-memory space mappings. On
+ * memory space, if the user didn't pass 1, we check for a
+ * "prefetchable" resource. This is a bit hackish, but we use
+ * this to workaround the inability of /sysfs to provide a write
+ * combine bit
+ */
+ if (mmap_state != pci_mmap_mem)
+ write_combine = 0;
+ else if (write_combine == 0) {
+ if (rp->flags & IORESOURCE_PREFETCH)
+ write_combine = 1;
+ }
+
+ /* XXX would be nice to have a way to ask for write-through */
+ prot |= _PAGE_NO_CACHE;
+ if (write_combine)
+ prot &= ~_PAGE_GUARDED;
+ else
+ prot |= _PAGE_GUARDED;
+
+ printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
+ prot);
+
+ return __pgprot(prot);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t protection)
+{
+ struct pci_dev *pdev = NULL;
+ struct resource *found = NULL;
+ unsigned long prot = pgprot_val(protection);
+ unsigned long offset = pfn << PAGE_SHIFT;
+ int i;
+
+ if (page_is_ram(pfn))
+ return __pgprot(prot);
+
+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+
+ for_each_pci_dev(pdev) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ int flags = rp->flags;
+
+ /* Active and same type? */
+ if ((flags & IORESOURCE_MEM) == 0)
+ continue;
+ /* In the range of this resource? */
+ if (offset < (rp->start & PAGE_MASK) ||
+ offset > rp->end)
+ continue;
+ found = rp;
+ break;
+ }
+ if (found)
+ break;
+ }
+ if (found) {
+ if (found->flags & IORESOURCE_PREFETCH)
+ prot &= ~_PAGE_GUARDED;
+ pci_dev_put(pdev);
+ }
+
+ DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+
+ return __pgprot(prot);
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture. The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ struct resource *rp;
+ int ret;
+
+ rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+ if (rp == NULL)
+ return -EINVAL;
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
+ vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+ vma->vm_page_prot,
+ mmap_state, write_combine);
+
+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev;
+ struct device_node *np;
+
+ pdev = to_pci_dev (dev);
+ np = pci_device_to_OF_node(pdev);
+ if (np == NULL || np->full_name == NULL)
+ return 0;
+ return sprintf(buf, "%s", np->full_name);
+}
+static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void pcibios_add_platform_entries(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ device_create_file(&pdev->dev, &dev_attr_devspec);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys,
+ void __iomem * phb_io_base_virt)
+{
+ struct isa_range *range;
+ unsigned long pci_addr;
+ unsigned int isa_addr;
+ unsigned int size;
+ int rlen = 0;
+
+ range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+ if (range == NULL || (rlen < sizeof(struct isa_range))) {
+ printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+ "mapping 64k\n");
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long)phb_io_base_virt,
+ 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ return;
+ }
+
+ /* From "ISA Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 1: an ISA address
+ * cells 2 - 4: a PCI address
+ * (size depending on dev->n_addr_cells)
+ * cell 5: the size of the range
+ */
+ if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+ isa_addr = range->isa_addr.a_lo;
+ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+ range->pci_addr.a_lo;
+
+ /* Assume these are both zero */
+ if ((pci_addr != 0) || (isa_addr != 0)) {
+ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+ __FUNCTION__);
+ return;
+ }
+
+ size = PAGE_ALIGN(range->size);
+
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long) phb_io_base_virt,
+ size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ }
+}
+
+void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int prim)
+{
+ unsigned int *ranges, pci_space;
+ unsigned long size;
+ int rlen = 0;
+ int memno = 0;
+ struct resource *res;
+ int np, na = prom_n_addr_cells(dev);
+ unsigned long pci_addr, cpu_phys_addr;
+
+ np = na + 5;
+
+ /* From "PCI Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 2: a PCI address
+ * cells 3 or 3+4: a CPU physical address
+ * (size depending on dev->n_addr_cells)
+ * cells 4+5 or 5+6: the size of the range
+ */
+ ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+ if (ranges == NULL)
+ return;
+ hose->io_base_phys = 0;
+ while ((rlen -= np * sizeof(unsigned int)) >= 0) {
+ res = NULL;
+ pci_space = ranges[0];
+ pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+
+ cpu_phys_addr = ranges[3];
+ if (na >= 2)
+ cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
+
+ size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
+ ranges += np;
+ if (size == 0)
+ continue;
+
+ /* Now consume following elements while they are contiguous */
+ while (rlen >= np * sizeof(unsigned int)) {
+ unsigned long addr, phys;
+
+ if (ranges[0] != pci_space)
+ break;
+ addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+ phys = ranges[3];
+ if (na >= 2)
+ phys = (phys << 32) | ranges[4];
+ if (addr != pci_addr + size ||
+ phys != cpu_phys_addr + size)
+ break;
+
+ size += ((unsigned long)ranges[na+3] << 32)
+ | ranges[na+4];
+ ranges += np;
+ rlen -= np * sizeof(unsigned int);
+ }
+
+ switch ((pci_space >> 24) & 0x3) {
+ case 1: /* I/O space */
+ hose->io_base_phys = cpu_phys_addr;
+ hose->pci_io_size = size;
+
+ res = &hose->io_resource;
+ res->flags = IORESOURCE_IO;
+ res->start = pci_addr;
+ DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ break;
+ case 2: /* memory space */
+ memno = 0;
+ while (memno < 3 && hose->mem_resources[memno].flags)
+ ++memno;
+
+ if (memno == 0)
+ hose->pci_mem_offset = cpu_phys_addr - pci_addr;
+ if (memno < 3) {
+ res = &hose->mem_resources[memno];
+ res->flags = IORESOURCE_MEM;
+ res->start = cpu_phys_addr;
+ DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ }
+ break;
+ }
+ if (res != NULL) {
+ res->name = dev->full_name;
+ res->end = res->start + size - 1;
+ res->parent = NULL;
+ res->sibling = NULL;
+ res->child = NULL;
+ }
+ }
+}
+
+void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+ struct device_node *isa_dn;
+
+ hose->io_base_virt = reserve_phb_iospace(size);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary) {
+ pci_io_base = (unsigned long)hose->io_base_virt;
+ isa_dn = of_find_node_by_type(NULL, "isa");
+ if (isa_dn) {
+ isa_io_base = pci_io_base;
+ pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+ hose->io_base_virt);
+ of_node_put(isa_dn);
+ /* Allow all IO */
+ io_page_mask = -1;
+ }
+ }
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+ int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+
+ hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary)
+ pci_io_base = (unsigned long)hose->io_base_virt;
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+
+static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+ unsigned long *start_virt, unsigned long *size)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pci_bus_region region;
+ struct resource *res;
+
+ if (bus->self) {
+ res = bus->resource[0];
+ pcibios_resource_to_bus(bus->self, &region, res);
+ *start_phys = hose->io_base_phys + region.start;
+ *start_virt = (unsigned long) hose->io_base_virt +
+ region.start;
+ if (region.end > region.start)
+ *size = region.end - region.start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, region.start, region.end);
+ return 1;
+ }
+
+ } else {
+ /* Root Bus */
+ res = &hose->io_resource;
+ *start_phys = hose->io_base_phys;
+ *start_virt = (unsigned long) hose->io_base_virt;
+ if (res->end > res->start)
+ *size = res->end - res->start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, res->start, res->end);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int unmap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ if (iounmap_explicit((void __iomem *) start_virt, size))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(unmap_bus_range);
+
+int remap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ if (start_phys == 0)
+ return 1;
+ printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+ if (__ioremap_explicit(start_phys, start_virt, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(remap_bus_range);
+
+void phbs_remap_io(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ remap_bus_range(hose->bus);
+}
+
+static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start, end, mask, offset;
+
+ if (res->flags & IORESOURCE_IO) {
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ start = res->start += offset;
+ end = res->end += offset;
+
+ /* Need to allow IO access to pages that are in the
+ ISA range */
+ if (start < MAX_ISA_PORT) {
+ if (end > MAX_ISA_PORT)
+ end = MAX_ISA_PORT;
+
+ start >>= PAGE_SHIFT;
+ end >>= PAGE_SHIFT;
+
+ /* get the range of pages for the map */
+ mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
+ io_page_mask |= mask;
+ }
+ } else if (res->flags & IORESOURCE_MEM) {
+ res->start += hose->pci_mem_offset;
+ res->end += hose->pci_mem_offset;
+ }
+}
+
+void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
+ struct pci_bus *bus)
+{
+ /* Update device resources. */
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++)
+ if (dev->resource[i].flags)
+ fixup_resource(&dev->resource[i], dev);
+}
+EXPORT_SYMBOL(pcibios_fixup_device_resources);
+
+static void __devinit do_bus_setup(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ ppc_md.iommu_bus_setup(bus);
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ ppc_md.iommu_dev_setup(dev);
+
+ if (ppc_md.irq_bus_setup)
+ ppc_md.irq_bus_setup(bus);
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev = bus->self;
+
+ if (dev && pci_probe_only &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ /* This is a subordinate bridge */
+
+ pci_read_bridge_bases(bus);
+ pcibios_fixup_device_resources(dev, bus);
+ }
+
+ do_bus_setup(bus);
+
+ if (!pci_probe_only)
+ return;
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ pcibios_fixup_device_resources(dev, bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+ u8 intpin;
+ struct device_node *node;
+
+ pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
+ if (intpin == 0)
+ return 0;
+
+ node = pci_device_to_OF_node(pci_dev);
+ if (node == NULL)
+ return -1;
+
+ if (node->n_intrs == 0)
+ return -1;
+
+ pci_dev->irq = node->intrs[0].line;
+
+ pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_read_irq_line);
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ u64 *start, u64 *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = pci_io_base - (unsigned long)hose->io_base_virt +
+ hose->io_base_phys;
+
+ *start = rsrc->start + offset;
+ *end = rsrc->end + offset;
+}
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+
+#define IOBASE_BRIDGE_NUMBER 0
+#define IOBASE_MEMORY 1
+#define IOBASE_IO 2
+#define IOBASE_ISA_IO 3
+#define IOBASE_ISA_MEM 4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+ unsigned long in_devfn)
+{
+ struct pci_controller* hose;
+ struct list_head *ln;
+ struct pci_bus *bus = NULL;
+ struct device_node *hose_node;
+
+ /* Argh ! Please forgive me for that hack, but that's the
+ * simplest way to get existing XFree to not lockup on some
+ * G5 machines... So when something asks for bus 0 io base
+ * (bus 0 is HT root), we return the AGP one instead.
+ */
+ if (machine_is_compatible("MacRISC4"))
+ if (in_bus == 0)
+ in_bus = 0xf0;
+
+ /* That syscall isn't quite compatible with PCI domains, but it's
+ * used on pre-domains setup. We return the first match
+ */
+
+ for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
+ bus = pci_bus_b(ln);
+ if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
+ break;
+ bus = NULL;
+ }
+ if (bus == NULL || bus->sysdata == NULL)
+ return -ENODEV;
+
+ hose_node = (struct device_node *)bus->sysdata;
+ hose = PCI_DN(hose_node)->phb;
+
+ switch (which) {
+ case IOBASE_BRIDGE_NUMBER:
+ return (long)hose->first_busno;
+ case IOBASE_MEMORY:
+ return (long)hose->pci_mem_offset;
+ case IOBASE_IO:
+ return (long)hose->io_base_phys;
+ case IOBASE_ISA_IO:
+ return (long)isa_io_base;
+ case IOBASE_ISA_MEM:
+ return -EINVAL;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 00000000000..e1a32f802c0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
+/*
+ * Support for DMA from PCI devices to main memory on
+ * machines without an iommu or with directly addressable
+ * RAM (typically a pmac with 2Gb of RAM or less)
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/abs_addr.h>
+#include <asm/ppc-pci.h>
+
+static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret;
+
+ ret = (void *)__get_free_pages(flag, get_order(size));
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_abs(ret);
+ }
+ return ret;
+}
+
+static void pci_direct_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
+ size_t size, enum dma_data_direction direction)
+{
+ return virt_to_abs(ptr);
+}
+
+static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++, sg++) {
+ sg->dma_address = page_to_phys(sg->page) + sg->offset;
+ sg->dma_length = sg->length;
+ }
+
+ return nents;
+}
+
+static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_dma_supported(struct device *dev, u64 mask)
+{
+ return mask < 0x100000000ull;
+}
+
+void __init pci_direct_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_direct_free_coherent;
+ pci_dma_ops.map_single = pci_direct_map_single;
+ pci_dma_ops.unmap_single = pci_direct_unmap_single;
+ pci_dma_ops.map_sg = pci_direct_map_sg;
+ pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
+ pci_dma_ops.dma_supported = pci_direct_dma_supported;
+}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 00000000000..12c4c9e9bbc
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * Traverse_func that inits the PCI fields of the device node.
+ * NOTE: this *must* be done before read/write config to the device.
+ */
+static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
+ u32 *regs;
+ struct pci_dn *pdn;
+
+ if (mem_init_done)
+ pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
+ else
+ pdn = alloc_bootmem(sizeof(*pdn));
+ if (pdn == NULL)
+ return NULL;
+ memset(pdn, 0, sizeof(*pdn));
+ dn->data = pdn;
+ pdn->node = dn;
+ pdn->phb = phb;
+ regs = (u32 *)get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ pdn->busno = (regs[0] >> 16) & 0xff;
+ pdn->devfn = (regs[0] >> 8) & 0xff;
+ }
+
+ pdn->pci_ext_config_space = (type && *type == 1);
+ return NULL;
+}
+
+/*
+ * Traverse a device tree stopping each PCI device in the tree.
+ * This is done depth first. As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value. If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+ void *data)
+{
+ struct device_node *dn, *nextdn;
+ void *ret;
+
+ /* We started with a phb, iterate all childs */
+ for (dn = start->child; dn; dn = nextdn) {
+ u32 *classp, class;
+
+ nextdn = NULL;
+ classp = (u32 *)get_property(dn, "class-code", NULL);
+ class = classp ? *classp : 0;
+
+ if (pre && ((ret = pre(dn, data)) != NULL))
+ return ret;
+
+ /* If we are a PCI bridge, go down */
+ if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+ (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+ /* Depth first...do children */
+ nextdn = dn->child;
+ else if (dn->sibling)
+ /* ok, try next sibling instead. */
+ nextdn = dn->sibling;
+ if (!nextdn) {
+ /* Walk up to next valid sibling. */
+ do {
+ dn = dn->parent;
+ if (dn == start)
+ return NULL;
+ } while (dn->sibling == NULL);
+ nextdn = dn->sibling;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node * dn = (struct device_node *) phb->arch_data;
+ struct pci_dn *pdn;
+
+ /* PHB nodes themselves must not match */
+ update_dn_pci_info(dn, phb);
+ pdn = dn->data;
+ if (pdn) {
+ pdn->devfn = pdn->busno = -1;
+ pdn->phb = phb;
+ }
+
+ /* Update dn->phb ptrs for new phb and children devices */
+ traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/*
+ * Traversal func that looks for a <busno,devfcn> value.
+ * If found, the pci_dn is returned (thus terminating the traversal).
+ */
+static void *is_devfn_node(struct device_node *dn, void *data)
+{
+ int busno = ((unsigned long)data >> 8) & 0xff;
+ int devfn = ((unsigned long)data) & 0xff;
+ struct pci_dn *pci = dn->data;
+
+ if (pci && (devfn == pci->devfn) && (busno == pci->busno))
+ return dn;
+ return NULL;
+}
+
+/*
+ * This is the "slow" path for looking up a device_node from a
+ * pci_dev. It will hunt for the device under its parent's
+ * phb and then update sysdata for a future fastpath.
+ *
+ * It may also do fixups on the actual device since this happens
+ * on the first read/write.
+ *
+ * Note that it also must deal with devices that don't exist.
+ * In this case it may probe for real hardware ("just in case")
+ * and add a device_node to the device tree if necessary.
+ *
+ */
+struct device_node *fetch_dev_dn(struct pci_dev *dev)
+{
+ struct device_node *orig_dn = dev->sysdata;
+ struct device_node *dn;
+ unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
+
+ dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
+ if (dn)
+ dev->sysdata = dn;
+ return dn;
+}
+EXPORT_SYMBOL(fetch_dev_dn);
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
+{
+ struct device_node *np = node;
+ struct pci_dn *pci = NULL;
+ int err = NOTIFY_OK;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ pci = np->parent->data;
+ if (pci)
+ update_dn_pci_info(np, pci->phb);
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+ .notifier_call = pci_dn_reconfig_notifier,
+};
+
+/**
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ *
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures
+ * (including pci info in the device node structs) for each
+ * pci device found underneath. This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ /* This must be done first so the device nodes have valid pci info! */
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ pci_devs_phb_init_dynamic(phb);
+
+ pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 00000000000..bdf15dbbf4f
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
+/*
+ * arch/ppc64/kernel/pci_iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *
+ * Dynamic DMA mapping support, platform-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * We can use ->sysdata directly and avoid the extra work in
+ * pci_device_to_OF_node since ->sysdata will have been initialised
+ * in the iommu init code for all devices.
+ */
+#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
+
+static inline struct iommu_table *devnode_table(struct device *dev)
+{
+ struct pci_dev *pdev;
+
+ if (!dev) {
+ pdev = ppc64_isabridge_dev;
+ if (!pdev)
+ return NULL;
+ } else
+ pdev = to_pci_dev(dev);
+
+ return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
+}
+
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
+ flag);
+}
+
+static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
+}
+
+
+static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
+}
+
+
+static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ return iommu_map_sg(pdev, devnode_table(pdev), sglist,
+ nelems, direction);
+}
+
+static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int pci_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+void pci_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_iommu_free_coherent;
+ pci_dma_ops.map_single = pci_iommu_map_single;
+ pci_dma_ops.unmap_single = pci_iommu_unmap_single;
+ pci_dma_ops.map_sg = pci_iommu_map_sg;
+ pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
+ pci_dma_ops.dma_supported = pci_iommu_dma_supported;
+}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 47d6f7e2ea9..94db2570845 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -44,6 +44,7 @@
#include <asm/cputable.h>
#include <asm/btext.h>
#include <asm/div64.h>
+#include <asm/signal.h>
#ifdef CONFIG_8xx
#include <asm/commproc.h>
@@ -56,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
extern void alignment_exception(struct pt_regs *regs);
extern void program_check_exception(struct pt_regs *regs);
extern void single_step_exception(struct pt_regs *regs);
-extern int do_signal(sigset_t *, struct pt_regs *);
extern int pmac_newworld;
extern int sys_sigreturn(struct pt_regs *regs);
@@ -105,6 +105,13 @@ EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
+#ifndef __powerpc64__
+EXPORT_SYMBOL(__ide_mm_insl);
+EXPORT_SYMBOL(__ide_mm_outsw);
+EXPORT_SYMBOL(__ide_mm_insw);
+EXPORT_SYMBOL(__ide_mm_outsl);
+#endif
+
EXPORT_SYMBOL(_insb);
EXPORT_SYMBOL(_outsb);
EXPORT_SYMBOL(_insw);
@@ -139,9 +146,6 @@ EXPORT_SYMBOL(pci_bus_io_base);
EXPORT_SYMBOL(pci_bus_io_base_phys);
EXPORT_SYMBOL(pci_bus_mem_base_phys);
EXPORT_SYMBOL(pci_bus_to_hose);
-EXPORT_SYMBOL(pci_resource_to_bus);
-EXPORT_SYMBOL(pci_phys_to_bus);
-EXPORT_SYMBOL(pci_bus_to_phys);
#endif /* CONFIG_PCI */
#ifdef CONFIG_NOT_COHERENT_CACHE
@@ -159,15 +163,13 @@ EXPORT_SYMBOL(giveup_altivec);
EXPORT_SYMBOL(giveup_spe);
#endif /* CONFIG_SPE */
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(__flush_icache_range);
-#else
+#ifndef CONFIG_PPC64
EXPORT_SYMBOL(flush_instruction_cache);
-EXPORT_SYMBOL(flush_icache_range);
EXPORT_SYMBOL(flush_tlb_kernel_range);
EXPORT_SYMBOL(flush_tlb_page);
EXPORT_SYMBOL(_tlbie);
#endif
+EXPORT_SYMBOL(__flush_icache_range);
EXPORT_SYMBOL(flush_dcache_range);
#ifdef CONFIG_SMP
@@ -188,9 +190,6 @@ EXPORT_SYMBOL(adb_try_handler_change);
EXPORT_SYMBOL(cuda_request);
EXPORT_SYMBOL(cuda_poll);
#endif /* CONFIG_ADB_CUDA */
-#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(_machine);
-#endif
#ifdef CONFIG_PPC_PMAC
EXPORT_SYMBOL(sys_ctrler);
#endif
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 00000000000..7ba42a405f4
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/vdso_datapage.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos);
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("ppc64", NULL);
+ if (!root)
+ return 1;
+
+ if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ if (!pde)
+ return 1;
+ pde->nlink = 1;
+ pde->data = vdso_data;
+ pde->size = PAGE_SIZE;
+ pde->proc_fops = &page_map_fops;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index de69fb37c73..105d5609ff5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -201,6 +201,28 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
}
#endif /* CONFIG_SPE */
+/*
+ * If we are doing lazy switching of CPU state (FP, altivec or SPE),
+ * and the current task has some state, discard it.
+ */
+static inline void discard_lazy_cpu_state(void)
+{
+#ifndef CONFIG_SMP
+ preempt_disable();
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+ if (last_task_used_altivec == current)
+ last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+ if (last_task_used_spe == current)
+ last_task_used_spe = NULL;
+#endif
+ preempt_enable();
+#endif /* CONFIG_SMP */
+}
+
int set_dabr(unsigned long dabr)
{
if (ppc_md.set_dabr)
@@ -434,19 +456,7 @@ void show_regs(struct pt_regs * regs)
void exit_thread(void)
{
kprobe_flush_task(current);
-
-#ifndef CONFIG_SMP
- if (last_task_used_math == current)
- last_task_used_math = NULL;
-#ifdef CONFIG_ALTIVEC
- if (last_task_used_altivec == current)
- last_task_used_altivec = NULL;
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
- if (last_task_used_spe == current)
- last_task_used_spe = NULL;
-#endif
-#endif /* CONFIG_SMP */
+ discard_lazy_cpu_state();
}
void flush_thread(void)
@@ -457,20 +467,8 @@ void flush_thread(void)
if (t->flags & _TIF_ABI_PENDING)
t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
#endif
- kprobe_flush_task(current);
-#ifndef CONFIG_SMP
- if (last_task_used_math == current)
- last_task_used_math = NULL;
-#ifdef CONFIG_ALTIVEC
- if (last_task_used_altivec == current)
- last_task_used_altivec = NULL;
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
- if (last_task_used_spe == current)
- last_task_used_spe = NULL;
-#endif
-#endif /* CONFIG_SMP */
+ discard_lazy_cpu_state();
#ifdef CONFIG_PPC64 /* for now */
if (current->thread.dabr) {
@@ -636,18 +634,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
}
#endif
-#ifndef CONFIG_SMP
- if (last_task_used_math == current)
- last_task_used_math = NULL;
-#ifdef CONFIG_ALTIVEC
- if (last_task_used_altivec == current)
- last_task_used_altivec = NULL;
-#endif
-#ifdef CONFIG_SPE
- if (last_task_used_spe == current)
- last_task_used_spe = NULL;
-#endif
-#endif /* CONFIG_SMP */
+ discard_lazy_cpu_state();
memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
current->thread.fpscr.val = 0;
#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f645adb5753..3bf968e7409 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -48,9 +48,6 @@
#include <asm/machdep.h>
#include <asm/pSeries_reconfig.h>
#include <asm/pci-bridge.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
#ifdef DEBUG
#define DBG(fmt...) printk(KERN_ERR fmt)
@@ -74,10 +71,6 @@ struct isa_reg_property {
typedef int interpret_func(struct device_node *, unsigned long *,
int, int, int);
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-
static int __initdata dt_root_addr_cells;
static int __initdata dt_root_size_cells;
@@ -391,7 +384,7 @@ static int __devinit finish_node_interrupts(struct device_node *np,
#ifdef CONFIG_PPC64
/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
- if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) {
+ if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
char *name = get_property(ic->parent, "name", NULL);
if (name && !strcmp(name, "u3"))
np->intrs[intrcount].line += 128;
@@ -1087,9 +1080,9 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
- unsigned long size = 0;
+ unsigned long size;
+ char *type = of_get_flat_dt_prop(node, "device_type", &size);
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -1115,7 +1108,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1161,13 +1154,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
-#ifdef CONFIG_PPC64
- systemcfg->platform = *prop;
-#else
#ifdef CONFIG_PPC_MULTIPLATFORM
_machine = *prop;
#endif
-#endif
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
@@ -1264,7 +1253,14 @@ static int __init early_init_dt_scan_memory(unsigned long node,
unsigned long l;
/* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
+ if (type == NULL) {
+ /*
+ * The longtrail doesn't have a device_type on the
+ * /memory node, so look for the node called /memory@0.
+ */
+ if (depth != 1 || strcmp(uname, "memory@0") != 0)
+ return 0;
+ } else if (strcmp(type, "memory") != 0)
return 0;
reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
@@ -1339,9 +1335,6 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
-#ifdef CONFIG_PPC64
- systemcfg->physicalMemorySize = lmb_phys_mem_size();
-#endif
lmb_reserve(0, __pa(klimit));
DBG("Phys. mem: %lx\n", lmb_phys_mem_size());
@@ -1375,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np)
/* No #address-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_addr_cells);
int
prom_n_size_cells(struct device_node* np)
@@ -1390,6 +1384,7 @@ prom_n_size_cells(struct device_node* np)
/* No #size-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_size_cells);
/**
* Work out the sense (active-low level / active-high edge)
@@ -1908,7 +1903,7 @@ static int of_finish_dynamic_node(struct device_node *node,
/* We don't support that function on PowerMac, at least
* not yet
*/
- if (systemcfg->platform == PLATFORM_POWERMAC)
+ if (_machine == PLATFORM_POWERMAC)
return -ENODEV;
/* fix up new node's linux_phandle field */
@@ -1992,9 +1987,11 @@ int prom_add_property(struct device_node* np, struct property* prop)
*next = prop;
write_unlock(&devtree_lock);
+#ifdef CONFIG_PROC_DEVICETREE
/* try to add to proc as well if it was initialized */
if (np->pde)
proc_device_tree_add_prop(np->pde, prop);
+#endif /* CONFIG_PROC_DEVICETREE */
return 0;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6dc33d19fc2..bcdc209dca8 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -94,11 +94,17 @@ extern const struct linux_logo logo_linux_clut224;
#ifdef CONFIG_PPC64
#define RELOC(x) (*PTRRELOC(&(x)))
#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x))
+#define OF_WORKAROUNDS 0
#else
#define RELOC(x) (x)
#define ADDR(x) (u32) (x)
+#define OF_WORKAROUNDS of_workarounds
+int of_workarounds;
#endif
+#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
+
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
RELOC(__FILE__), __LINE__); \
@@ -111,11 +117,6 @@ extern const struct linux_logo logo_linux_clut224;
#define prom_debug(x...)
#endif
-#ifdef CONFIG_PPC32
-#define PLATFORM_POWERMAC _MACH_Pmac
-#define PLATFORM_CHRP _MACH_chrp
-#endif
-
typedef u32 prom_arg_t;
@@ -128,10 +129,11 @@ struct prom_args {
struct prom_t {
ihandle root;
- ihandle chosen;
+ phandle chosen;
int cpu;
ihandle stdout;
ihandle mmumap;
+ ihandle memory;
};
struct mem_map_entry {
@@ -263,7 +265,7 @@ static int __init call_prom_ret(const char *service, int nargs, int nret,
va_end(list);
for (i = 0; i < nret; i++)
- rets[nargs+i] = 0;
+ args.args[nargs+i] = 0;
if (enter_prom(&args, RELOC(prom_entry)) < 0)
return PROM_ERROR;
@@ -360,16 +362,36 @@ static void __init prom_printf(const char *format, ...)
static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
unsigned long align)
{
- int ret;
struct prom_t *_prom = &RELOC(prom);
- ret = call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
- (prom_arg_t)align);
- if (ret != -1 && _prom->mmumap != 0)
- /* old pmacs need us to map as well */
+ if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) {
+ /*
+ * Old OF requires we claim physical and virtual separately
+ * and then map explicitly (assuming virtual mode)
+ */
+ int ret;
+ prom_arg_t result;
+
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->memory,
+ align, size, virt);
+ if (ret != 0 || result == -1)
+ return -1;
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->mmumap,
+ align, size, virt);
+ if (ret != 0) {
+ call_prom("call-method", 4, 1, ADDR("release"),
+ _prom->memory, size, virt);
+ return -1;
+ }
+ /* the 0x12 is M (coherence) + PP == read/write */
call_prom("call-method", 6, 1,
- ADDR("map"), _prom->mmumap, 0, size, virt, virt);
- return ret;
+ ADDR("map"), _prom->mmumap, 0x12, size, virt, virt);
+ return virt;
+ }
+ return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
+ (prom_arg_t)align);
}
static void __init __attribute__((noreturn)) prom_panic(const char *reason)
@@ -415,11 +437,52 @@ static int inline prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static int inline prom_setprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static void add_string(char **str, const char *q)
{
- return call_prom("setprop", 4, 1, node, ADDR(pname),
- (u32)(unsigned long) value, (u32) valuelen);
+ char *p = *str;
+
+ while (*q)
+ *p++ = *q++;
+ *p++ = ' ';
+ *str = p;
+}
+
+static char *tohex(unsigned int x)
+{
+ static char digits[] = "0123456789abcdef";
+ static char result[9];
+ int i;
+
+ result[8] = 0;
+ i = 8;
+ do {
+ --i;
+ result[i] = digits[x & 0xf];
+ x >>= 4;
+ } while (x != 0 && i > 0);
+ return &result[i];
+}
+
+static int __init prom_setprop(phandle node, const char *nodename,
+ const char *pname, void *value, size_t valuelen)
+{
+ char cmd[256], *p;
+
+ if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+ return call_prom("setprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+
+ /* gah... setprop doesn't work on longtrail, have to use interpret */
+ p = cmd;
+ add_string(&p, "dev");
+ add_string(&p, nodename);
+ add_string(&p, tohex((u32)(unsigned long) value));
+ add_string(&p, tohex(valuelen));
+ add_string(&p, tohex(ADDR(pname)));
+ add_string(&p, tohex(strlen(RELOC(pname))));
+ add_string(&p, "property");
+ *p = 0;
+ return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
}
/* We can't use the standard versions because of RELOC headaches. */
@@ -980,7 +1043,7 @@ static void __init prom_instantiate_rtas(void)
rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
if (!IHANDLE_VALID(rtas_inst)) {
- prom_printf("opening rtas package failed");
+ prom_printf("opening rtas package failed (%x)\n", rtas_inst);
return;
}
@@ -988,7 +1051,7 @@ static void __init prom_instantiate_rtas(void)
if (call_prom_ret("call-method", 3, 2, &entry,
ADDR("instantiate-rtas"),
- rtas_inst, base) == PROM_ERROR
+ rtas_inst, base) != 0
|| entry == 0) {
prom_printf(" failed\n");
return;
@@ -997,8 +1060,10 @@ static void __init prom_instantiate_rtas(void)
reserve_mem(base, size);
- prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base));
- prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+ &base, sizeof(base));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+ &entry, sizeof(entry));
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
@@ -1089,10 +1154,6 @@ static void __init prom_initialize_tce_table(void)
if (base < local_alloc_bottom)
local_alloc_bottom = base;
- /* Save away the TCE table attributes for later use. */
- prom_setprop(node, "linux,tce-base", &base, sizeof(base));
- prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
-
/* It seems OF doesn't null-terminate the path :-( */
memset(path, 0, sizeof(path));
/* Call OF to setup the TCE hardware */
@@ -1101,6 +1162,10 @@ static void __init prom_initialize_tce_table(void)
prom_printf("package-to-path failed\n");
}
+ /* Save away the TCE table attributes for later use. */
+ prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+ prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
prom_debug("TCE table: %s\n", path);
prom_debug("\tnode = 0x%x\n", node);
prom_debug("\tbase = 0x%x\n", base);
@@ -1342,6 +1407,7 @@ static void __init prom_init_client_services(unsigned long pp)
/*
* For really old powermacs, we need to map things we claim.
* For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
*/
static void __init prom_find_mmu(void)
{
@@ -1355,12 +1421,19 @@ static void __init prom_find_mmu(void)
if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
return;
version[sizeof(version) - 1] = 0;
- prom_printf("OF version is '%s'\n", version);
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") != 0)
+ if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ of_workarounds = OF_WA_CLAIM;
+ else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+ call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
+ } else
return;
+ _prom->memory = call_prom("open", 1, 1, ADDR("/memory"));
prom_getprop(_prom->chosen, "mmu", &_prom->mmumap,
sizeof(_prom->mmumap));
+ if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap))
+ of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
}
#else
#define prom_find_mmu()
@@ -1382,16 +1455,17 @@ static void __init prom_init_stdout(void)
memset(path, 0, 256);
call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
val = call_prom("instance-to-package", 1, 1, _prom->stdout);
- prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package",
+ &val, sizeof(val));
prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
- prom_setprop(_prom->chosen, "linux,stdout-path",
- RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path",
+ path, strlen(path) + 1);
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(val, "device_type", type, sizeof(type));
if (strcmp(type, RELOC("display")) == 0)
- prom_setprop(val, "linux,boot-display", NULL, 0);
+ prom_setprop(val, path, "linux,boot-display", NULL, 0);
}
static void __init prom_close_stdin(void)
@@ -1514,7 +1588,7 @@ static void __init prom_check_displays(void)
/* Success */
prom_printf("done\n");
- prom_setprop(node, "linux,opened", NULL, 0);
+ prom_setprop(node, path, "linux,opened", NULL, 0);
/* Setup a usable color table when the appropriate
* method is available. Should update this to set-colors */
@@ -1884,9 +1958,11 @@ static void __init fixup_device_tree(void)
/* interrupt on this revision of u3 is number 0 and level */
interrupts[0] = 0;
interrupts[1] = 1;
- prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+ &interrupts, sizeof(interrupts));
parent = (u32)mpic;
- prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+ &parent, sizeof(parent));
#endif
}
@@ -1922,11 +1998,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
val = RELOC(prom_initrd_start);
- prom_setprop(_prom->chosen, "linux,initrd-start", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start",
+ &val, sizeof(val));
val = RELOC(prom_initrd_end);
- prom_setprop(_prom->chosen, "linux,initrd-end", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end",
+ &val, sizeof(val));
reserve_mem(RELOC(prom_initrd_start),
RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
@@ -1969,14 +2045,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_init_client_services(pp);
/*
- * Init prom stdout device
+ * See if this OF is old enough that we need to do explicit maps
+ * and other workarounds
*/
- prom_init_stdout();
+ prom_find_mmu();
/*
- * See if this OF is old enough that we need to do explicit maps
+ * Init prom stdout device
*/
- prom_find_mmu();
+ prom_init_stdout();
/*
* Check for an initrd
@@ -1989,14 +2066,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
RELOC(of_platform) = prom_find_machine_type();
getprop_rval = RELOC(of_platform);
- prom_setprop(_prom->chosen, "linux,platform",
+ prom_setprop(_prom->chosen, "/chosen", "linux,platform",
&getprop_rval, sizeof(getprop_rval));
#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
- if (RELOC(of_platform) & PLATFORM_PSERIES)
+ if (RELOC(of_platform) == PLATFORM_PSERIES ||
+ RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
prom_send_capabilities();
#endif
@@ -2050,21 +2128,23 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* Fill in some infos for use by the kernel later on
*/
if (RELOC(prom_memory_limit))
- prom_setprop(_prom->chosen, "linux,memory-limit",
+ prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit",
&RELOC(prom_memory_limit),
sizeof(prom_memory_limit));
#ifdef CONFIG_PPC64
if (RELOC(ppc64_iommu_off))
- prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off",
+ NULL, 0);
if (RELOC(iommu_force_on))
- prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on",
+ NULL, 0);
if (RELOC(prom_tce_alloc_start)) {
- prom_setprop(_prom->chosen, "linux,tce-alloc-start",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start",
&RELOC(prom_tce_alloc_start),
sizeof(prom_tce_alloc_start));
- prom_setprop(_prom->chosen, "linux,tce-alloc-end",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end",
&RELOC(prom_tce_alloc_end),
sizeof(prom_tce_alloc_end));
}
@@ -2081,8 +2161,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_printf("copying OF device tree ...\n");
flatten_device_tree();
- /* in case stdin is USB and still active on IBM machines... */
- prom_close_stdin();
+ /*
+ * in case stdin is USB and still active on IBM machines...
+ * Unfortunately quiesce crashes on some powermacs if we have
+ * closed stdin already (in particular the powerbook 101).
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ prom_close_stdin();
/*
* Call OF "quiesce" method to shut down pending DMA's from
diff --git a/arch/powerpc/kernel/ptrace-common.h b/arch/powerpc/kernel/ptrace-common.h
new file mode 100644
index 00000000000..b1babb72967
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace-common.h
@@ -0,0 +1,164 @@
+/*
+ * linux/arch/ppc64/kernel/ptrace-common.h
+ *
+ * Copyright (c) 2002 Stephen Rothwell, IBM Coproration
+ * Extracted from ptrace.c and ptrace32.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#ifndef _PPC64_PTRACE_COMMON_H
+#define _PPC64_PTRACE_COMMON_H
+
+#include <linux/config.h>
+#include <asm/system.h>
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1)
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+static inline unsigned long get_reg(struct task_struct *task, int regno)
+{
+ unsigned long tmp = 0;
+
+ /*
+ * Put the correct FP bits in, they might be wrong as a result
+ * of our lazy FP restore.
+ */
+ if (regno == PT_MSR) {
+ tmp = ((unsigned long *)task->thread.regs)[PT_MSR];
+ tmp |= task->thread.fpexc_mode;
+ } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+ tmp = ((unsigned long *)task->thread.regs)[regno];
+ }
+
+ return tmp;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+static inline int put_reg(struct task_struct *task, int regno,
+ unsigned long data)
+{
+ if (regno < PT_SOFTE) {
+ if (regno == PT_MSR)
+ data = (data & MSR_DEBUGCHANGE)
+ | (task->thread.regs->msr & ~MSR_DEBUGCHANGE);
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+}
+
+static inline void set_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+ if (regs != NULL)
+ regs->msr |= MSR_SE;
+ set_ti_thread_flag(task->thread_info, TIF_SINGLESTEP);
+}
+
+static inline void clear_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+ if (regs != NULL)
+ regs->msr &= ~MSR_SE;
+ clear_ti_thread_flag(task->thread_info, TIF_SINGLESTEP);
+}
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined (32- and 64-bit) gdb.
+ */
+
+/*
+ * Get contents of AltiVec register state in task TASK
+ */
+static inline int get_vrregs(unsigned long __user *data,
+ struct task_struct *task)
+{
+ unsigned long regsize;
+
+ /* copy AltiVec registers VR[0] .. VR[31] */
+ regsize = 32 * sizeof(vector128);
+ if (copy_to_user(data, task->thread.vr, regsize))
+ return -EFAULT;
+ data += (regsize / sizeof(unsigned long));
+
+ /* copy VSCR */
+ regsize = 1 * sizeof(vector128);
+ if (copy_to_user(data, &task->thread.vscr, regsize))
+ return -EFAULT;
+ data += (regsize / sizeof(unsigned long));
+
+ /* copy VRSAVE */
+ if (put_user(task->thread.vrsave, (u32 __user *)data))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Write contents of AltiVec register state into task TASK.
+ */
+static inline int set_vrregs(struct task_struct *task,
+ unsigned long __user *data)
+{
+ unsigned long regsize;
+
+ /* copy AltiVec registers VR[0] .. VR[31] */
+ regsize = 32 * sizeof(vector128);
+ if (copy_from_user(task->thread.vr, data, regsize))
+ return -EFAULT;
+ data += (regsize / sizeof(unsigned long));
+
+ /* copy VSCR */
+ regsize = 1 * sizeof(vector128);
+ if (copy_from_user(&task->thread.vscr, data, regsize))
+ return -EFAULT;
+ data += (regsize / sizeof(unsigned long));
+
+ /* copy VRSAVE */
+ if (get_user(task->thread.vrsave, (u32 __user *)data))
+ return -EFAULT;
+
+ return 0;
+}
+#endif
+
+static inline int ptrace_set_debugreg(struct task_struct *task,
+ unsigned long addr, unsigned long data)
+{
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* Ensure translation is on */
+ if (data && !(data & DABR_TRANSLATION))
+ return -EIO;
+
+ task->thread.dabr = data;
+ return 0;
+}
+
+#endif /* _PPC64_PTRACE_COMMON_H */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 3d2abd95c7a..400793c7130 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -36,8 +36,9 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/system.h>
+
#ifdef CONFIG_PPC64
-#include <asm/ptrace-common.h>
+#include "ptrace-common.h"
#endif
#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 91eb952e029..61762640b87 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -33,7 +33,8 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/system.h>
-#include <asm/ptrace-common.h>
+
+#include "ptrace-common.h"
/*
* does not yet catch signals sent when the child dies.
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 5bdd5b079d9..7a95b8a2835 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -32,7 +32,6 @@
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
-#include <asm/systemcfg.h>
/* Token for Sensors */
#define KEY_SWITCH 0x0001
@@ -259,7 +258,7 @@ static int __init proc_rtas_init(void)
{
struct proc_dir_entry *entry;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR)
return 1;
rtas_node = of_find_node_by_name(NULL, "rtas");
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 00000000000..635d3b9a881
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,105 @@
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+
+
+#define MAX_RTC_WAIT 5000 /* 5 sec */
+#define RTAS_CLOCK_BUSY (-2)
+unsigned long __init rtas_get_boot_time(void)
+{
+ int ret[8];
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ wait_time = rtas_extended_busy_delay_time(error);
+ /* This is boot time so we spin. */
+ udelay(wait_time*1000);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return 0;
+ }
+
+ return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+}
+
+/* NOTE: get_rtc_time will get an error if executed in interrupt context
+ * and if a delay is needed to read the clock. In this case we just
+ * silently return without updating rtc_tm.
+ */
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
+{
+ int ret[8];
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt() && printk_ratelimit()) {
+ memset(&rtc_tm, 0, sizeof(struct rtc_time));
+ printk(KERN_WARNING "error: reading clock"
+ " would delay interrupt\n");
+ return; /* delay not allowed */
+ }
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return;
+ }
+
+ rtc_tm->tm_sec = ret[5];
+ rtc_tm->tm_min = ret[4];
+ rtc_tm->tm_hour = ret[3];
+ rtc_tm->tm_mday = ret[2];
+ rtc_tm->tm_mon = ret[1] - 1;
+ rtc_tm->tm_year = ret[0] - 1900;
+}
+
+int rtas_set_rtc_time(struct rtc_time *tm)
+{
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ tm->tm_year + 1900, tm->tm_mon + 1,
+ tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec, 0);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt())
+ return 1; /* probably decrementer */
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit())
+ printk(KERN_WARNING "error: setting the clock failed (%d)\n",
+ error);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 9d4e07f6f1e..4283fa33f78 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -29,9 +29,6 @@
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/lmb.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
struct rtas_t rtas = {
.lock = SPIN_LOCK_UNLOCKED
@@ -671,7 +668,7 @@ void __init rtas_initialize(void)
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (_machine == PLATFORM_PSERIES_LPAR)
rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
#endif
rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 00000000000..60dec2401c2
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,455 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+ char * status;
+
+ status = get_property(dn, "status", NULL);
+
+ if (!status)
+ return 1;
+
+ if (!strcmp(status, "okay"))
+ return 1;
+
+ return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+ rtas_pci_read_config,
+ rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+ char *model = (char *)get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+ unsigned int addr_size_words,
+ struct reg_property64 *reg)
+{
+ unsigned int *ui_ptr = NULL, len;
+
+ /* Found a PHB, now figure out where his registers are mapped. */
+ ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+ if (ui_ptr == NULL)
+ return 1;
+
+ if (addr_size_words == 1) {
+ reg->address = ((struct reg_property32 *)ui_ptr)->address;
+ reg->size = ((struct reg_property32 *)ui_ptr)->size;
+ } else {
+ *reg = *((struct reg_property64 *)ui_ptr);
+ }
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct reg_property64 reg_struct;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+ return;
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ int addr_cells;
+ unsigned int *buid_vals;
+ unsigned int len;
+ unsigned long buid;
+
+ if (ibm_read_pci_config == -1) return 0;
+
+ /* PHB's will always be children of the root node,
+ * or so it is promised by the current firmware. */
+ if (phb->parent == NULL)
+ return 0;
+ if (phb->parent->parent)
+ return 0;
+
+ buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+ if (buid_vals == NULL)
+ return 0;
+
+ addr_cells = prom_n_addr_cells(phb);
+ if (addr_cells == 1) {
+ buid = (unsigned long) buid_vals[0];
+ } else {
+ buid = (((unsigned long)buid_vals[0]) << 32UL) |
+ (((unsigned long)buid_vals[1]) & 0xffffffff);
+ }
+ return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ int *bus_range;
+ unsigned int len;
+
+ bus_range = (int *) get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+ struct pci_controller *phb,
+ unsigned int addr_size_words)
+{
+ if (is_python(dev))
+ python_countermeasures(dev, addr_size_words);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ unsigned int root_size_cells = 0;
+ unsigned int index;
+ unsigned int *opprop = NULL;
+ struct device_node *root = of_find_node_by_path("/");
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+ opprop = (unsigned int *)get_property(root,
+ "platform-open-pic", NULL);
+ }
+
+ root_size_cells = prom_n_size_cells(root);
+
+ index = 0;
+
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+ if (node->type == NULL || strcmp(node->type, "pci") != 0)
+ continue;
+
+ phb = pcibios_alloc_controller(node);
+ if (!phb)
+ continue;
+ setup_phb(node, phb, root_size_cells);
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+ int addr = root_size_cells * (index + 2) - 1;
+ mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+ }
+#endif
+ index++;
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /*
+ * pci_probe_only and pci_assign_all_buses can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ int *prop;
+
+ prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+ NULL);
+ if (prop)
+ pci_probe_only = *prop;
+
+ prop = (int *)get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop)
+ pci_assign_all_buses = *prop;
+ }
+
+ return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ unsigned int root_size_cells = 0;
+ struct pci_controller *phb;
+ int primary;
+
+ root_size_cells = prom_n_size_cells(root);
+
+ primary = list_empty(&hose_list);
+ phb = pcibios_alloc_controller(dn);
+ if (!phb)
+ return NULL;
+ setup_phb(dn, phb, root_size_cells);
+ pci_process_bridge_OF_ranges(phb, dn, primary);
+
+ pci_setup_phb_io_dynamic(phb, primary);
+ of_node_put(root);
+
+ pci_devs_phb_init_dynamic(phb);
+ scan_phb(phb);
+
+ return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct resource *res;
+ int rc, i;
+
+ res = b->resource[0];
+ if (!res->flags) {
+ printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+ b->name);
+ return 1;
+ }
+
+ rc = unmap_bus_range(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ if (release_resource(res)) {
+ printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ for (i = 1; i < 3; ++i) {
+ res = b->resource[i];
+ if (!res->flags && i == 0) {
+ printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+ if (res->flags && release_resource(res)) {
+ printk(KERN_ERR
+ "%s: failed to release IO %d on bus %s\n",
+ __FUNCTION__, i, b->name);
+ return 1;
+ }
+ }
+
+ list_del(&phb->list_node);
+ pcibios_free_controller(phb);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e22856ecb5a..bd3eb4292b5 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
+#include <asm/vdso_datapage.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -51,15 +52,26 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/lmb.h>
+#include <asm/xmon.h>
+
+#include "setup.h"
#undef DEBUG
#ifdef DEBUG
+#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
+#ifdef CONFIG_PPC_MULTIPLATFORM
+int _machine = 0;
+EXPORT_SYMBOL(_machine);
+#endif
+
+unsigned long klimit = (unsigned long) _end;
+
/*
* This still seems to be needed... -- paulus
*/
@@ -433,10 +445,8 @@ void __init check_for_initrd(void)
if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
initrd_end > initrd_start)
ROOT_DEV = Root_RAM0;
- else {
- printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end);
+ else
initrd_start = initrd_end = 0;
- }
if (initrd_start)
printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
@@ -510,8 +520,8 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR &&
- (dn = of_find_node_by_path("/rtas"))) {
+ if (_machine == PLATFORM_PSERIES_LPAR &&
+ (dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
unsigned int *ireg;
@@ -555,7 +565,27 @@ void __init smp_setup_cpu_maps(void)
cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
}
- systemcfg->processorCount = num_present_cpus();
+ vdso_data->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_XMON
+static int __init early_xmon(char *p)
+{
+ /* ensure xmon is enabled */
+ if (p) {
+ if (strncmp(p, "on", 2) == 0)
+ xmon_init(1);
+ if (strncmp(p, "off", 3) == 0)
+ xmon_init(0);
+ if (strncmp(p, "early", 5) != 0)
+ return 0;
+ }
+ xmon_init(1);
+ debugger(NULL);
+
+ return 0;
+}
+early_param("xmon", early_xmon);
+#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 00000000000..2ebba755272
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,6 @@
+#ifndef _POWERPC_KERNEL_SETUP_H
+#define _POWERPC_KERNEL_SETUP_H
+
+void check_for_initrd(void);
+
+#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 3af2631e3fa..e5694335bf1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,6 +40,8 @@
#include <asm/xmon.h>
#include <asm/time.h>
+#include "setup.h"
+
#define DBG(fmt...)
#if defined CONFIG_KGDB
@@ -55,10 +57,6 @@ extern void power4_idle(void);
boot_infos_t *boot_infos;
struct ide_machdep_calls ppc_ide_md;
-/* XXX should go elsewhere */
-int __irq_offset_value;
-EXPORT_SYMBOL(__irq_offset_value);
-
int boot_cpuid;
EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
@@ -70,8 +68,6 @@ unsigned int DMA_MODE_WRITE;
int have_of = 1;
#ifdef CONFIG_PPC_MULTIPLATFORM
-int _machine = 0;
-
extern void prep_init(void);
extern void pmac_init(void);
extern void chrp_init(void);
@@ -279,7 +275,6 @@ arch_initcall(ppc_init);
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
- extern char *klimit;
extern void do_init_bootmem(void);
/* so udelay does something sensible, assume <= 1000 bogomips */
@@ -303,14 +298,9 @@ void __init setup_arch(char **cmdline_p)
pmac_feature_init(); /* New cool way */
#endif
-#ifdef CONFIG_XMON
- xmon_map_scc();
- if (strstr(cmd_line, "xmon")) {
- xmon_init(1);
- debugger(NULL);
- }
-#endif /* CONFIG_XMON */
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+#ifdef CONFIG_XMON_DEFAULT
+ xmon_init(1);
+#endif
#if defined(CONFIG_KGDB)
if (ppc_md.kgdb_map_scc)
@@ -343,7 +333,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) klimit;
+ init_mm.brk = klimit;
/* Save unparsed command line copy for /proc/cmdline */
strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0471e843b6c..e3fb78397dc 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -57,9 +57,11 @@
#include <asm/lmb.h>
#include <asm/iseries/it_lp_naca.h>
#include <asm/firmware.h>
-#include <asm/systemcfg.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
+#include <asm/kexec.h>
+
+#include "setup.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -94,22 +96,21 @@ extern void udbg_init_maple_realmode(void);
do { udbg_putc = call_rtas_display_status_delay; } while(0)
#endif
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void stab_initialize(unsigned long stab);
-extern void htab_initialize(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-
int have_of = 1;
int boot_cpuid = 0;
int boot_cpuid_phys = 0;
dev_t boot_dev;
u64 ppc64_pft_size;
-struct ppc64_caches ppc64_caches;
+/* Pick defaults since we might want to patch instructions
+ * before we've read this from the device tree.
+ */
+struct ppc64_caches ppc64_caches = {
+ .dline_size = 0x80,
+ .log_dline_size = 7,
+ .iline_size = 0x80,
+ .log_iline_size = 7
+};
EXPORT_SYMBOL_GPL(ppc64_caches);
/*
@@ -254,11 +255,10 @@ void __init early_setup(unsigned long dt_ptr)
* Iterate all ppc_md structures until we find the proper
* one for the current machine type
*/
- DBG("Probing machine type for platform %x...\n",
- systemcfg->platform);
+ DBG("Probing machine type for platform %x...\n", _machine);
for (mach = machines; *mach; mach++) {
- if ((*mach)->probe(systemcfg->platform))
+ if ((*mach)->probe(_machine))
break;
}
/* What can we do if we didn't find ? */
@@ -290,6 +290,28 @@ void __init early_setup(unsigned long dt_ptr)
DBG(" <- early_setup()\n");
}
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+ struct paca_struct *lpaca = get_paca();
+
+ /* Mark enabled in PACA */
+ lpaca->proc_enabled = 0;
+
+ /* Initialize hash table for that CPU */
+ htab_initialize_secondary();
+
+ /* Initialize STAB/SLB. We use a virtual address as it works
+ * in real mode on pSeries and we want a virutal address on
+ * iSeries anyway
+ */
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_addr);
+}
+
+#endif /* CONFIG_SMP */
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
void smp_release_cpus(void)
@@ -315,7 +337,8 @@ void smp_release_cpus(void)
#endif /* CONFIG_SMP || CONFIG_KEXEC */
/*
- * Initialize some remaining members of the ppc64_caches and systemcfg structures
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
* (at least until we get rid of them completely). This is mostly some
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
@@ -340,7 +363,7 @@ static void __init initialize_cache_info(void)
const char *dc, *ic;
/* Then read cache informations */
- if (systemcfg->platform == PLATFORM_POWERMAC) {
+ if (_machine == PLATFORM_POWERMAC) {
dc = "d-cache-block-size";
ic = "i-cache-block-size";
} else {
@@ -360,9 +383,8 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->dcache_size = ppc64_caches.dsize = size;
- systemcfg->dcache_line_size =
- ppc64_caches.dline_size = lsize;
+ ppc64_caches.dsize = size;
+ ppc64_caches.dline_size = lsize;
ppc64_caches.log_dline_size = __ilog2(lsize);
ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
@@ -378,20 +400,13 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->icache_size = ppc64_caches.isize = size;
- systemcfg->icache_line_size =
- ppc64_caches.iline_size = lsize;
+ ppc64_caches.isize = size;
+ ppc64_caches.iline_size = lsize;
ppc64_caches.log_iline_size = __ilog2(lsize);
ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
}
}
- /* Add an eye catcher and the systemcfg layout version number */
- strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
- systemcfg->version.major = SYSTEMCFG_MAJOR;
- systemcfg->version.minor = SYSTEMCFG_MINOR;
- systemcfg->processor = mfspr(SPRN_PVR);
-
DBG(" <- initialize_cache_info()\n");
}
@@ -409,6 +424,10 @@ void __init setup_system(void)
*/
unflatten_device_tree();
+#ifdef CONFIG_KEXEC
+ kexec_setup(); /* requires unflattened device tree. */
+#endif
+
/*
* Fill the ppc64_caches & systemcfg structures with informations
* retreived from the device-tree. Need to be called before
@@ -478,15 +497,14 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
- printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
- printk("systemcfg = 0x%p\n", systemcfg);
- printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
- printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
- printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
+ printk("ppc64_interrupt_controller = 0x%ld\n",
+ ppc64_interrupt_controller);
+ printk("platform = 0x%x\n", _machine);
+ printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size());
printk("ppc64_caches.dcache_line_size = 0x%x\n",
- ppc64_caches.dline_size);
+ ppc64_caches.dline_size);
printk("ppc64_caches.icache_line_size = 0x%x\n",
- ppc64_caches.iline_size);
+ ppc64_caches.iline_size);
printk("htab_address = 0x%p\n", htab_address);
printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
printk("-----------------------------------------------------\n");
@@ -551,33 +569,6 @@ static void __init emergency_stack_init(void)
}
/*
- * Called from setup_arch to initialize the bitmap of available
- * syscalls in the systemcfg page
- */
-void __init setup_syscall_map(void)
-{
- unsigned int i, count64 = 0, count32 = 0;
- extern unsigned long *sys_call_table;
- extern unsigned long sys_ni_syscall;
-
-
- for (i = 0; i < __NR_syscalls; i++) {
- if (sys_call_table[i*2] != sys_ni_syscall) {
- count64++;
- systemcfg->syscall_map_64[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- if (sys_call_table[i*2+1] != sys_ni_syscall) {
- count32++;
- systemcfg->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- }
- printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n",
- count32, count64);
-}
-
-/*
* Called into from start_kernel, after lock_kernel has been called.
* Initializes bootmem, which is unsed to manage page allocation until
* mem_init is called.
@@ -618,9 +609,6 @@ void __init setup_arch(char **cmdline_p)
do_init_bootmem();
sparse_init();
- /* initialize the syscall map in systemcfg */
- setup_syscall_map();
-
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
@@ -858,26 +846,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-#ifdef CONFIG_XMON
-static int __init early_xmon(char *p)
-{
- /* ensure xmon is enabled */
- if (p) {
- if (strncmp(p, "on", 2) == 0)
- xmon_init(1);
- if (strncmp(p, "off", 3) == 0)
- xmon_init(0);
- if (strncmp(p, "early", 5) != 0)
- return 0;
- }
- xmon_init(1);
- debugger(NULL);
-
- return 0;
-}
-early_param("xmon", early_xmon);
-#endif
-
void cpu_die(void)
{
if (ppc_md.cpu_die)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 081d931eae4..5a2eba60dd3 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -42,10 +42,11 @@
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/sigcontext.h>
+#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
-#include <asm/vdso.h>
#else
#include <asm/ucontext.h>
#include <asm/pgtable.h>
@@ -402,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
ELF_NFPREG * sizeof(double)))
return 1;
- current->thread.fpscr.val = 0; /* turn off all fp exceptions */
-
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
@@ -808,18 +807,18 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
-#ifdef CONFIG_PPC64
if (vdso32_rt_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, frame, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, frame, __NR_rt_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->tramp;
}
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
@@ -1089,19 +1088,18 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
|| __put_user(sig, &sc->signal))
goto badframe;
-#ifdef CONFIG_PPC64
if (vdso32_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, &frame->mctx, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->mctx.tramp;
}
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 58194e15071..1decf278553 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -131,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
flush_fp_to_thread(current);
- /* Make sure signal doesn't get spurrious FP exceptions */
- current->thread.fpscr.val = 0;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
@@ -423,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto badframe;
+ /* Make sure signal handler doesn't get spurious FP exceptions */
+ current->thread.fpscr.val = 0;
+
/* Set up to return from userspace. */
if (vdso64_rt_sigtramp && current->thread.vdso_base) {
regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 36d67a8d7cb..30374d2f88e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -44,19 +44,21 @@
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
-int smp_hw_index[NR_CPUS];
-struct thread_info *secondary_ti;
-
#ifdef DEBUG
+#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
+int smp_hw_index[NR_CPUS];
+struct thread_info *secondary_ti;
+
cpumask_t cpu_possible_map = CPU_MASK_NONE;
cpumask_t cpu_online_map = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
@@ -368,9 +370,11 @@ int generic_cpu_disable(void)
if (cpu == boot_cpuid)
return -EBUSY;
- systemcfg->processorCount--;
cpu_clear(cpu, cpu_online_map);
+#ifdef CONFIG_PPC64
+ vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
+#endif
return 0;
}
@@ -388,9 +392,11 @@ int generic_cpu_enable(unsigned int cpu)
while (!cpu_online(cpu))
cpu_relax();
+#ifdef CONFIG_PPC64
fixup_irqs(cpu_online_map);
/* counter the irq disable in fixup_irqs */
local_irq_enable();
+#endif
return 0;
}
@@ -419,7 +425,9 @@ void generic_mach_cpu_die(void)
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
+#ifdef CONFIG_PPC64
flush_tlb_pending();
+#endif
cpu_set(cpu, cpu_online_map);
local_irq_enable();
}
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index a8210ed5c68..9c921d1c408 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -52,7 +52,6 @@
#include <asm/semaphore.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
-#include <asm/systemcfg.h>
#include <asm/ppc-pci.h>
/* readdir & getdents */
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index f72ced11212..91b93d917b6 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -247,7 +247,7 @@ long ppc64_personality(unsigned long personality)
#define OVERRIDE_MACHINE 0
#endif
-static inline int override_machine(char *mach)
+static inline int override_machine(char __user *mach)
{
if (OVERRIDE_MACHINE) {
/* change ppc64 to ppc */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 00000000000..0f0c3a9ae2e
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,383 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+ ssize_t ret;
+ unsigned long snooze;
+
+ ret = sscanf(buf, "%lu", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+ struct device_node *options;
+ unsigned int *val;
+ unsigned int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ options = find_path_device("/options");
+ if (!options)
+ return 1;
+
+ val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+ NULL);
+ if (!smt_snooze_cmdline && val) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = *val;
+ }
+
+ return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ int snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ smt_snooze_cmdline = 1;
+
+ if (get_option(&str, &snooze)) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+ }
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+ unsigned long (*func)(unsigned long),
+ unsigned long arg)
+{
+ cpumask_t old_affinity = current->cpus_allowed;
+ unsigned long ret;
+
+ /* should return -EINVAL to userspace */
+ if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+ return 0;
+
+ ret = func(arg);
+
+ set_cpus_allowed(current, old_affinity);
+
+ return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+ return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+ ppc64_enable_pmcs(); \
+ mtspr(ADDRESS, val); \
+ return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+ store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+ return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_create_file(s, &attr_mmcr0);
+ sysdev_create_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_create_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_create_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_create_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_create_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_create_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_create_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_create_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_create_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_create_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+ BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_remove_file(s, &attr_mmcr0);
+ sysdev_remove_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_remove_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_remove_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_remove_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_remove_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_remove_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_remove_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_remove_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_remove_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_remove_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (node_online(i)) {
+ int p_node = parent_node(i);
+ struct node *parent = NULL;
+
+ if (p_node != i)
+ parent = &node_devices[p_node];
+
+ register_node(&node_devices[i], i, parent);
+ }
+ }
+}
+#else
+static void register_nodes(void)
+{
+ return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+ struct node *parent = NULL;
+
+ register_nodes();
+
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
+#endif
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (!ppc_md.cpu_die)
+ c->no_control = 1;
+
+ if (cpu_online(cpu) || (c->no_control == 0)) {
+ register_cpu(c, cpu, parent);
+
+ sysdev_create_file(&c->sysdev, &attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+
+ return 0;
+}
+__initcall(topology_init);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a6282b625b4..de8479769bb 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -62,8 +62,8 @@
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
#include <asm/firmware.h>
#endif
#ifdef CONFIG_PPC_ISERIES
@@ -130,6 +130,34 @@ unsigned long tb_last_stamp;
*/
DEFINE_PER_CPU(unsigned long, last_jiffy);
+void __delay(unsigned long loops)
+{
+ unsigned long start;
+ int diff;
+
+ if (__USE_RTC()) {
+ start = get_rtcl();
+ do {
+ /* the RTCL register wraps at 1000000000 */
+ diff = get_rtcl() - start;
+ if (diff < 0)
+ diff += 1000000000;
+ } while (diff < loops);
+ } else {
+ start = get_tbl();
+ while (get_tbl() - start < loops)
+ HMT_low();
+ HMT_medium();
+ }
+}
+EXPORT_SYMBOL(__delay);
+
+void udelay(unsigned long usecs)
+{
+ __delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
+
static __inline__ void timer_check_rtc(void)
{
/*
@@ -261,7 +289,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
do_gtod.varp = temp_varp;
do_gtod.var_idx = temp_idx;
-#ifdef CONFIG_PPC64
/*
* tb_update_count is used to allow the userspace gettimeofday code
* to assure itself that it sees a consistent view of the tb_to_xs and
@@ -271,14 +298,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
*/
- ++(systemcfg->tb_update_count);
+ ++(vdso_data->tb_update_count);
smp_wmb();
- systemcfg->tb_orig_stamp = new_tb_stamp;
- systemcfg->stamp_xsec = new_stamp_xsec;
- systemcfg->tb_to_xs = new_tb_to_xs;
+ vdso_data->tb_orig_stamp = new_tb_stamp;
+ vdso_data->stamp_xsec = new_stamp_xsec;
+ vdso_data->tb_to_xs = new_tb_to_xs;
+ vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
smp_wmb();
- ++(systemcfg->tb_update_count);
-#endif
+ ++(vdso_data->tb_update_count);
}
/*
@@ -357,8 +385,8 @@ static void iSeries_tb_recal(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
do_gtod.varp->tb_to_xs = tb_to_xs;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->tb_to_xs = tb_to_xs;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -483,6 +511,8 @@ void __init smp_space_timers(unsigned int max_cpus)
unsigned long offset = tb_ticks_per_jiffy / max_cpus;
unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
+ /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
+ previous_tb -= tb_ticks_per_jiffy;
for_each_cpu(i) {
if (i != boot_cpuid) {
previous_tb += offset;
@@ -558,10 +588,8 @@ int do_settimeofday(struct timespec *tv)
new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs;
update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
-#ifdef CONFIG_PPC64
- systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
- systemcfg->tz_dsttime = sys_tz.tz_dsttime;
-#endif
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
write_sequnlock_irqrestore(&xtime_lock, flags);
clock_was_set();
@@ -710,13 +738,12 @@ void __init time_init(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
do_gtod.varp->tb_to_xs = tb_to_xs;
do_gtod.tb_to_us = tb_to_us;
-#ifdef CONFIG_PPC64
- systemcfg->tb_orig_stamp = tb_last_jiffy;
- systemcfg->tb_update_count = 0;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
- systemcfg->tb_to_xs = tb_to_xs;
-#endif
+
+ vdso_data->tb_orig_stamp = tb_last_jiffy;
+ vdso_data->tb_update_count = 0;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
+ vdso_data->tb_to_xs = tb_to_xs;
time_freq = 0;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0578f838760..1511454c469 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -49,7 +49,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
-#include <asm/systemcfg.h>
#endif
#ifdef CONFIG_PPC64 /* XXX */
@@ -129,7 +128,7 @@ int die(const char *str, struct pt_regs *regs, long err)
nl = 1;
#endif
#ifdef CONFIG_PPC64
- switch (systemcfg->platform) {
+ switch (_machine) {
case PLATFORM_PSERIES:
printk("PSERIES ");
nl = 1;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
new file mode 100644
index 00000000000..0d878e72fc4
--- /dev/null
+++ b/arch/powerpc/kernel/udbg.c
@@ -0,0 +1,125 @@
+/*
+ * polling mode stateless debugging stuff, originally for NS16550 Serial Ports
+ *
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdarg.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <asm/processor.h>
+
+void (*udbg_putc)(unsigned char c);
+unsigned char (*udbg_getc)(void);
+int (*udbg_getc_poll)(void);
+
+/* udbg library, used by xmon et al */
+void udbg_puts(const char *s)
+{
+ if (udbg_putc) {
+ char c;
+
+ if (s && *s != '\0') {
+ while ((c = *s++) != '\0')
+ udbg_putc(c);
+ }
+ }
+#if 0
+ else {
+ printk("%s", s);
+ }
+#endif
+}
+
+int udbg_write(const char *s, int n)
+{
+ int remain = n;
+ char c;
+
+ if (!udbg_putc)
+ return 0;
+
+ if (s && *s != '\0') {
+ while (((c = *s++) != '\0') && (remain-- > 0)) {
+ udbg_putc(c);
+ }
+ }
+
+ return n - remain;
+}
+
+int udbg_read(char *buf, int buflen)
+{
+ char c, *p = buf;
+ int i;
+
+ if (!udbg_getc)
+ return 0;
+
+ for (i = 0; i < buflen; ++i) {
+ do {
+ c = udbg_getc();
+ } while (c == 0x11 || c == 0x13);
+ if (c == 0)
+ break;
+ *p++ = c;
+ }
+
+ return i;
+}
+
+#define UDBG_BUFSIZE 256
+void udbg_printf(const char *fmt, ...)
+{
+ unsigned char buf[UDBG_BUFSIZE];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+}
+
+/*
+ * Early boot console based on udbg
+ */
+static void udbg_console_write(struct console *con, const char *s,
+ unsigned int n)
+{
+ udbg_write(s, n);
+}
+
+static struct console udbg_console = {
+ .name = "udbg",
+ .write = udbg_console_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
+static int early_console_initialized;
+
+void __init disable_early_printk(void)
+{
+ if (!early_console_initialized)
+ return;
+ unregister_console(&udbg_console);
+ early_console_initialized = 0;
+}
+
+/* called by setup_system */
+void register_early_udbg_console(void)
+{
+ early_console_initialized = 1;
+ register_console(&udbg_console);
+}
+
+#if 0 /* if you want to use this as a regular output console */
+console_initcall(register_udbg_console);
+#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
new file mode 100644
index 00000000000..9313574ab93
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -0,0 +1,123 @@
+/*
+ * udbg for for NS16550 compatable serial ports
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/io.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+struct NS16550 {
+ /* this struct must be packed */
+ unsigned char rbr; /* 0 */
+ unsigned char ier; /* 1 */
+ unsigned char fcr; /* 2 */
+ unsigned char lcr; /* 3 */
+ unsigned char mcr; /* 4 */
+ unsigned char lsr; /* 5 */
+ unsigned char msr; /* 6 */
+ unsigned char scr; /* 7 */
+};
+
+#define thr rbr
+#define iir fcr
+#define dll rbr
+#define dlm ier
+#define dlab lcr
+
+#define LSR_DR 0x01 /* Data ready */
+#define LSR_OE 0x02 /* Overrun */
+#define LSR_PE 0x04 /* Parity error */
+#define LSR_FE 0x08 /* Framing error */
+#define LSR_BI 0x10 /* Break */
+#define LSR_THRE 0x20 /* Xmit holding register empty */
+#define LSR_TEMT 0x40 /* Xmitter empty */
+#define LSR_ERR 0x80 /* Error */
+
+static volatile struct NS16550 __iomem *udbg_comport;
+
+static void udbg_550_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ out_8(&udbg_comport->thr, c);
+ if (c == '\n')
+ udbg_550_putc('\r');
+ }
+}
+
+static int udbg_550_getc_poll(void)
+{
+ if (udbg_comport) {
+ if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
+ return in_8(&udbg_comport->rbr);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_550_getc(void)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
+ /* wait for char */;
+ return in_8(&udbg_comport->rbr);
+ }
+ return 0;
+}
+
+void udbg_init_uart(void __iomem *comport, unsigned int speed)
+{
+ u16 dll = speed ? (115200 / speed) : 12;
+
+ if (comport) {
+ udbg_comport = (struct NS16550 __iomem *)comport;
+ out_8(&udbg_comport->lcr, 0x00);
+ out_8(&udbg_comport->ier, 0xff);
+ out_8(&udbg_comport->ier, 0x00);
+ out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */
+ out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600,
+ 3 = 38400, 12 = 9600 baud */
+ out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero
+ for fast rates; */
+ out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */
+ out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */
+ out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */
+ udbg_putc = udbg_550_putc;
+ udbg_getc = udbg_550_getc;
+ udbg_getc_poll = udbg_550_getc_poll;
+ }
+}
+
+#ifdef CONFIG_PPC_MAPLE
+void udbg_maple_real_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ real_writeb(c, &udbg_comport->thr); eieio();
+ if (c == '\n')
+ udbg_maple_real_putc('\r');
+ }
+}
+
+void udbg_init_maple_realmode(void)
+{
+ udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8;
+
+ udbg_putc = udbg_maple_real_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
diff --git a/arch/powerpc/kernel/udbg_scc.c b/arch/powerpc/kernel/udbg_scc.c
new file mode 100644
index 00000000000..820c5355150
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_scc.c
@@ -0,0 +1,135 @@
+/*
+ * udbg for for zilog scc ports as found on Apple PowerMacs
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pmac_feature.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define SCC_TXRDY 4
+#define SCC_RXRDY 1
+
+static volatile u8 __iomem *sccc;
+static volatile u8 __iomem *sccd;
+
+static void udbg_scc_putc(unsigned char c)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_TXRDY) == 0)
+ ;
+ out_8(sccd, c);
+ if (c == '\n')
+ udbg_scc_putc('\r');
+ }
+}
+
+static int udbg_scc_getc_poll(void)
+{
+ if (sccc) {
+ if ((in_8(sccc) & SCC_RXRDY) != 0)
+ return in_8(sccd);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_scc_getc(void)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_RXRDY) == 0)
+ ;
+ return in_8(sccd);
+ }
+ return 0;
+}
+
+static unsigned char scc_inittab[] = {
+ 13, 0, /* set baud rate divisor */
+ 12, 0,
+ 14, 1, /* baud rate gen enable, src=rtxc */
+ 11, 0x50, /* clocks = br gen */
+ 5, 0xea, /* tx 8 bits, assert DTR & RTS */
+ 4, 0x46, /* x16 clock, 1 stop */
+ 3, 0xc1, /* rx enable, 8 bits */
+};
+
+void udbg_init_scc(struct device_node *np)
+{
+ u32 *reg;
+ unsigned long addr;
+ int i, x;
+
+ if (np == NULL)
+ np = of_find_node_by_name(NULL, "escc");
+ if (np == NULL || np->parent == NULL)
+ return;
+
+ udbg_printf("found SCC...\n");
+ /* Get address within mac-io ASIC */
+ reg = (u32 *)get_property(np, "reg", NULL);
+ if (reg == NULL)
+ return;
+ addr = reg[0];
+ udbg_printf("local addr: %lx\n", addr);
+ /* Get address of mac-io PCI itself */
+ reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL);
+ if (reg == NULL)
+ return;
+ addr += reg[2];
+ udbg_printf("final addr: %lx\n", addr);
+
+ /* Setup for 57600 8N1 */
+ addr += 0x20;
+ sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ;
+ sccc += addr & ~PAGE_MASK;
+ sccd = sccc + 0x10;
+
+ udbg_printf("ioremap result sccc: %p\n", sccc);
+ mb();
+
+ for (i = 20000; i != 0; --i)
+ x = in_8(sccc);
+ out_8(sccc, 0x09); /* reset A or B side */
+ out_8(sccc, 0xc0);
+ for (i = 0; i < sizeof(scc_inittab); ++i)
+ out_8(sccc, scc_inittab[i]);
+
+ udbg_putc = udbg_scc_putc;
+ udbg_getc = udbg_scc_getc;
+ udbg_getc_poll = udbg_scc_getc_poll;
+
+ udbg_puts("Hello World !\n");
+}
+
+static void udbg_real_scc_putc(unsigned char c)
+{
+ while ((real_readb(sccc) & SCC_TXRDY) == 0)
+ ;
+ real_writeb(c, sccd);
+ if (c == '\n')
+ udbg_real_scc_putc('\r');
+}
+
+void udbg_init_pmac_realmode(void)
+{
+ sccc = (volatile u8 __iomem *)0x80013020ul;
+ sccd = (volatile u8 __iomem *)0x80013030ul;
+
+ udbg_putc = udbg_real_scc_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
new file mode 100644
index 00000000000..f0c47dab090
--- /dev/null
+++ b/arch/powerpc/kernel/vdso.c
@@ -0,0 +1,743 @@
+/*
+ * linux/arch/ppc64/kernel/vdso.c
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/lmb.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Max supported size for symbol names */
+#define MAX_SYMNAME 64
+
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+unsigned int vdso32_pages;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+#ifdef CONFIG_PPC64
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+unsigned int vdso64_pages;
+unsigned long vdso64_rt_sigtramp;
+#endif /* CONFIG_PPC64 */
+
+/*
+ * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
+ * Once the early boot kernel code no longer needs to muck around
+ * with it, it will become dynamically allocated
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/* Format of the patch table */
+struct vdso_patch_def
+{
+ unsigned long ftr_mask, ftr_value;
+ const char *gen_name;
+ const char *fix_name;
+};
+
+/* Table of functions to patch based on the CPU type/revision
+ *
+ * Currently, we only change sync_dicache to do nothing on processors
+ * with a coherent icache
+ */
+static struct vdso_patch_def vdso_patches[] = {
+ {
+ CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
+ "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_gettimeofday", NULL
+ },
+};
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+ Elf32_Ehdr *hdr; /* ptr to ELF */
+ Elf32_Sym *dynsym; /* ptr to .dynsym section */
+ unsigned long dynsymsize; /* size of .dynsym section */
+ char *dynstr; /* ptr to .dynstr section */
+ unsigned long text; /* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+ Elf64_Ehdr *hdr;
+ Elf64_Sym *dynsym;
+ unsigned long dynsymsize;
+ char *dynstr;
+ unsigned long text;
+};
+
+
+#ifdef __DEBUG
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+ printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+ page_count(pg),
+ pg->flags);
+ if (upg/* && pg != upg*/) {
+ printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
+ << PAGE_SHIFT),
+ page_count(upg),
+ upg->flags);
+ }
+ printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+ int i;
+
+ if (!vma || test_thread_flag(TIF_32BIT)) {
+ printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+ for (i=0; i<vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+ if (!vma || !test_thread_flag(TIF_32BIT)) {
+ printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+ for (i=0; i<vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+}
+#endif /* DEBUG */
+
+/*
+ * Keep a dummy vma_close for now, it will prevent VMA merging.
+ */
+static void vdso_vma_close(struct vm_area_struct * vma)
+{
+}
+
+/*
+ * Our nopage() function, maps in the actual vDSO kernel pages, they will
+ * be mapped read-only by do_no_page(), and eventually COW'ed, either
+ * right away for an initial write access, or by do_wp_page().
+ */
+static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
+ unsigned long address, int *type)
+{
+ unsigned long offset = address - vma->vm_start;
+ struct page *pg;
+#ifdef CONFIG_PPC64
+ void *vbase = test_thread_flag(TIF_32BIT) ?
+ vdso32_kbase : vdso64_kbase;
+#else
+ void *vbase = vdso32_kbase;
+#endif
+
+ DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
+ current->comm, address, offset);
+
+ if (address < vma->vm_start || address > vma->vm_end)
+ return NOPAGE_SIGBUS;
+
+ /*
+ * Last page is systemcfg.
+ */
+ if ((vma->vm_end - address) <= PAGE_SIZE)
+ pg = virt_to_page(vdso_data);
+ else
+ pg = virt_to_page(vbase + offset);
+
+ get_page(pg);
+ DBG(" ->page count: %d\n", page_count(pg));
+
+ return pg;
+}
+
+static struct vm_operations_struct vdso_vmops = {
+ .close = vdso_vma_close,
+ .nopage = vdso_vma_nopage,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+
+#ifdef CONFIG_PPC64
+ if (test_thread_flag(TIF_32BIT)) {
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+ } else {
+ vdso_pages = vdso64_pages;
+ vdso_base = VDSO64_MBASE;
+ }
+#else
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+#endif
+
+ current->thread.vdso_base = 0;
+
+ /* vDSO has a problem and was disabled, just don't "enable" it for the
+ * process
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma == NULL)
+ return -ENOMEM;
+
+ memset(vma, 0, sizeof(*vma));
+
+ /* Add a page to the vdso size for the data page */
+ vdso_pages ++;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put it
+ * at vdso_base which is the "natural" base for it, but we might fail
+ * and end up putting it elsewhere.
+ */
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ vdso_pages << PAGE_SHIFT, 0, 0);
+ if (vdso_base & ~PAGE_MASK) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return (int)vdso_base;
+ }
+
+ current->thread.vdso_base = vdso_base;
+
+ vma->vm_mm = mm;
+ vma->vm_start = current->thread.vdso_base;
+ vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though
+ */
+ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+ vma->vm_ops = &vdso_vmops;
+
+ down_write(&mm->mmap_sem);
+ if (insert_vm_struct(mm, vma)) {
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+ mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ up_write(&mm->mmap_sem);
+
+ return 0;
+}
+
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf32_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ *size = 0;
+ return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ Elf32_Sym *sym = find_symbol32(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO32: function %s not found !\n",
+ symname);
+ return 0;
+ }
+ return sym->st_value - VDSO32_LBASE;
+}
+
+static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf32_Sym *sym32_gen, *sym32_fix;
+
+ sym32_gen = find_symbol32(v32, orig);
+ if (sym32_gen == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym32_gen->st_name = 0;
+ return 0;
+ }
+ sym32_fix = find_symbol32(v32, fix);
+ if (sym32_fix == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym32_gen->st_value = sym32_fix->st_value;
+ sym32_gen->st_size = sym32_fix->st_size;
+ sym32_gen->st_info = sym32_fix->st_info;
+ sym32_gen->st_other = sym32_fix->st_other;
+ sym32_gen->st_shndx = sym32_fix->st_shndx;
+
+ return 0;
+}
+
+
+#ifdef CONFIG_PPC64
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf64_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ if (size)
+ *size = 0;
+ return NULL;
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ Elf64_Sym *sym = find_symbol64(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO64: function %s not found !\n",
+ symname);
+ return 0;
+ }
+#ifdef VDS64_HAS_DESCRIPTORS
+ return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
+ VDSO64_LBASE;
+#else
+ return sym->st_value - VDSO64_LBASE;
+#endif
+}
+
+static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf64_Sym *sym64_gen, *sym64_fix;
+
+ sym64_gen = find_symbol64(v64, orig);
+ if (sym64_gen == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym64_gen->st_name = 0;
+ return 0;
+ }
+ sym64_fix = find_symbol64(v64, fix);
+ if (sym64_fix == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym64_gen->st_value = sym64_fix->st_value;
+ sym64_gen->st_size = sym64_fix->st_size;
+ sym64_gen->st_info = sym64_fix->st_info;
+ sym64_gen->st_other = sym64_fix->st_other;
+ sym64_gen->st_shndx = sym64_fix->st_shndx;
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+
+static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ void *sect;
+
+ /*
+ * Locate symbol tables & text section
+ */
+
+ v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
+ v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
+ if (v32->dynsym == NULL || v32->dynstr == NULL) {
+ printk(KERN_ERR "vDSO32: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section32(v32->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO32: the .text section was not found\n");
+ return -1;
+ }
+ v32->text = sect - vdso32_kbase;
+
+#ifdef CONFIG_PPC64
+ v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
+ v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
+ if (v64->dynsym == NULL || v64->dynstr == NULL) {
+ printk(KERN_ERR "vDSO64: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section64(v64->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO64: the .text section was not found\n");
+ return -1;
+ }
+ v64->text = sect - vdso64_kbase;
+#endif /* CONFIG_PPC64 */
+
+ return 0;
+}
+
+static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ /*
+ * Find signal trampolines
+ */
+
+#ifdef CONFIG_PPC64
+ vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
+#endif
+ vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
+ vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
+}
+
+static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ Elf32_Sym *sym32;
+#ifdef CONFIG_PPC64
+ Elf64_Sym *sym64;
+
+ sym64 = find_symbol64(v64, "__kernel_datapage_offset");
+ if (sym64 == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
+ (vdso64_pages << PAGE_SHIFT) -
+ (sym64->st_value - VDSO64_LBASE);
+#endif /* CONFIG_PPC64 */
+
+ sym32 = find_symbol32(v32, "__kernel_datapage_offset");
+ if (sym32 == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
+ (vdso32_pages << PAGE_SHIFT) -
+ (sym32->st_value - VDSO32_LBASE);
+
+ return 0;
+}
+
+static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
+ struct vdso_patch_def *patch = &vdso_patches[i];
+ int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
+ == patch->ftr_value;
+ if (!match)
+ continue;
+
+ DBG("replacing %s with %s...\n", patch->gen_name,
+ patch->fix_name ? "NONE" : patch->fix_name);
+
+ /*
+ * Patch the 32 bits and 64 bits symbols. Note that we do not
+ * patch the "." symbol on 64 bits.
+ * It would be easy to do, but doesn't seem to be necessary,
+ * patching the OPD symbol is enough.
+ */
+ vdso_do_func_patch32(v32, v64, patch->gen_name,
+ patch->fix_name);
+#ifdef CONFIG_PPC64
+ vdso_do_func_patch64(v32, v64, patch->gen_name,
+ patch->fix_name);
+#endif /* CONFIG_PPC64 */
+ }
+
+ return 0;
+}
+
+
+static __init int vdso_setup(void)
+{
+ struct lib32_elfinfo v32;
+ struct lib64_elfinfo v64;
+
+ v32.hdr = vdso32_kbase;
+#ifdef CONFIG_PPC64
+ v64.hdr = vdso64_kbase;
+#endif
+ if (vdso_do_find_sections(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_datapage(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_alt_funcs(&v32, &v64))
+ return -1;
+
+ vdso_setup_trampolines(&v32, &v64);
+
+ return 0;
+}
+
+/*
+ * Called from setup_arch to initialize the bitmap of available
+ * syscalls in the systemcfg page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+ unsigned int i;
+ extern unsigned long *sys_call_table;
+ extern unsigned long sys_ni_syscall;
+
+
+ for (i = 0; i < __NR_syscalls; i++) {
+#ifdef CONFIG_PPC64
+ if (sys_call_table[i*2] != sys_ni_syscall)
+ vdso_data->syscall_map_64[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+ if (sys_call_table[i*2+1] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#else /* CONFIG_PPC64 */
+ if (sys_call_table[i] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#endif /* CONFIG_PPC64 */
+ }
+}
+
+
+void __init vdso_init(void)
+{
+ int i;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Fill up the "systemcfg" stuff for backward compatiblity
+ */
+ strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ vdso_data->version.major = SYSTEMCFG_MAJOR;
+ vdso_data->version.minor = SYSTEMCFG_MINOR;
+ vdso_data->processor = mfspr(SPRN_PVR);
+ vdso_data->platform = _machine;
+ vdso_data->physicalMemorySize = lmb_phys_mem_size();
+ vdso_data->dcache_size = ppc64_caches.dsize;
+ vdso_data->dcache_line_size = ppc64_caches.dline_size;
+ vdso_data->icache_size = ppc64_caches.isize;
+ vdso_data->icache_line_size = ppc64_caches.iline_size;
+
+ /*
+ * Calculate the size of the 64 bits vDSO
+ */
+ vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+ DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
+#endif /* CONFIG_PPC64 */
+
+
+ /*
+ * Calculate the size of the 32 bits vDSO
+ */
+ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+ DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+
+
+ /*
+ * Setup the syscall map in the vDOS
+ */
+ vdso_setup_syscall_map();
+ /*
+ * Initialize the vDSO images in memory, that is do necessary
+ * fixups of vDSO symbols, locate trampolines, etc...
+ */
+ if (vdso_setup()) {
+ printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+ vdso32_pages = 0;
+#ifdef CONFIG_PPC64
+ vdso64_pages = 0;
+#endif
+ return;
+ }
+
+ /* Make sure pages are in the correct state */
+ for (i = 0; i < vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+
+ }
+#ifdef CONFIG_PPC64
+ for (i = 0; i < vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ }
+#endif /* CONFIG_PPC64 */
+
+ get_page(virt_to_page(vdso_data));
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
new file mode 100644
index 00000000000..8a3bed5f143
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -0,0 +1,40 @@
+
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+ifeq ($(CONFIG_PPC32),y)
+CROSS32CC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1
+EXTRA_AFLAGS := -D__VDSO32__ -s
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
new file mode 100644
index 00000000000..09629aea3e4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -0,0 +1,69 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ crclr cr0*4+so
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
+
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
new file mode 100644
index 00000000000..4709f1d9542
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -0,0 +1,86 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r4,r3
+ bl __get_datapage@local
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP32
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * void unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl __get_datapage@local
+ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+ lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
new file mode 100644
index 00000000000..7eebff03a04
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,323 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r10,r3 /* r10 saves tv */
+ mr r11,r4 /* r11 saves tz */
+ bl __get_datapage@local /* get data page */
+ mr r9, r3 /* datapage ptr in r9 */
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 2f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TVAL32_TV_SEC(r10)
+
+ /* get remaining xsec and convert to usec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ stw r5,TVAL32_TV_USEC(r10)
+
+ cmpli cr0,r11,0 /* check if tz is NULL */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r9)
+ stw r4,TZONE_TZ_MINWEST(r11)
+ stw r5,TZONE_TZ_DSTTIME(r11)
+
+1: mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+2:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+ li r0,__NR_gettimeofday
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpli cr0,r3,CLOCK_REALTIME
+ cmpli cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl __get_datapage@local /* get data page */
+ mr r9,r3 /* datapage ptr in r9 */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TSPC32_TV_SEC(r11)
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ mulli r5,r5,1000
+ stw r5,TSPC32_TV_NSEC(r11)
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r6,r4,12,20,31
+ rlwimi r6,r3,12,0,19
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r7,r4,12,0,19
+ lis r5,1000000@h
+ ori r5,r5,1000000@l
+ mulhwu r7,r7,r5
+ mulli r7,r7,1000
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
+ * can be used
+ */
+
+ lwz r3,WTOM_CLOCK_SEC(r9)
+ lwz r4,WTOM_CLOCK_NSEC(r9)
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r5,r4,r3
+ xor r0,r5,r5
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r3,r3,r6
+ add r4,r4,r7
+ lis r5,NSEC_PER_SEC@h
+ ori r5,r5,NSEC_PER_SEC@l
+ cmpl cr0,r4,r5
+ cmpli cr1,r4,0
+ blt 1f
+ subf r4,r5,r4
+ addi r3,r3,1
+1: bge cr1,1f
+ addi r3,r3,-1
+ add r4,r4,r5
+1: stw r3,TSPC32_TV_SEC(r11)
+ stw r4,TSPC32_TV_NSEC(r11)
+
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ stw r3,TSPC32_TV_SEC(r4)
+ stw r5,TSPC32_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday() & friends, it returns the xsec
+ * value in r3 & r4 and expects the datapage ptr (non clobbered)
+ * in r9. clobbers r0,r4,r5,r6,r7,r8.
+ * When returning, r8 contains the counter value that can be reused
+ * by the monotonic clock implementation
+ */
+__do_get_xsec:
+ .cfi_startproc
+ /* Check for update count & load values. We use the low
+ * order 32 bits of the update count
+ */
+#ifdef CONFIG_PPC64
+1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r9,r9,r0
+
+ /* Load orig stamp (offset to TB) */
+ lwz r5,CFG_TB_ORIG_STAMP(r9)
+ lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
+
+ /* Get a stable TB value */
+2: mftbu r3
+ mftbl r4
+ mftbu r0
+ cmpl cr0,r3,r0
+ bne- 2b
+
+ /* Substract tb orig stamp. If the high part is non-zero, we jump to
+ * the slow path which call the syscall.
+ * If it's ok, then we have our 32 bits tb_ticks value in r7
+ */
+ subfc r7,r6,r4
+ subfe. r0,r5,r3
+ bne- 3f
+
+ /* Load scale factor & do multiplication */
+ lwz r5,CFG_TB_TO_XS(r9) /* load values */
+ lwz r6,(CFG_TB_TO_XS+4)(r9)
+ mulhwu r4,r7,r5
+ mulhwu r6,r7,r6
+ mullw r0,r7,r5
+ addc r6,r6,r0
+
+ /* At this point, we have the scaled xsec value in r4 + XER:CA
+ * we load & add the stamp since epoch
+ */
+ lwz r5,CFG_STAMP_XSEC(r9)
+ lwz r6,(CFG_STAMP_XSEC+4)(r9)
+ adde r4,r4,r6
+ addze r3,r5
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r6,r4,r3
+ xor r0,r6,r6
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 1b
+
+ /* Warning ! The caller expects CR:EQ to be set to indicate a
+ * successful calculation (so it won't fallback to the syscall
+ * method). We have overriden that CR bit in the counter check,
+ * but fortunately, the loop exit condition _is_ CR:EQ set, so
+ * we can exit safely here. If you change this code, be careful
+ * of that side effect.
+ */
+3: blr
+ .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S
new file mode 100644
index 00000000000..d4b5be4f3d5
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/note.S
@@ -0,0 +1,25 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
+ .section name, flags; \
+ .balign 4; \
+ .long 1f - 0f; /* name length */ \
+ .long 3f - 2f; /* data length */ \
+ .long type; /* note type */ \
+0: .asciz vendor; /* vendor name */ \
+1: .balign 4; \
+2:
+
+#define ASM_ELF_NOTE_END \
+3: .balign 4; /* pad out section */ \
+ .previous
+
+ ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+ .long LINUX_VERSION_CODE
+ ASM_ELF_NOTE_END
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S
new file mode 100644
index 00000000000..e0464278191
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,300 @@
+/*
+ * Signal trampolines for 32 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by adding a nop before
+ the real start. */
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp32)
+.Lsig_start = . - 4
+ li r0,__NR_sigreturn
+ sc
+.Lsig_end:
+V_FUNCTION_END(__kernel_sigtramp32)
+
+.Lsigrt_start:
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt32)
+
+ .section .eh_frame,"a",@progbits
+
+/* Register r1 can be found at offset 4 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. The VMX reg struct is at offset VREGS of
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 64+28
+
+/* Size of regs. */
+#define RSIZE 4
+
+/* This is the offset of the VMX regs. */
+#define VREGS 48*RSIZE+34*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 32*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 4
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsig_start - . /* PC start, length */
+ .long .Lsig_end - .Lsig_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde0_end:
+
+/* We have a different stack layout for rt_sigreturn. */
+#undef PTREGS
+#define PTREGS 64+16+128+20+28
+
+ .long .Lfde1_end - .Lfde1_start
+.Lfde1_start:
+ .long .Lfde1_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde1_end:
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 00000000000..f4bad720cb0
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,117 @@
+
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .got : { *(.got) }
+ .plt : { *(.plt) }
+
+ _end = .;
+ __end = .;
+ PROVIDE (end = .);
+
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp32;
+ __kernel_sigtramp_rt32;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 00000000000..556f0caa5d8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
new file mode 100644
index 00000000000..ab39988452c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -0,0 +1,35 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1
+EXTRA_AFLAGS := -D__VDSO64__ -s
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
new file mode 100644
index 00000000000..cb4ae0a5edd
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -0,0 +1,68 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ crclr cr0*4+so
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ crclr cr0*4+so
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
new file mode 100644
index 00000000000..3b2dd7d0c1e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -0,0 +1,86 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ mr r4,r3
+ bl V_LOCAL_FUNC(__get_datapage)
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP64
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+
+/*
+ * void unsigned long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ ld r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ crclr cr0*4+so
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
new file mode 100644
index 00000000000..ccaeda5136d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,253 @@
+
+ /*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r11,r3 /* r11 holds tv */
+ mr r10,r4 /* r10 holds tz */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ cmpldi cr0,r10,0 /* check if tz is NULL */
+ std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r3)
+ stw r4,TZONE_TZ_MINWEST(r10)
+ stw r5,TZONE_TZ_DSTTIME(r10)
+1: mtlr r12
+ crclr cr0*4+so
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ mulli r0,r0,1000 /* nsec = usec * 1000 */
+ std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
+
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r6,r0,44,20
+ mulli r6,r6,1000 /* nsec = usec * 1000 */
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r5,r6 contain our sec/nsec values.
+ * can be used
+ */
+
+ lwa r4,WTOM_CLOCK_SEC(r3)
+ lwa r7,WTOM_CLOCK_NSEC(r3)
+
+ /* We now have our result in r4,r7. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r9,r4,r7
+ xor r0,r9,r9
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r4,r4,r5
+ add r7,r7,r6
+ lis r9,NSEC_PER_SEC@h
+ ori r9,r9,NSEC_PER_SEC@l
+ cmpl cr0,r7,r9
+ cmpli cr1,r7,0
+ blt 1f
+ subf r7,r9,r7
+ addi r4,r4,1
+1: bge cr1,1f
+ addi r4,r4,-1
+ add r7,r7,r9
+1: std r4,TSPC64_TV_SEC(r11)
+ std r7,TSPC64_TV_NSEC(r11)
+
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ crclr cr0*4+so
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ std r3,TSPC64_TV_SEC(r4)
+ std r5,TSPC64_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday(), it returns the xsec
+ * value in r4 and expects the datapage ptr (non clobbered)
+ * in r3. clobbers r0,r4,r5,r6,r7,r8
+ * When returning, r8 contains the counter value that can be reused
+ */
+V_FUNCTION_BEGIN(__do_get_xsec)
+ .cfi_startproc
+ /* check for update count & load values */
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r4,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r4,r4 /* create dependency */
+ add r3,r3,r0
+
+ /* Get TB & offset it */
+ mftb r7
+ ld r9,CFG_TB_ORIG_STAMP(r3)
+ subf r7,r9,r7
+
+ /* Scale result */
+ ld r5,CFG_TB_TO_XS(r3)
+ mulhdu r7,r7,r5
+
+ /* Add stamp since epoch */
+ ld r6,CFG_STAMP_XSEC(r3)
+ add r4,r6,r7
+
+ xor r0,r4,r4
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 1b
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__do_get_xsec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
new file mode 100644
index 00000000000..dc2a509f7e8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/note.S
@@ -0,0 +1 @@
+#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
new file mode 100644
index 00000000000..31b604ab56d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,295 @@
+/*
+ * Signal trampoline for 64 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by padding before the
+ real start. */
+ nop
+ .balign 8
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
+.Lsigrt_start = . - 4
+ addi r1, r1, __SIGNAL_FRAMESIZE
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt64)
+/* The ".balign 8" above and the following zeros mimic the old stack
+ trampoline layout. The last magic value is the ucontext pointer,
+ chosen in such a way that older libgcc unwind code returns a zero
+ for a sigcontext pointer. */
+ .long 0,0,0
+ .quad 0,-21*8
+
+/* Register r1 can be found at offset 8 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 128+168+56
+
+/* Size of regs. */
+#define RSIZE 8
+
+/* This is the offset of the VMX reg pointer. */
+#define VREGS 48*RSIZE+33*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 33*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+ .section .eh_frame,"a",@progbits
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -8 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 8
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .quad .Lsigrt_start - . /* PC start, length */
+ .quad .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+# Do we really need to describe the frame at this point? ie. will
+# we ever have some call chain that returns somewhere past the addi?
+# I don't think so, since gcc doesn't support async signals.
+# .byte 0x41 /* DW_CFA_advance_loc 1*4 */
+#undef PTREGS
+#define PTREGS 168+56
+# EH_FRAME_GEN
+# EH_FRAME_FP
+# EH_FRAME_VMX
+ .balign 8
+.Lfde0_end:
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 00000000000..4bdf224464a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,116 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.sfpr .glink)
+ } :text
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+
+ .opd ALIGN(8) : { KEEP (*(.opd)) }
+ .got ALIGN(8) : { *(.got .toc) }
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ _end = .;
+ PROVIDE (end = .);
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sectio/ns.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.branch_lt) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp_rt64;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 00000000000..0529cb9e3b9
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous