aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--arch/powerpc/boot/dts/gef_sbc610.dts15
-rw-r--r--arch/powerpc/boot/dts/sequoia.dts2
-rw-r--r--arch/powerpc/boot/install.sh14
-rw-r--r--arch/powerpc/configs/86xx/gef_sbc610_defconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/atomic.h4
-rw-r--r--arch/powerpc/include/asm/byteorder.h83
-rw-r--r--arch/powerpc/include/asm/cell-pmu.h2
-rw-r--r--arch/powerpc/include/asm/disassemble.h80
-rw-r--r--arch/powerpc/include/asm/hugetlb.h6
-rw-r--r--arch/powerpc/include/asm/ioctls.h2
-rw-r--r--arch/powerpc/include/asm/kexec.h55
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h61
-rw-r--r--arch/powerpc/include/asm/kvm_host.h116
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h83
-rw-r--r--arch/powerpc/include/asm/mmu-44x.h1
-rw-r--r--arch/powerpc/include/asm/oprofile_impl.h6
-rw-r--r--arch/powerpc/include/asm/ps3.h2
-rw-r--r--arch/powerpc/include/asm/spu.h2
-rw-r--r--arch/powerpc/include/asm/swab.h90
-rw-r--r--arch/powerpc/include/asm/topology.h13
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c21
-rw-r--r--arch/powerpc/kernel/cacheinfo.c837
-rw-r--r--arch/powerpc/kernel/cacheinfo.h8
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/kprobes.c9
-rw-r--r--arch/powerpc/kernel/pci-common.c71
-rw-r--r--arch/powerpc/kernel/pci_64.c9
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c1
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/prom.c14
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/prom_parse.c7
-rw-r--r--arch/powerpc/kernel/smp.c4
-rw-r--r--arch/powerpc/kernel/sysfs.c300
-rw-r--r--arch/powerpc/kernel/time.c20
-rw-r--r--arch/powerpc/kvm/44x.c228
-rw-r--r--arch/powerpc/kvm/44x_emulate.c371
-rw-r--r--arch/powerpc/kvm/44x_tlb.c463
-rw-r--r--arch/powerpc/kvm/44x_tlb.h26
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/booke.c (renamed from arch/powerpc/kvm/booke_guest.c)418
-rw-r--r--arch/powerpc/kvm/booke.h60
-rw-r--r--arch/powerpc/kvm/booke_host.c83
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S72
-rw-r--r--arch/powerpc/kvm/emulate.c447
-rw-r--r--arch/powerpc/kvm/powerpc.c130
-rw-r--r--arch/powerpc/kvm/timing.c239
-rw-r--r--arch/powerpc/kvm/timing.h102
-rw-r--r--arch/powerpc/mm/hugetlbpage.c7
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/mmu_decl.h6
-rw-r--r--arch/powerpc/mm/numa.c62
-rw-r--r--arch/powerpc/mm/pgtable_32.c3
-rw-r--r--arch/powerpc/mm/tlb_nohash.c3
-rw-r--r--arch/powerpc/oprofile/cell/pr_util.h13
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c58
-rw-r--r--arch/powerpc/oprofile/common.c22
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c748
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_common.c2
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c21
-rw-r--r--arch/powerpc/platforms/cell/beat_udbg.c4
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq.c2
-rw-r--r--arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c2
-rw-r--r--arch/powerpc/platforms/cell/cpufreq_spudemand.c4
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c2
-rw-r--r--arch/powerpc/platforms/cell/io-workarounds.c4
-rw-r--r--arch/powerpc/platforms/cell/iommu.c4
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
-rw-r--r--arch/powerpc/platforms/iseries/Kconfig5
-rw-r--r--arch/powerpc/platforms/iseries/setup.c11
-rw-r--r--arch/powerpc/platforms/pasemi/cpufreq.c4
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c2
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_64.c2
-rw-r--r--arch/powerpc/platforms/powermac/pci.c2
-rw-r--r--arch/powerpc/platforms/powermac/time.c11
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c37
-rw-r--r--arch/powerpc/platforms/pseries/xics.c4
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/mpic.h2
87 files changed, 3888 insertions, 1813 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 79f25cef32d..84b861316ce 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -108,6 +108,8 @@ config ARCH_NO_VIRT_TO_BUS
config PPC
bool
default y
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_IDE
@@ -326,7 +328,8 @@ config KEXEC
config CRASH_DUMP
bool "Build a kdump crash kernel"
- depends on (PPC64 && RELOCATABLE) || 6xx
+ depends on PPC64 || 6xx
+ select RELOCATABLE if PPC64
help
Build a kernel suitable for use as a kdump capture kernel.
The same kernel binary can be used as production kernel and dump
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index f32829937aa..e84df338ea2 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -208,7 +208,7 @@ image-$(CONFIG_DEFAULT_UIMAGE) += uImage
#
# Theses are default targets to build images which embed device tree blobs.
# They are only required on boards which do not have FDT support in firmware.
-# Boards with newish u-boot firmare can use the uImage target above
+# Boards with newish u-boot firmware can use the uImage target above
#
# Board ports in arch/powerpc/platform/40x/Kconfig
@@ -356,7 +356,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y))
@rm -f $@; ln $< $@
install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
- sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $<
+ sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
# anything not in $(targets)
clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
diff --git a/arch/powerpc/boot/dts/gef_sbc610.dts b/arch/powerpc/boot/dts/gef_sbc610.dts
index 9708b3423bb..e78c355c7ba 100644
--- a/arch/powerpc/boot/dts/gef_sbc610.dts
+++ b/arch/powerpc/boot/dts/gef_sbc610.dts
@@ -88,6 +88,21 @@
compatible = "gef,fpga-regs";
reg = <0x4 0x0 0x40>;
};
+
+ wdt@4,2000 {
+ compatible = "gef,fpga-wdt";
+ reg = <0x4 0x2000 0x8>;
+ interrupts = <0x1a 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ /* Second watchdog available, driver currently supports one.
+ wdt@4,2010 {
+ compatible = "gef,fpga-wdt";
+ reg = <0x4 0x2010 0x8>;
+ interrupts = <0x1b 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ */
gef_pic: pic@4,4000 {
#interrupt-cells = <1>;
interrupt-controller;
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index 3b295e8df53..43cc68bd319 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -134,7 +134,7 @@
};
USB1: usb@e0000400 {
- compatible = "ohci-be";
+ compatible = "ibm,usb-ohci-440epx", "ohci-be";
reg = <0x00000000 0xe0000400 0x00000060>;
interrupt-parent = <&UIC0>;
interrupts = <0x15 0x8>;
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index b002bfd5678..51b2387bdba 100644
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -15,7 +15,7 @@
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-# $5 - kernel boot file, the zImage
+# $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
#
# User may have a custom install script
@@ -38,3 +38,15 @@ fi
cat $2 > $4/$image_name
cp $3 $4/System.map
+
+# Copy all the bootable image files
+path=$4
+shift 4
+while [ $# -ne 0 ]; do
+ image_name=`basename $1`
+ if [ -f $path/$image_name ]; then
+ mv $path/$image_name $path/$image_name.old
+ fi
+ cat $1 > $path/$image_name
+ shift
+done;
diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
index cd1ffa44932..391874c7b43 100644
--- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig
+++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
@@ -1164,6 +1164,7 @@ CONFIG_WATCHDOG=y
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_8xxx_WDT is not set
+CONFIG_GEF_WDT=y
#
# PCI-based Watchdog Cards
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 5ab7d7fe198..9268602de5d 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -35,3 +35,4 @@ unifdef-y += spu_info.h
unifdef-y += termios.h
unifdef-y += types.h
unifdef-y += unistd.h
+unifdef-y += swab.h
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 499be5bdd6f..b401950f525 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,7 +5,7 @@
* PowerPC atomic operations
*/
-typedef struct { int counter; } atomic_t;
+#include <linux/types.h>
#ifdef __KERNEL__
#include <linux/compiler.h>
@@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
#ifdef __powerpc64__
-typedef struct { long counter; } atomic64_t;
-
#define ATOMIC64_INIT(i) { (i) }
static __inline__ long atomic64_read(const atomic64_t *v)
diff --git a/arch/powerpc/include/asm/byteorder.h b/arch/powerpc/include/asm/byteorder.h
index d5de325472e..5cca27a4153 100644
--- a/arch/powerpc/include/asm/byteorder.h
+++ b/arch/powerpc/include/asm/byteorder.h
@@ -8,86 +8,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#define __BIG_ENDIAN
-
-#ifdef __GNUC__
-#ifdef __KERNEL__
-
-static __inline__ __u16 ld_le16(const volatile __u16 *addr)
-{
- __u16 val;
-
- __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
- return val;
-}
-#define __arch_swab16p ld_le16
-
-static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
-{
- __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static inline void __arch_swab16s(__u16 *addr)
-{
- st_le16(addr, *addr);
-}
-#define __arch_swab16s __arch_swab16s
-
-static __inline__ __u32 ld_le32(const volatile __u32 *addr)
-{
- __u32 val;
-
- __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
- return val;
-}
-#define __arch_swab32p ld_le32
-
-static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
-{
- __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static inline void __arch_swab32s(__u32 *addr)
-{
- st_le32(addr, *addr);
-}
-#define __arch_swab32s __arch_swab32s
-
-static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
-{
- __u16 result;
-
- __asm__("rlwimi %0,%1,8,16,23"
- : "=r" (result)
- : "r" (value), "0" (value >> 8));
- return result;
-}
-#define __arch_swab16 __arch_swab16
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
-{
- __u32 result;
-
- __asm__("rlwimi %0,%1,24,16,23\n\t"
- "rlwimi %0,%1,8,8,15\n\t"
- "rlwimi %0,%1,24,0,7"
- : "=r" (result)
- : "r" (value), "0" (value >> 24));
- return result;
-}
-#define __arch_swab32 __arch_swab32
-
-#endif /* __KERNEL__ */
-
-#ifndef __powerpc64__
-#define __SWAB_64_THRU_32__
-#endif /* __powerpc64__ */
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder.h>
+#include <asm/swab.h>
+#include <linux/byteorder/big_endian.h>
#endif /* _ASM_POWERPC_BYTEORDER_H */
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
index 8066eede3a0..b4b7338ad79 100644
--- a/arch/powerpc/include/asm/cell-pmu.h
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -37,9 +37,11 @@
#define CBE_PM_STOP_AT_MAX 0x40000000
#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3)
#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28)
+#define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17)
#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18)
#define CBE_PM_FREEZE_ALL_CTRS 0x00100000
#define CBE_PM_ENABLE_EXT_TRACE 0x00008000
+#define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9)
/* Macros for the trace_address register. */
#define CBE_PM_TRACE_BUF_FULL 0x00000800
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 00000000000..9b198d1b3b2
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+ return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+ return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+ return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+ return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+ return inst & 0xffff;
+}
+
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 26f0d0ab27a..b1dafb6a974 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -18,6 +18,12 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
/*
+ * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
+ * to override the version in mm/hugetlb.c
+ */
+#define vma_mmu_pagesize vma_mmu_pagesize
+
+/*
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
diff --git a/arch/powerpc/include/asm/ioctls.h b/arch/powerpc/include/asm/ioctls.h
index 279a6229584..1842186d872 100644
--- a/arch/powerpc/include/asm/ioctls.h
+++ b/arch/powerpc/include/asm/ioctls.h
@@ -89,6 +89,8 @@
#define TIOCSBRK 0x5427 /* BSD compatibility */
#define TIOCCBRK 0x5428 /* BSD compatibility */
#define TIOCGSID 0x5429 /* Return the session ID of FD */
+#define TIOCGRS485 0x542e
+#define TIOCSRS485 0x542f
#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 6dbffc98170..7e06b43720d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -48,63 +48,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs)
memcpy(newregs, oldregs, sizeof(*newregs));
-#ifdef __powerpc64__
- else {
- /* FIXME Merge this with xmon_save_regs ?? */
- unsigned long tmp1, tmp2;
- __asm__ __volatile__ (
- "std 0,0(%2)\n"
- "std 1,8(%2)\n"
- "std 2,16(%2)\n"
- "std 3,24(%2)\n"
- "std 4,32(%2)\n"
- "std 5,40(%2)\n"
- "std 6,48(%2)\n"
- "std 7,56(%2)\n"
- "std 8,64(%2)\n"
- "std 9,72(%2)\n"
- "std 10,80(%2)\n"
- "std 11,88(%2)\n"
- "std 12,96(%2)\n"
- "std 13,104(%2)\n"
- "std 14,112(%2)\n"
- "std 15,120(%2)\n"
- "std 16,128(%2)\n"
- "std 17,136(%2)\n"
- "std 18,144(%2)\n"
- "std 19,152(%2)\n"
- "std 20,160(%2)\n"
- "std 21,168(%2)\n"
- "std 22,176(%2)\n"
- "std 23,184(%2)\n"
- "std 24,192(%2)\n"
- "std 25,200(%2)\n"
- "std 26,208(%2)\n"
- "std 27,216(%2)\n"
- "std 28,224(%2)\n"
- "std 29,232(%2)\n"
- "std 30,240(%2)\n"
- "std 31,248(%2)\n"
- "mfmsr %0\n"
- "std %0, 264(%2)\n"
- "mfctr %0\n"
- "std %0, 280(%2)\n"
- "mflr %0\n"
- "std %0, 288(%2)\n"
- "bl 1f\n"
- "1: mflr %1\n"
- "std %1, 256(%2)\n"
- "mtlr %0\n"
- "mfxer %0\n"
- "std %0, 296(%2)\n"
- : "=&r" (tmp1), "=&r" (tmp2)
- : "b" (newregs)
- : "memory");
- }
-#else
else
ppc_save_regs(newregs);
-#endif /* __powerpc64__ */
}
extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 00000000000..f49031b632c
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_44X_H__
+#define __ASM_44X_H__
+
+#include <linux/kvm_host.h>
+
+#define PPC44x_TLB_SIZE 64
+
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+ struct page *page;
+ u16 gtlb_index;
+ u8 writeable;
+ u8 tid;
+};
+
+struct kvmppc_vcpu_44x {
+ /* Unmodified copy of the guest's TLB. */
+ struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+ /* References to guest pages in the hardware TLB. */
+ struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
+
+ /* State of the shadow TLB at guest context switch time. */
+ struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+ u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
+ struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
+
+#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180c..c1e436fe773 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct tlbe {
+struct kvmppc_44x_tlbe {
u32 tid; /* Only the low 8 bits are used. */
u32 word0;
u32 word1;
u32 word2;
};
-struct kvm_arch {
+enum kvm_exit_types {
+ MMIO_EXITS,
+ DCR_EXITS,
+ SIGNAL_EXITS,
+ ITLB_REAL_MISS_EXITS,
+ ITLB_VIRT_MISS_EXITS,
+ DTLB_REAL_MISS_EXITS,
+ DTLB_VIRT_MISS_EXITS,
+ SYSCALL_EXITS,
+ ISI_EXITS,
+ DSI_EXITS,
+ EMULATED_INST_EXITS,
+ EMULATED_MTMSRWE_EXITS,
+ EMULATED_WRTEE_EXITS,
+ EMULATED_MTSPR_EXITS,
+ EMULATED_MFSPR_EXITS,
+ EMULATED_MTMSR_EXITS,
+ EMULATED_MFMSR_EXITS,
+ EMULATED_TLBSX_EXITS,
+ EMULATED_TLBWE_EXITS,
+ EMULATED_RFI_EXITS,
+ DEC_EXITS,
+ EXT_INTR_EXITS,
+ HALT_WAKEUP,
+ USR_PR_INST,
+ FP_UNAVAIL,
+ DEBUG_EXITS,
+ TIMEINGUEST,
+ __NUMBER_OF_KVM_EXIT_TYPES
};
-struct kvm_vcpu_arch {
- /* Unmodified copy of the guest's TLB. */
- struct tlbe guest_tlb[PPC44x_TLB_SIZE];
- /* TLB that's actually used when the guest is running. */
- struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
- /* Pages which are referenced in the shadow TLB. */
- struct page *shadow_pages[PPC44x_TLB_SIZE];
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct kvmppc_exit_timing {
+ union {
+ u64 tv64;
+ struct {
+ u32 tbu, tbl;
+ } tv32;
+ };
+};
- /* Track which TLB entries we've modified in the current exit. */
- u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+struct kvm_arch {
+};
+struct kvm_vcpu_arch {
u32 host_stack;
u32 host_pid;
u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
u32 host_msr;
u64 fpr[32];
- u32 gpr[32];
+ ulong gpr[32];
- u32 pc;
+ ulong pc;
u32 cr;
- u32 ctr;
- u32 lr;
- u32 xer;
+ ulong ctr;
+ ulong lr;
+ ulong xer;
- u32 msr;
+ ulong msr;
u32 mmucr;
- u32 sprg0;
- u32 sprg1;
- u32 sprg2;
- u32 sprg3;
- u32 sprg4;
- u32 sprg5;
- u32 sprg6;
- u32 sprg7;
- u32 srr0;
- u32 srr1;
- u32 csrr0;
- u32 csrr1;
- u32 dsrr0;
- u32 dsrr1;
- u32 dear;
- u32 esr;
+ ulong sprg0;
+ ulong sprg1;
+ ulong sprg2;
+ ulong sprg3;
+ ulong sprg4;
+ ulong sprg5;
+ ulong sprg6;
+ ulong sprg7;
+ ulong srr0;
+ ulong srr1;
+ ulong csrr0;
+ ulong csrr1;
+ ulong dsrr0;
+ ulong dsrr1;
+ ulong dear;
+ ulong esr;
u32 dec;
u32 decar;
u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
u32 tcr;
u32 tsr;
u32 ivor[16];
- u32 ivpr;
+ ulong ivpr;
u32 pir;
u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
u32 dbcr0;
u32 dbcr1;
+#ifdef CONFIG_KVM_EXIT_TIMING
+ struct kvmppc_exit_timing timing_exit;
+ struct kvmppc_exit_timing timing_last_enter;
+ u32 last_exit_type;
+ u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_last_exit;
+ struct dentry *debugfs_exit_timing;
+#endif
+
u32 last_inst;
- u32 fault_dear;
- u32 fault_esr;
+ ulong fault_dear;
+ ulong fault_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de..36d2a50a848 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
-struct kvm_tlb {
- struct tlbe guest_tlb[PPC44x_TLB_SIZE];
- struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-};
-
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */
};
-extern const unsigned char exception_priority[];
-extern const unsigned char priority_exception[];
-
extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern char kvmppc_handlers_start[];
extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
- u64 asid, u32 flags);
-extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
- gva_t eend, u32 asid);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+ u64 asid, u32 flags, u32 max_bytes,
+ unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-/* XXX Book E specific */
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
-
-extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
-
-static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
-{
- unsigned int priority = exception_priority[exception];
- set_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
- unsigned int priority = exception_priority[exception];
- clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
- if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
- kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
- vcpu->arch.msr = new_msr;
-
- if (vcpu->arch.msr & MSR_WE)
- kvm_vcpu_block(vcpu);
-}
-
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
- if (vcpu->arch.pid != new_pid) {
- vcpu->arch.pid = new_pid;
- vcpu->arch.swap_pid = 1;
- }
-}
+/* Core-specific hooks */
+
+extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
+ unsigned int id);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_processor_compat(void);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr);
+
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq);
+
+extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int op, int *advance);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7..27cc6fdcd3b 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
#ifndef __ASSEMBLY__
extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
typedef struct {
unsigned int id;
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 95035c602ba..639dc96077a 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -32,6 +32,12 @@ struct op_system_config {
unsigned long mmcr0;
unsigned long mmcr1;
unsigned long mmcra;
+#ifdef CONFIG_OPROFILE_CELL
+ /* Register for oprofile user tool to check cell kernel profiling
+ * suport.
+ */
+ unsigned long cell_support;
+#endif
#endif
unsigned long enable_kernel;
unsigned long enable_user;
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index cff30c0ef1f..eead5c67197 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -320,6 +320,7 @@ enum ps3_match_id {
enum ps3_match_sub_id {
PS3_MATCH_SUB_ID_GPU_FB = 1,
+ PS3_MATCH_SUB_ID_GPU_RAMDISK = 2,
};
#define PS3_MODULE_ALIAS_EHCI "ps3:1:0"
@@ -332,6 +333,7 @@ enum ps3_match_sub_id {
#define PS3_MODULE_ALIAS_STOR_FLASH "ps3:8:0"
#define PS3_MODULE_ALIAS_SOUND "ps3:9:0"
#define PS3_MODULE_ALIAS_GPU_FB "ps3:10:1"
+#define PS3_MODULE_ALIAS_GPU_RAMDISK "ps3:10:2"
#define PS3_MODULE_ALIAS_LPM "ps3:11:0"
enum ps3_system_bus_device_type {
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 8b2eb044270..0ab8d869e3d 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -128,7 +128,7 @@ struct spu {
int number;
unsigned int irqs[3];
u32 node;
- u64 flags;
+ unsigned long flags;
u64 class_0_pending;
u64 class_0_dar;
u64 class_1_dar;
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
new file mode 100644
index 00000000000..ef824ae4b79
--- /dev/null
+++ b/arch/powerpc/include/asm/swab.h
@@ -0,0 +1,90 @@
+#ifndef _ASM_POWERPC_SWAB_H
+#define _ASM_POWERPC_SWAB_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __GNUC__
+
+#ifndef __powerpc64__
+#define __SWAB_64_THRU_32__
+#endif /* __powerpc64__ */
+
+#ifdef __KERNEL__
+
+static __inline__ __u16 ld_le16(const volatile __u16 *addr)
+{
+ __u16 val;
+
+ __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+ return val;
+}
+#define __arch_swab16p ld_le16
+
+static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
+{
+ __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void __arch_swab16s(__u16 *addr)
+{
+ st_le16(addr, *addr);
+}
+#define __arch_swab16s __arch_swab16s
+
+static __inline__ __u32 ld_le32(const volatile __u32 *addr)
+{
+ __u32 val;
+
+ __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+ return val;
+}
+#define __arch_swab32p ld_le32
+
+static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
+{
+ __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void __arch_swab32s(__u32 *addr)
+{
+ st_le32(addr, *addr);
+}
+#define __arch_swab32s __arch_swab32s
+
+static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
+{
+ __u16 result;
+
+ __asm__("rlwimi %0,%1,8,16,23"
+ : "=r" (result)
+ : "r" (value), "0" (value >> 8));
+ return result;
+}
+#define __arch_swab16 __arch_swab16
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
+{
+ __u32 result;
+
+ __asm__("rlwimi %0,%1,24,16,23\n\t"
+ "rlwimi %0,%1,8,8,15\n\t"
+ "rlwimi %0,%1,24,0,7"
+ : "=r" (result)
+ : "r" (value), "0" (value >> 24));
+ return result;
+}
+#define __arch_swab32 __arch_swab32
+
+#endif /* __KERNEL__ */
+
+#endif /* __GNUC__ */
+
+#endif /* _ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f9799..375258559ae 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node)
return numa_cpumask_lookup_table[node];
}
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
+
static inline int node_to_first_cpu(int node)
{
- cpumask_t tmp;
- tmp = node_to_cpumask(node);
- return first_cpu(tmp);
+ return cpumask_first(cpumask_of_node(node));
}
int of_node_to_nid(struct device_node *device);
@@ -46,9 +46,12 @@ static inline int pcibus_to_node(struct pci_bus *bus)
node_to_cpumask(pcibus_to_node(bus)) \
)
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@@ -109,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
#endif
#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1308a86e907..8d1a419df35 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -29,7 +29,7 @@ endif
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
- signal.o sysfs.o
+ signal.o sysfs.o cacheinfo.o
obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 06958da94f1..19ee491e9e2 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm_host.h>
-#endif
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
@@ -51,6 +48,9 @@
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
+#ifdef CONFIG_KVM
+#include <asm/kvm_44x.h>
+#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
@@ -361,12 +361,10 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
- DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+ DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
- DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
- DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -392,5 +390,16 @@ int main(void)
DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
#endif
+#ifdef CONFIG_KVM_EXIT_TIMING
+ DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbu));
+ DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbl));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbu));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbl));
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 00000000000..b33f0417a4b
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,837 @@
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <asm/prom.h>
+
+#include "cacheinfo.h"
+
+/* per-cpu object for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+ struct kobject *kobj; /* bare (not embedded) kobject for cache
+ * directory */
+ struct cache_index_dir *index; /* list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has an index
+ * subdirectory corresponding to a cache object associated with the
+ * cpu. This object's lifetime is managed via the embedded kobject.
+ */
+struct cache_index_dir {
+ struct kobject kobj;
+ struct cache_index_dir *next; /* next index in parent directory */
+ struct cache *cache;
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type */
+struct cache_type_info {
+ const char *name;
+ const char *size_prop;
+
+ /* Allow for both [di]-cache-line-size and
+ * [di]-cache-block-size properties. According to the PowerPC
+ * Processor binding, -line-size should be provided if it
+ * differs from the cache block size (that which is operated
+ * on by cache instructions), so we look for -line-size first.
+ * See cache_get_line_size(). */
+
+ const char *line_size_props[2];
+ const char *nr_sets_prop;
+};
+
+/* These are used to index the cache_type_info array. */
+#define CACHE_TYPE_UNIFIED 0
+#define CACHE_TYPE_INSTRUCTION 1
+#define CACHE_TYPE_DATA 2
+
+static const struct cache_type_info cache_type_info[] = {
+ {
+ /* PowerPC Processor binding says the [di]-cache-*
+ * must be equal on unified caches, so just use
+ * d-cache properties. */
+ .name = "Unified",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+ {
+ .name = "Instruction",
+ .size_prop = "i-cache-size",
+ .line_size_props = { "i-cache-line-size",
+ "i-cache-block-size", },
+ .nr_sets_prop = "i-cache-sets",
+ },
+ {
+ .name = "Data",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system. There are separate objects for Harvard caches: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object. A cache object is released when its shared_cpu_map
+ * is cleared (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list
+ * (cache_list) of cache objects; and a singly-linked list
+ * representing the local cache hierarchy, which is ordered by level
+ * (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+ struct device_node *ofnode; /* OF node for this cache, may be cpu */
+ struct cpumask shared_cpu_map; /* online CPUs using this cache */
+ int type; /* split cache disambiguation */
+ int level; /* level not explicit in device tree */
+ struct list_head list; /* global list of cache objects */
+ struct cache *next_local; /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir);
+
+/* traversal/modification of this list occurs only at cpu hotplug time;
+ * access is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+ return container_of(k, struct cache_index_dir, kobj);
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+ return cache_type_info[cache->type].name;
+}
+
+static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+{
+ cache->type = type;
+ cache->level = level;
+ cache->ofnode = of_node_get(ofnode);
+ INIT_LIST_HEAD(&cache->list);
+ list_add(&cache->list, &cache_list);
+}
+
+static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+{
+ struct cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache)
+ cache_init(cache, type, level, ofnode);
+
+ return cache;
+}
+
+static void release_cache_debugcheck(struct cache *cache)
+{
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list)
+ WARN_ONCE(iter->next_local == cache,
+ "cache for %s(%s) refers to cache for %s(%s)\n",
+ iter->ofnode->full_name,
+ cache_type_string(iter),
+ cache->ofnode->full_name,
+ cache_type_string(cache));
+}
+
+static void release_cache(struct cache *cache)
+{
+ if (!cache)
+ return;
+
+ pr_debug("freeing L%d %s cache for %s\n", cache->level,
+ cache_type_string(cache), cache->ofnode->full_name);
+
+ release_cache_debugcheck(cache);
+ list_del(&cache->list);
+ of_node_put(cache->ofnode);
+ kfree(cache);
+}
+
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+ struct cache *next = cache;
+
+ while (next) {
+ WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
+ "CPU %i already accounted in %s(%s)\n",
+ cpu, next->ofnode->full_name,
+ cache_type_string(next));
+ cpumask_set_cpu(cpu, &next->shared_cpu_map);
+ next = next->next_local;
+ }
+}
+
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const u32 *cache_size;
+
+ propname = cache_type_info[cache->type].size_prop;
+
+ cache_size = of_get_property(cache->ofnode, propname, NULL);
+ if (!cache_size)
+ return -ENODEV;
+
+ *ret = *cache_size;
+ return 0;
+}
+
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int size;
+
+ if (cache_size(cache, &size))
+ return -ENODEV;
+
+ *ret = size / 1024;
+ return 0;
+}
+
+/* not cache_line_size() because that's a macro in include/linux/cache.h */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+ const u32 *line_size;
+ int i, lim;
+
+ lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
+
+ for (i = 0; i < lim; i++) {
+ const char *propname;
+
+ propname = cache_type_info[cache->type].line_size_props[i];
+ line_size = of_get_property(cache->ofnode, propname, NULL);
+ if (line_size)
+ break;
+ }
+
+ if (!line_size)
+ return -ENODEV;
+
+ *ret = *line_size;
+ return 0;
+}
+
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const u32 *nr_sets;
+
+ propname = cache_type_info[cache->type].nr_sets_prop;
+
+ nr_sets = of_get_property(cache->ofnode, propname, NULL);
+ if (!nr_sets)
+ return -ENODEV;
+
+ *ret = *nr_sets;
+ return 0;
+}
+
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int line_size;
+ unsigned int nr_sets;
+ unsigned int size;
+
+ if (cache_nr_sets(cache, &nr_sets))
+ goto err;
+
+ /* If the cache is fully associative, there is no need to
+ * check the other properties.
+ */
+ if (nr_sets == 1) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (cache_get_line_size(cache, &line_size))
+ goto err;
+ if (cache_size(cache, &size))
+ goto err;
+
+ if (!(nr_sets > 0 && size > 0 && line_size > 0))
+ goto err;
+
+ *ret = (size / nr_sets) / line_size;
+ return 0;
+err:
+ return -ENODEV;
+}
+
+/* helper for dealing with split caches */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+ struct cache *iter;
+
+ if (cache->type == CACHE_TYPE_UNIFIED)
+ return cache;
+
+ list_for_each_entry(iter, &cache_list, list)
+ if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ return iter;
+
+ return cache;
+}
+
+/* return the first cache on a local list matching node */
+static struct cache *cache_lookup_by_node(const struct device_node *node)
+{
+ struct cache *cache = NULL;
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list) {
+ if (iter->ofnode != node)
+ continue;
+ cache = cache_find_first_sibling(iter);
+ break;
+ }
+
+ return cache;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+ return of_get_property(np, "cache-unified", NULL);
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+
+ cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+
+ return cache;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+{
+ struct cache *dcache, *icache;
+
+ pr_debug("creating L%d dcache and icache for %s\n", level,
+ node->full_name);
+
+ dcache = new_cache(CACHE_TYPE_DATA, level, node);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+
+ if (!dcache || !icache)
+ goto err;
+
+ dcache->next_local = icache;
+
+ return dcache;
+err:
+ release_cache(dcache);
+ release_cache(icache);
+ return NULL;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ if (cache_node_is_unified(node))
+ cache = cache_do_one_devnode_unified(node, level);
+ else
+ cache = cache_do_one_devnode_split(node, level);
+
+ return cache;
+}
+
+static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ cache = cache_lookup_by_node(node);
+
+ WARN_ONCE(cache && cache->level != level,
+ "cache level mismatch on lookup (got %d, expected %d)\n",
+ cache->level, level);
+
+ if (!cache)
+ cache = cache_do_one_devnode(node, level);
+
+ return cache;
+}
+
+static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+ while (smaller->next_local) {
+ if (smaller->next_local == bigger)
+ return; /* already linked */
+ smaller = smaller->next_local;
+ }
+
+ smaller->next_local = bigger;
+}
+
+static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+ WARN_ON_ONCE(cache->level != 1);
+ WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+}
+
+static void __cpuinit do_subsidiary_caches(struct cache *cache)
+{
+ struct device_node *subcache_node;
+ int level = cache->level;
+
+ do_subsidiary_caches_debugcheck(cache);
+
+ while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
+ struct cache *subcache;
+
+ level++;
+ subcache = cache_lookup_or_instantiate(subcache_node, level);
+ of_node_put(subcache_node);
+ if (!subcache)
+ break;
+
+ link_cache_lists(cache, subcache);
+ cache = subcache;
+ }
+}
+
+static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cpu_cache = NULL;
+
+ pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ goto out;
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ if (!cpu_cache)
+ goto out;
+
+ do_subsidiary_caches(cpu_cache);
+
+ cache_cpu_set(cpu_cache, cpu_id);
+out:
+ of_node_put(cpu_node);
+
+ return cpu_cache;
+}
+
+static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct sys_device *sysdev;
+ struct kobject *kobj = NULL;
+
+ sysdev = get_cpu_sysdev(cpu_id);
+ WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id);
+ if (!sysdev)
+ goto err;
+
+ kobj = kobject_create_and_add("cache", &sysdev->kobj);
+ if (!kobj)
+ goto err;
+
+ cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+ if (!cache_dir)
+ goto err;
+
+ cache_dir->kobj = kobj;
+
+ WARN_ON_ONCE(per_cpu(cache_dir, cpu_id) != NULL);
+
+ per_cpu(cache_dir, cpu_id) = cache_dir;
+
+ return cache_dir;
+err:
+ kobject_put(kobj);
+ return NULL;
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(kobj);
+
+ pr_debug("freeing index directory for L%d %s cache\n",
+ index->cache->level, cache_type_string(index->cache));
+
+ kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+ struct kobj_attribute *kobj_attr;
+
+ kobj_attr = container_of(attr, struct kobj_attribute, attr);
+
+ return kobj_attr->show(k, kobj_attr, buf);
+}
+
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(k);
+
+ return index->cache;
+}
+
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int size_kb;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_size_kb(cache, &size_kb))
+ return -ENODEV;
+
+ return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+ __ATTR(size, 0444, size_show, NULL);
+
+
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int line_size;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_get_line_size(cache, &line_size))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+ __ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int nr_sets;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_nr_sets(cache, &nr_sets))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+ __ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int associativity;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_associativity(cache, &associativity))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+ __ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ return sprintf(buf, "%s\n", cache_type_string(cache));
+}
+
+static struct kobj_attribute cache_type_attr =
+ __ATTR(type, 0444, type_show, NULL);
+
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+
+ return sprintf(buf, "%d\n", cache->level);
+}
+
+static struct kobj_attribute cache_level_attr =
+ __ATTR(level, 0444, level_show, NULL);
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+ int len;
+ int n = 0;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+ len = PAGE_SIZE - 2;
+
+ if (len > 1) {
+ n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ }
+ return n;
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+ __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_attrs. This is the
+ * minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+ &cache_type_attr.attr,
+ &cache_level_attr.attr,
+ &cache_shared_cpu_map_attr.attr,
+ NULL,
+};
+
+/* Attributes which should be created if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+ &cache_size_attr,
+ &cache_line_size_attr,
+ &cache_nr_sets_attr,
+ &cache_assoc_attr,
+};
+
+static struct sysfs_ops cache_index_ops = {
+ .show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+ .release = cache_index_release,
+ .sysfs_ops = &cache_index_ops,
+ .default_attrs = cache_index_default_attrs,
+};
+
+static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+ const char *cache_name;
+ const char *cache_type;
+ struct cache *cache;
+ char *buf;
+ int i;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ cache = dir->cache;
+ cache_name = cache->ofnode->full_name;
+ cache_type = cache_type_string(cache);
+
+ /* We don't want to create an attribute that can't provide a
+ * meaningful value. Check the return value of each optional
+ * attribute's ->show method before registering the
+ * attribute.
+ */
+ for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+ struct kobj_attribute *attr;
+ ssize_t rc;
+
+ attr = cache_index_opt_attrs[i];
+
+ rc = attr->show(&dir->kobj, attr, buf);
+ if (rc <= 0) {
+ pr_debug("not creating %s attribute for "
+ "%s(%s) (rc = %zd)\n",
+ attr->attr.name, cache_name,
+ cache_type, rc);
+ continue;
+ }
+ if (sysfs_create_file(&dir->kobj, &attr->attr))
+ pr_debug("could not create %s attribute for %s(%s)\n",
+ attr->attr.name, cache_name, cache_type);
+ }
+
+ kfree(buf);
+}
+
+static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index_dir;
+ int rc;
+
+ index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+ if (!index_dir)
+ goto err;
+
+ index_dir->cache = cache;
+
+ rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+ cache_dir->kobj, "index%d", index);
+ if (rc)
+ goto err;
+
+ index_dir->next = cache_dir->index;
+ cache_dir->index = index_dir;
+
+ cacheinfo_create_index_opt_attrs(index_dir);
+
+ return;
+err:
+ kfree(index_dir);
+}
+
+static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+ int index = 0;
+
+ cache_dir = cacheinfo_create_cache_dir(cpu_id);
+ if (!cache_dir)
+ return;
+
+ cache = cache_list;
+ while (cache) {
+ cacheinfo_create_index_dir(cache, index, cache_dir);
+ index++;
+ cache = cache->next_local;
+ }
+}
+
+void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+{
+ struct cache *cache;
+
+ cache = cache_chain_instantiate(cpu_id);
+ if (!cache)
+ return;
+
+ cacheinfo_sysfs_populate(cpu_id, cache);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
+
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cache;
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ return NULL;
+
+ cache = cache_lookup_by_node(cpu_node);
+ of_node_put(cpu_node);
+
+ return cache;
+}
+
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index;
+
+ index = cache_dir->index;
+
+ while (index) {
+ struct cache_index_dir *next;
+
+ next = index->next;
+ kobject_put(&index->kobj);
+ index = next;
+ }
+}
+
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+ remove_index_dirs(cache_dir);
+
+ kobject_put(cache_dir->kobj);
+
+ kfree(cache_dir);
+}
+
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+ while (cache) {
+ struct cache *next = cache->next_local;
+
+ WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+ "CPU %i not accounted in %s(%s)\n",
+ cpu, cache->ofnode->full_name,
+ cache_type_string(cache));
+
+ cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+ /* Release the cache object if all the cpus using it
+ * are offline */
+ if (cpumask_empty(&cache->shared_cpu_map))
+ release_cache(cache);
+
+ cache = next;
+ }
+}
+
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+
+ /* Prevent userspace from seeing inconsistent state - remove
+ * the sysfs hierarchy first */
+ cache_dir = per_cpu(cache_dir, cpu_id);
+
+ /* careful, sysfs population may have failed */
+ if (cache_dir)
+ remove_cache_dir(cache_dir);
+
+ per_cpu(cache_dir, cpu_id) = NULL;
+
+ /* clear the CPU's bit in its cache chain, possibly freeing
+ * cache objects */
+ cache = cache_lookup_by_cpu(cpu_id);
+ if (cache)
+ cache_cpu_clear(cache, cpu_id);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 00000000000..a7b74d36acd
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0ab12..23b8b5e36f9 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
mask = map;
}
if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
+ irq_desc[irq].chip->set_affinity(irq, &mask);
else if (irq_desc[irq].action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915452c..c9329786073 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
- mutex_unlock(&kprobe_mutex);
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
}
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -316,7 +317,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
/*
* It is possible to have multiple instances associated with a given
* task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
+ * have a return probe installed on them, and/or more than one return
* return probe was registered for a target function.
*
* We can handle this because:
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 2538030954d..da5a3855a0c 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -16,7 +16,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG
+#define DEBUG
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -1356,6 +1356,63 @@ static void __init pcibios_allocate_resources(int pass)
}
}
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset;
+ struct resource *res, *pres;
+ int i;
+
+ pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+ /* Check for IO */
+ if (!(hose->io_resource.flags & IORESOURCE_IO))
+ goto no_io;
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy IO";
+ res->flags = IORESOURCE_IO;
+ res->start = offset;
+ res->end = (offset + 0xfff) & 0xfffffffful;
+ pr_debug("Candidate legacy IO: %pR\n", res);
+ if (request_resource(&hose->io_resource, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+
+ no_io:
+ /* Check for memory */
+ offset = hose->pci_mem_offset;
+ pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
+ for (i = 0; i < 3; i++) {
+ pres = &hose->mem_resources[i];
+ if (!(pres->flags & IORESOURCE_MEM))
+ continue;
+ pr_debug("hose mem res: %pR\n", pres);
+ if ((pres->start - offset) <= 0xa0000 &&
+ (pres->end - offset) >= 0xbffff)
+ break;
+ }
+ if (i >= 3)
+ return;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy VGA memory";
+ res->flags = IORESOURCE_MEM;
+ res->start = 0xa0000 + offset;
+ res->end = 0xbffff + offset;
+ pr_debug("Candidate VGA memory: %pR\n", res);
+ if (request_resource(pres, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+}
+
void __init pcibios_resource_survey(void)
{
struct pci_bus *b;
@@ -1371,6 +1428,18 @@ void __init pcibios_resource_survey(void)
pcibios_allocate_resources(1);
}
+ /* Before we start assigning unassigned resource, we try to reserve
+ * the low IO area and the VGA memory area if they intersect the
+ * bus available resources to avoid allocating things on top of them
+ */
+ if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_reserve_legacy_regions(b);
+ }
+
+ /* Now, if the platform didn't decide to blindly trust the firmware,
+ * we proceed to assigning things that were left unassigned
+ */
if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
pr_debug("PCI: Assigning unassigned resouces...\n");
pci_assign_unassigned_resources();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 39fadc6e149..586962f65c2 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -560,9 +560,14 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
* G5 machines... So when something asks for bus 0 io base
* (bus 0 is HT root), we return the AGP one instead.
*/
- if (machine_is_compatible("MacRISC4"))
- if (in_bus == 0)
+ if (in_bus == 0 && machine_is_compatible("MacRISC4")) {
+ struct device_node *agp;
+
+ agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+ if (agp)
in_bus = 0xf0;
+ of_node_put(agp);
+ }
/* That syscall isn't quite compatible with PCI domains, but it's
* used on pre-domains setup. We return the first match
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index dcec1325d34..c8b27bb4dbd 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -165,6 +165,7 @@ EXPORT_SYMBOL(timer_interrupt);
EXPORT_SYMBOL(irq_desc);
EXPORT_SYMBOL(tb_ticks_per_jiffy);
EXPORT_SYMBOL(cacheable_memcpy);
+EXPORT_SYMBOL(cacheable_memzero);
#endif
#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a..fb7049c054c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
#include <linux/mqueue.h>
#include <linux/hardirq.h>
#include <linux/utsname.h>
+#include <linux/kernel_stat.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6f73c739f1e..c09cffafb6e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -824,11 +824,11 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
#endif
#ifdef CONFIG_KEXEC
- lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
if (lprop)
crashk_res.start = *lprop;
- lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
if (lprop)
crashk_res.end = crashk_res.start + *lprop - 1;
#endif
@@ -893,12 +893,12 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
u64 base, size, lmb_size;
unsigned int is_kexec_kdump = 0, rngs;
- ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+ ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
return 0;
lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
- dm = (cell_t *)of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
+ dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
if (dm == NULL || l < sizeof(cell_t))
return 0;
@@ -907,7 +907,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
return 0;
/* check if this is a kexec/kdump kernel. */
- usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+ usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
&l);
if (usm != NULL)
is_kexec_kdump = 1;
@@ -981,9 +981,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
} else if (strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
if (reg == NULL)
- reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
+ reg = of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 2445945d376..7f1b33d5e30 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1210,7 +1210,7 @@ static void __init prom_initialize_tce_table(void)
/* Initialize the table to have a one-to-one mapping
* over the allocated size.
*/
- tce_entryp = (unsigned long *)base;
+ tce_entryp = (u64 *)base;
for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
tce_entry = (i << PAGE_SHIFT);
tce_entry |= 0x3;
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 8c133556608..8f0856f312d 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -232,11 +232,6 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
}
EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
-static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
-{
- return (((pin - 1) + slot) % 4) + 1;
-}
-
int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
{
struct device_node *dn, *ppnode;
@@ -306,7 +301,7 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
/* We can only get here if we hit a P2P bridge with no node,
* let's do standard swizzling and try again
*/
- lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec);
+ lspec = pci_swizzle_interrupt_pin(pdev, lspec);
pdev = ppdev;
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8ac3f721d23..65484b2200b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,13 +59,9 @@
struct thread_info *secondary_ti;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0c64f10087b..4a2ee08af6a 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -18,6 +18,8 @@
#include <asm/machdep.h>
#include <asm/smp.h>
+#include "cacheinfo.h"
+
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
@@ -25,8 +27,6 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-static DEFINE_PER_CPU(struct kobject *, cache_toplevel);
-
/*
* SMT snooze delay stuff, 64-bit only for now
*/
@@ -343,283 +343,6 @@ static struct sysdev_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
-struct cache_desc {
- struct kobject kobj;
- struct cache_desc *next;
- const char *type; /* Instruction, Data, or Unified */
- u32 size; /* total cache size in KB */
- u32 line_size; /* in bytes */
- u32 nr_sets; /* number of sets */
- u32 level; /* e.g. 1, 2, 3... */
- u32 associativity; /* e.g. 8-way... 0 is fully associative */
-};
-
-DEFINE_PER_CPU(struct cache_desc *, cache_desc);
-
-static struct cache_desc *kobj_to_cache_desc(struct kobject *k)
-{
- return container_of(k, struct cache_desc, kobj);
-}
-
-static void cache_desc_release(struct kobject *k)
-{
- struct cache_desc *desc = kobj_to_cache_desc(k);
-
- pr_debug("%s: releasing %s\n", __func__, kobject_name(k));
-
- if (desc->next)
- kobject_put(&desc->next->kobj);
-
- kfree(kobj_to_cache_desc(k));
-}
-
-static ssize_t cache_desc_show(struct kobject *k, struct attribute *attr, char *buf)
-{
- struct kobj_attribute *kobj_attr;
-
- kobj_attr = container_of(attr, struct kobj_attribute, attr);
-
- return kobj_attr->show(k, kobj_attr, buf);
-}
-
-static struct sysfs_ops cache_desc_sysfs_ops = {
- .show = cache_desc_show,
-};
-
-static struct kobj_type cache_desc_type = {
- .release = cache_desc_release,
- .sysfs_ops = &cache_desc_sysfs_ops,
-};
-
-static ssize_t cache_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%uK\n", cache->size);
-}
-
-static struct kobj_attribute cache_size_attr =
- __ATTR(size, 0444, cache_size_show, NULL);
-
-static ssize_t cache_line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%u\n", cache->line_size);
-}
-
-static struct kobj_attribute cache_line_size_attr =
- __ATTR(coherency_line_size, 0444, cache_line_size_show, NULL);
-
-static ssize_t cache_nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%u\n", cache->nr_sets);
-}
-
-static struct kobj_attribute cache_nr_sets_attr =
- __ATTR(number_of_sets, 0444, cache_nr_sets_show, NULL);
-
-static ssize_t cache_type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%s\n", cache->type);
-}
-
-static struct kobj_attribute cache_type_attr =
- __ATTR(type, 0444, cache_type_show, NULL);
-
-static ssize_t cache_level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%u\n", cache->level);
-}
-
-static struct kobj_attribute cache_level_attr =
- __ATTR(level, 0444, cache_level_show, NULL);
-
-static ssize_t cache_assoc_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
- struct cache_desc *cache = kobj_to_cache_desc(k);
-
- return sprintf(buf, "%u\n", cache->associativity);
-}
-
-static struct kobj_attribute cache_assoc_attr =
- __ATTR(ways_of_associativity, 0444, cache_assoc_show, NULL);
-
-struct cache_desc_info {
- const char *type;
- const char *size_prop;
- const char *line_size_prop;
- const char *nr_sets_prop;
-};
-
-/* PowerPC Processor binding says the [di]-cache-* must be equal on
- * unified caches, so just use d-cache properties. */
-static struct cache_desc_info ucache_info = {
- .type = "Unified",
- .size_prop = "d-cache-size",
- .line_size_prop = "d-cache-line-size",
- .nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info dcache_info = {
- .type = "Data",
- .size_prop = "d-cache-size",
- .line_size_prop = "d-cache-line-size",
- .nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info icache_info = {
- .type = "Instruction",
- .size_prop = "i-cache-size",
- .line_size_prop = "i-cache-line-size",
- .nr_sets_prop = "i-cache-sets",
-};
-
-static struct cache_desc * __cpuinit create_cache_desc(struct device_node *np, struct kobject *parent, int index, int level, struct cache_desc_info *info)
-{
- const u32 *cache_line_size;
- struct cache_desc *new;
- const u32 *cache_size;
- const u32 *nr_sets;
- int rc;
-
- new = kzalloc(sizeof(*new), GFP_KERNEL);
- if (!new)
- return NULL;
-
- rc = kobject_init_and_add(&new->kobj, &cache_desc_type, parent,
- "index%d", index);
- if (rc)
- goto err;
-
- /* type */
- new->type = info->type;
- rc = sysfs_create_file(&new->kobj, &cache_type_attr.attr);
- WARN_ON(rc);
-
- /* level */
- new->level = level;
- rc = sysfs_create_file(&new->kobj, &cache_level_attr.attr);
- WARN_ON(rc);
-
- /* size */
- cache_size = of_get_property(np, info->size_prop, NULL);
- if (cache_size) {
- new->size = *cache_size / 1024;
- rc = sysfs_create_file(&new->kobj,
- &cache_size_attr.attr);
- WARN_ON(rc);
- }
-
- /* coherency_line_size */
- cache_line_size = of_get_property(np, info->line_size_prop, NULL);
- if (cache_line_size) {
- new->line_size = *cache_line_size;
- rc = sysfs_create_file(&new->kobj,
- &cache_line_size_attr.attr);
- WARN_ON(rc);
- }
-
- /* number_of_sets */
- nr_sets = of_get_property(np, info->nr_sets_prop, NULL);
- if (nr_sets) {
- new->nr_sets = *nr_sets;
- rc = sysfs_create_file(&new->kobj,
- &cache_nr_sets_attr.attr);
- WARN_ON(rc);
- }
-
- /* ways_of_associativity */
- if (new->nr_sets == 1) {
- /* fully associative */
- new->associativity = 0;
- goto create_assoc;
- }
-
- if (new->nr_sets && new->size && new->line_size) {
- /* If we have values for all of these we can derive
- * the associativity. */
- new->associativity =
- ((new->size * 1024) / new->nr_sets) / new->line_size;
-create_assoc:
- rc = sysfs_create_file(&new->kobj,
- &cache_assoc_attr.attr);
- WARN_ON(rc);
- }
-
- return new;
-err:
- kfree(new);
- return NULL;
-}
-
-static bool cache_is_unified(struct device_node *np)
-{
- return of_get_property(np, "cache-unified", NULL);
-}
-
-static struct cache_desc * __cpuinit create_cache_index_info(struct device_node *np, struct kobject *parent, int index, int level)
-{
- struct device_node *next_cache;
- struct cache_desc *new, **end;
-
- pr_debug("%s(node = %s, index = %d)\n", __func__, np->full_name, index);
-
- if (cache_is_unified(np)) {
- new = create_cache_desc(np, parent, index, level,
- &ucache_info);
- } else {
- new = create_cache_desc(np, parent, index, level,
- &dcache_info);
- if (new) {
- index++;
- new->next = create_cache_desc(np, parent, index, level,
- &icache_info);
- }
- }
- if (!new)
- return NULL;
-
- end = &new->next;
- while (*end)
- end = &(*end)->next;
-
- next_cache = of_find_next_cache_node(np);
- if (!next_cache)
- goto out;
-
- *end = create_cache_index_info(next_cache, parent, ++index, ++level);
-
- of_node_put(next_cache);
-out:
- return new;
-}
-
-static void __cpuinit create_cache_info(struct sys_device *sysdev)
-{
- struct kobject *cache_toplevel;
- struct device_node *np = NULL;
- int cpu = sysdev->id;
-
- cache_toplevel = kobject_create_and_add("cache", &sysdev->kobj);
- if (!cache_toplevel)
- return;
- per_cpu(cache_toplevel, cpu) = cache_toplevel;
- np = of_get_cpu_node(cpu, NULL);
- if (np != NULL) {
- per_cpu(cache_desc, cpu) =
- create_cache_index_info(np, cache_toplevel, 0, 1);
- of_node_put(np);
- }
- return;
-}
-
static void __cpuinit register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -684,25 +407,10 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
sysdev_create_file(s, &attr_dscr);
#endif /* CONFIG_PPC64 */
- create_cache_info(s);
+ cacheinfo_cpu_online(cpu);
}
#ifdef CONFIG_HOTPLUG_CPU
-static void remove_cache_info(struct sys_device *sysdev)
-{
- struct kobject *cache_toplevel;
- struct cache_desc *cache_desc;
- int cpu = sysdev->id;
-
- cache_desc = per_cpu(cache_desc, cpu);
- if (cache_desc != NULL)
- kobject_put(&cache_desc->kobj);
-
- cache_toplevel = per_cpu(cache_toplevel, cpu);
- if (cache_toplevel != NULL)
- kobject_put(cache_toplevel);
-}
-
static void unregister_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -769,7 +477,7 @@ static void unregister_cpu_online(unsigned int cpu)
sysdev_remove_file(s, &attr_dscr);
#endif /* CONFIG_PPC64 */
- remove_cache_info(s);
+ cacheinfo_cpu_offline(cpu);
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a514042..c9564031a2a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
delta += sys_time;
get_paca()->system_time = 0;
}
- account_system_time(tsk, 0, delta);
- account_system_time_scaled(tsk, deltascaled);
+ if (in_irq() || idle_task(smp_processor_id()) != tsk)
+ account_system_time(tsk, 0, delta, deltascaled);
+ else
+ account_idle_time(delta);
per_cpu(cputime_last_delta, smp_processor_id()) = delta;
per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
utime = get_paca()->user_time;
get_paca()->user_time = 0;
- account_user_time(tsk, utime);
-
utimescaled = cputime_to_scaled(utime);
- account_user_time_scaled(tsk, utimescaled);
+ account_user_time(tsk, utime, utimescaled);
}
/*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
tb = mftb();
purr = mfspr(SPRN_PURR);
stolen = (tb - pme->tb) - (purr - pme->purr);
- if (stolen > 0)
- account_steal_time(current, stolen);
+ if (stolen > 0) {
+ if (idle_task(smp_processor_id()) != current)
+ account_steal_time(stolen);
+ else
+ account_idle_time(stolen);
+ }
pme->tb = tb;
pme->purr = purr;
}
@@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu)
struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
*dec = decrementer_clockevent;
- dec->cpumask = cpumask_of_cpu(cpu);
+ dec->cpumask = cpumask_of(cpu);
printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 00000000000..a66bec57265
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE].
+ */
+static void kvm44x_disable_debug_interrupts(void)
+{
+ mtmsr(mfmsr() & ~MSR_DE);
+}
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+ kvm44x_disable_debug_interrupts();
+
+ mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+ mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+ mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+ mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+ mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+ mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+ mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+ mtmsr(vcpu->arch.host_msr);
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+ u32 dbcr0 = 0;
+
+ vcpu->arch.host_msr = mfmsr();
+ kvm44x_disable_debug_interrupts();
+
+ /* Save host debug register state. */
+ vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+ vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+ vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+ vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+ vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+ vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+ vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+ /* set registers up for guest */
+
+ if (dbg->bp[0]) {
+ mtspr(SPRN_IAC1, dbg->bp[0]);
+ dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+ }
+ if (dbg->bp[1]) {
+ mtspr(SPRN_IAC2, dbg->bp[1]);
+ dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+ }
+ if (dbg->bp[2]) {
+ mtspr(SPRN_IAC3, dbg->bp[2]);
+ dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+ }
+ if (dbg->bp[3]) {
+ mtspr(SPRN_IAC4, dbg->bp[3]);
+ dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+ }
+
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBCR1, 0);
+ mtspr(SPRN_DBCR2, 0);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvmppc_44x_tlb_load(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_44x_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+ int i;
+
+ tlbe->tid = 0;
+ tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+ tlbe->word1 = 0;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+ tlbe++;
+ tlbe->tid = 0;
+ tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+ tlbe->word1 = 0xef600000;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+ | PPC44x_TLB_I | PPC44x_TLB_G;
+
+ /* Since the guest can directly access the timebase, it must know the
+ * real timebase frequency. Accordingly, it must see the state of
+ * CCR1[TCS]. */
+ vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+ vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
+ return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+ if (index == -1) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ gtlbe = &vcpu_44x->guest_tlb[index];
+
+ tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_44x) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vcpu = &vcpu_44x->vcpu;
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ return vcpu;
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+static int kvmppc_44x_init(void)
+{
+ int r;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+}
+
+static void kvmppc_44x_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 00000000000..82489a743a6
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+#define OP_RFI 19
+
+#define XOP_RFI 50
+#define XOP_MFMSR 83
+#define XOP_WRTEE 131
+#define XOP_MTMSR 146
+#define XOP_WRTEEI 163
+#define XOP_MFDCR 323
+#define XOP_MTDCR 451
+#define XOP_TLBSX 914
+#define XOP_ICCCI 966
+#define XOP_TLBWE 978
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int dcrn;
+ int ra;
+ int rb;
+ int rc;
+ int rs;
+ int rt;
+ int ws;
+
+ switch (get_op(inst)) {
+ case OP_RFI:
+ switch (get_xop(inst)) {
+ case XOP_RFI:
+ kvmppc_emul_rfi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+ *advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 31:
+ switch (get_xop(inst)) {
+
+ case XOP_MFMSR:
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+ break;
+
+ case XOP_MTMSR:
+ rs = get_rs(inst);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
+ case XOP_WRTEE:
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case XOP_WRTEEI:
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case XOP_MFDCR:
+ dcrn = get_dcrn(inst);
+ rt = get_rt(inst);
+
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+ local_irq_enable();
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = 0;
+ run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_MTDCR:
+ dcrn = get_dcrn(inst);
+ rs = get_rs(inst);
+
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = vcpu->arch.gpr[rs];
+ run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_TLBWE:
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ ws = get_ws(inst);
+ emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+ break;
+
+ case XOP_TLBSX:
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ rc = get_rc(inst);
+ emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+ break;
+
+ case XOP_ICCCI:
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ switch (sprn) {
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID:
+ kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ case SPRN_CCR0:
+ vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_CCR1:
+ vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+ return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ switch (sprn) {
+ /* 440 */
+ case SPRN_MMUCR:
+ vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+ case SPRN_CCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+ case SPRN_CCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+
+ /* Book E */
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+ return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811..9a34b8edb9e 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
#include <asm/mmu-44x.h>
#include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
#include "44x_tlb.h"
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
+#define PPC44x_TLB_UATTR_MASK \
+ (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
-static unsigned int kvmppc_tlb_44x_pos;
+#ifdef DEBUG
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_44x_tlbe *tlbe;
+ int i;
+
+ printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+ printk("| %2s | %3s | %8s | %8s | %8s |\n",
+ "nr", "tid", "word0", "word1", "word2");
+
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+ tlbe = &vcpu_44x->guest_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" G%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+}
+#endif
+
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+ /* 0 <= index < 64, so the V bit is clear and we can use the index as
+ * word0. */
+ asm volatile(
+ "tlbwe %[index], %[index], 0\n"
+ :
+ : [index] "r"(index)
+ );
+}
+
+static inline void kvmppc_44x_tlbre(unsigned int index,
+ struct kvmppc_44x_tlbe *tlbe)
+{
+ asm volatile(
+ "tlbre %[word0], %[index], 0\n"
+ "mfspr %[tid], %[sprn_mmucr]\n"
+ "andi. %[tid], %[tid], 0xff\n"
+ "tlbre %[word1], %[index], 1\n"
+ "tlbre %[word2], %[index], 2\n"
+ : [word0] "=r"(tlbe->word0),
+ [word1] "=r"(tlbe->word1),
+ [word2] "=r"(tlbe->word2),
+ [tid] "=r"(tlbe->tid)
+ : [index] "r"(index),
+ [sprn_mmucr] "i"(SPRN_MMUCR)
+ : "cc"
+ );
+}
+
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+ struct kvmppc_44x_tlbe *stlbe)
+{
+ unsigned long tmp;
+
+ asm volatile(
+ "mfspr %[tmp], %[sprn_mmucr]\n"
+ "rlwimi %[tmp], %[tid], 0, 0xff\n"
+ "mtspr %[sprn_mmucr], %[tmp]\n"
+ "tlbwe %[word0], %[index], 0\n"
+ "tlbwe %[word1], %[index], 1\n"
+ "tlbwe %[word2], %[index], 2\n"
+ : [tmp] "=&r"(tmp)
+ : [word0] "r"(stlbe->word0),
+ [word1] "r"(stlbe->word1),
+ [word2] "r"(stlbe->word2),
+ [tid] "r"(stlbe->tid),
+ [index] "r"(index),
+ [sprn_mmucr] "i"(SPRN_MMUCR)
+ );
+}
static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
{
- /* Mask off reserved bits. */
- attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+ /* We only care about the guest's permission and user bits. */
+ attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
if (!usermode) {
/* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
/* Make sure host can always access this memory. */
attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
+ /* WIMGE = 0b00100 */
+ attrib |= PPC44x_TLB_M;
+
return attrib;
}
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
+
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+ if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+ kvmppc_44x_tlbwe(i, stlbe);
+ }
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int i)
+{
+ vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
+
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+ if (vcpu_44x->shadow_tlb_mod[i])
+ kvmppc_44x_tlbre(i, stlbe);
+
+ if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+ kvmppc_44x_tlbie(i);
+ }
+}
+
+
/* Search the guest TLB for a matching entry. */
int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
unsigned int as)
{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
/* XXX Replace loop with fancy data structures. */
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+ struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
-struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
- unsigned int index;
- index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
- if (index == -1)
- return NULL;
- return &vcpu->arch.guest_tlb[index];
+ return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
- unsigned int index;
- index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
- if (index == -1)
- return NULL;
- return &vcpu->arch.guest_tlb[index];
+ return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int stlb_index)
{
- return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
-}
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
-static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
- unsigned int index)
-{
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
- struct page *page = vcpu->arch.shadow_pages[index];
+ if (!ref->page)
+ return;
- if (get_tlb_v(stlbe)) {
- if (kvmppc_44x_tlbe_is_writable(stlbe))
- kvm_release_page_dirty(page);
- else
- kvm_release_page_clean(page);
- }
+ /* Discard from the TLB. */
+ /* Note: we could actually invalidate a host mapping, if the host overwrote
+ * this TLB entry since we inserted a guest mapping. */
+ kvmppc_44x_tlbie(stlb_index);
+
+ /* Now release the page. */
+ if (ref->writeable)
+ kvm_release_page_dirty(ref->page);
+ else
+ kvm_release_page_clean(ref->page);
+
+ ref->page = NULL;
+
+ /* XXX set tlb_44x_index to stlb_index? */
+
+ KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
}
void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++)
- kvmppc_44x_shadow_release(vcpu, i);
-}
-
-void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
-{
- vcpu->arch.shadow_tlb_mod[i] = 1;
+ kvmppc_44x_shadow_release(vcpu_44x, i);
}
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
- u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+ u32 flags, u32 max_bytes, unsigned int gtlb_index)
{
+ struct kvmppc_44x_tlbe stlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
- struct tlbe *stlbe;
hpa_t hpaddr;
+ gfn_t gfn;
unsigned int victim;
- /* Future optimization: don't overwrite the TLB entry containing the
- * current PC (or stack?). */
- victim = kvmppc_tlb_44x_pos++;
- if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
- kvmppc_tlb_44x_pos = 0;
- stlbe = &vcpu->arch.shadow_tlb[victim];
+ /* Select TLB entry to clobber. Indirectly guard against races with the TLB
+ * miss handler by disabling interrupts. */
+ local_irq_disable();
+ victim = ++tlb_44x_index;
+ if (victim > tlb_44x_hwater)
+ victim = 0;
+ tlb_44x_index = victim;
+ local_irq_enable();
/* Get reference to new page. */
+ gfn = gpaddr >> PAGE_SHIFT;
new_page = gfn_to_page(vcpu->kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
}
hpaddr = page_to_phys(new_page);
- /* Drop reference to old page. */
- kvmppc_44x_shadow_release(vcpu, victim);
-
- vcpu->arch.shadow_pages[victim] = new_page;
+ /* Invalidate any previous shadow mappings. */
+ kvmppc_44x_shadow_release(vcpu_44x, victim);
/* XXX Make sure (va, size) doesn't overlap any other
* entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
/* XXX what about AS? */
- stlbe->tid = !(asid & 0xff);
-
/* Force TS=1 for all guest mappings. */
- /* For now we hardcode 4KB mappings, but it will be important to
- * use host large pages in the future. */
- stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
- | PPC44x_TLB_4K;
- stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
- stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
- vcpu->arch.msr & MSR_PR);
- kvmppc_tlbe_set_modified(vcpu, victim);
+ stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+ if (max_bytes >= PAGE_SIZE) {
+ /* Guest mapping is larger than or equal to host page size. We can use
+ * a "native" host mapping. */
+ stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+ } else {
+ /* Guest mapping is smaller than host page size. We must restrict the
+ * size of the mapping to be at most the smaller of the two, but for
+ * simplicity we fall back to a 4K mapping (this is probably what the
+ * guest is using anyways). */
+ stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+ /* 'hpaddr' is a host page, which is larger than the mapping we're
+ * inserting here. To compensate, we must add the in-page offset to the
+ * sub-page. */
+ hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+ }
- KVMTRACE_5D(STLB_WRITE, vcpu, victim,
- stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
- handler);
+ stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+ stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+ vcpu->arch.msr & MSR_PR);
+ stlbe.tid = !(asid & 0xff);
+
+ /* Keep track of the reference so we can properly release it later. */
+ ref = &vcpu_44x->shadow_refs[victim];
+ ref->page = new_page;
+ ref->gtlb_index = gtlb_index;
+ ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+ ref->tid = stlbe.tid;
+
+ /* Insert shadow mapping into hardware TLB. */
+ kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
+ kvmppc_44x_tlbwe(victim, &stlbe);
+ KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
+ stlbe.word2, handler);
}
-void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
- gva_t eend, u32 asid)
+/* For a particular guest TLB entry, invalidate the corresponding host TLB
+ * mappings and release the host pages. */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+ unsigned int gtlb_index)
{
- unsigned int pid = !(asid & 0xff);
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
- unsigned int tid;
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+ if (ref->gtlb_index == gtlb_index)
+ kvmppc_44x_shadow_release(vcpu_44x, i);
+ }
+}
- if (!get_tlb_v(stlbe))
- continue;
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+ vcpu->arch.shadow_pid = !usermode;
+}
- if (eend < get_tlb_eaddr(stlbe))
- continue;
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
- if (eaddr > get_tlb_end(stlbe))
- continue;
+ if (unlikely(vcpu->arch.pid == new_pid))
+ return;
- tid = get_tlb_tid(stlbe);
- if (tid && (tid != pid))
- continue;
+ vcpu->arch.pid = new_pid;
- kvmppc_44x_shadow_release(vcpu, i);
- stlbe->word0 = 0;
- kvmppc_tlbe_set_modified(vcpu, i);
- KVMTRACE_5D(STLB_INVAL, vcpu, i,
- stlbe->tid, stlbe->word0, stlbe->word1,
- stlbe->word2, handler);
+ /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+ * can't access guest kernel mappings (TID=1). When we switch to a new
+ * guest PID, which will also use host PID=0, we must discard the old guest
+ * userspace mappings. */
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+
+ if (ref->tid == 0)
+ kvmppc_44x_shadow_release(vcpu_44x, i);
}
}
-/* Invalidate all mappings on the privilege switch after PID has been changed.
- * The guest always runs with PID=1, so we must clear the entire TLB when
- * switching address spaces. */
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct kvmppc_44x_tlbe *tlbe)
{
- int i;
+ gpa_t gpa;
- if (vcpu->arch.swap_pid) {
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
-
- /* Future optimization: clear only userspace mappings. */
- kvmppc_44x_shadow_release(vcpu, i);
- stlbe->word0 = 0;
- kvmppc_tlbe_set_modified(vcpu, i);
- KVMTRACE_5D(STLB_INVAL, vcpu, i,
- stlbe->tid, stlbe->word0, stlbe->word1,
- stlbe->word2, handler);
- }
- vcpu->arch.swap_pid = 0;
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *tlbe;
+ unsigned int gtlb_index;
+
+ gtlb_index = vcpu->arch.gpr[ra];
+ if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
+ printk("%s: index %d\n", __func__, gtlb_index);
+ kvmppc_dump_vcpu(vcpu);
+ return EMULATE_FAIL;
}
- vcpu->arch.shadow_pid = !usermode;
+ tlbe = &vcpu_44x->guest_tlb[gtlb_index];
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ kvmppc_44x_invalidate(vcpu, gtlb_index);
+
+ switch (ws) {
+ case PPC44x_TLB_PAGEID:
+ tlbe->tid = get_mmucr_stid(vcpu);
+ tlbe->word0 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_XLAT:
+ tlbe->word1 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_ATTRIB:
+ tlbe->word2 = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (tlbe_is_host_safe(vcpu, tlbe)) {
+ u64 asid;
+ gva_t eaddr;
+ gpa_t gpaddr;
+ u32 flags;
+ u32 bytes;
+
+ eaddr = get_tlb_eaddr(tlbe);
+ gpaddr = get_tlb_raddr(tlbe);
+
+ /* Use the advertised page size to mask effective and real addrs. */
+ bytes = get_tlb_bytes(tlbe);
+ eaddr &= ~(bytes - 1);
+ gpaddr &= ~(bytes - 1);
+
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ flags = tlbe->word2 & 0xffff;
+
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+ }
+
+ KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
+ tlbe->word1, tlbe->word2, handler);
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+ return EMULATE_DONE;
+}
+
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+ u32 ea;
+ int gtlb_index;
+ unsigned int as = get_mmucr_sts(vcpu);
+ unsigned int pid = get_mmucr_stid(vcpu);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+ if (rc) {
+ if (gtlb_index < 0)
+ vcpu->arch.cr &= ~0x20000000;
+ else
+ vcpu->arch.cr |= 0x20000000;
+ }
+ vcpu->arch.gpr[rt] = gtlb_index;
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+ return EMULATE_DONE;
}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b..772191f29e6 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
-extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+ u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 4) & 0xf;
}
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->word0 & 0xfffffc00;
}
-static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1 << 10 << (pgsize << 1);
}
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
{
return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
}
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
{
u64 word1 = tlbe->word1;
return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
}
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->tid & 0xff;
}
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 8) & 0x1;
}
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 9) & 0x1;
}
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
-static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
{
unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e..6dbdc4817d8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
- bool "Kernel-based Virtual Machine (KVM) support"
- depends on 44x && EXPERIMENTAL
+ bool
select PREEMPT_NOTIFIERS
select ANON_INODES
- # We can only run on Book E hosts so far
- select KVM_BOOKE_HOST
+
+config KVM_440
+ bool "KVM support for PowerPC 440 processors"
+ depends on EXPERIMENTAL && 44x
+ select KVM
---help---
- Support hosting virtualized guest machines. You will also
- need to select one or more of the processor modules below.
+ Support running unmodified 440 guest kernels in virtual machines on
+ 440 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
-config KVM_BOOKE_HOST
- bool "KVM host support for Book E PowerPC processors"
- depends on KVM && 44x
+config KVM_EXIT_TIMING
+ bool "Detailed exit timing"
+ depends on KVM
---help---
- Provides host support for KVM on Book E PowerPC processors. Currently
- this works on 440 processors only.
+ Calculate elapsed time for every exit/enter cycle. A per-vcpu
+ report is available in debugfs kvm/vm#_vcpu#_timing.
+ The overhead is relatively small, however it is not recommended for
+ production environments.
+
+ If unsure, say N.
config KVM_TRACE
bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4..df7ba59e6d5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
-kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM) += kvm.o
AFLAGS_booke_interrupts.o := -I$(obj)
-kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
+kvm-440-objs := \
+ booke.o \
+ booke_interrupts.o \
+ 44x.o \
+ 44x_tlb.o \
+ 44x_emulate.o
+obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26ba..35485dd6927 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
+#include "timing.h"
+#include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
+#include "booke.h"
#include "44x_tlb.h"
+unsigned long kvmppc_booke_handlers;
+
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "exits", VCPU_STAT(sum_exits) },
{ "mmio", VCPU_STAT(mmio_exits) },
{ "dcr", VCPU_STAT(dcr_exits) },
{ "sig", VCPU_STAT(signal_exits) },
- { "light", VCPU_STAT(light_exits) },
{ "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
{ "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
{ "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-static const u32 interrupt_msr_mask[16] = {
- [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
- [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
- [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
- [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
- [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
- [BOOKE_INTERRUPT_INST_STORAGE] = 1,
- [BOOKE_INTERRUPT_ALIGNMENT] = 2,
- [BOOKE_INTERRUPT_PROGRAM] = 3,
- [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
- [BOOKE_INTERRUPT_SYSCALL] = 5,
- [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
- [BOOKE_INTERRUPT_DTLB_MISS] = 7,
- [BOOKE_INTERRUPT_ITLB_MISS] = 8,
- [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
- [BOOKE_INTERRUPT_DEBUG] = 10,
- [BOOKE_INTERRUPT_CRITICAL] = 11,
- [BOOKE_INTERRUPT_WATCHDOG] = 12,
- [BOOKE_INTERRUPT_EXTERNAL] = 13,
- [BOOKE_INTERRUPT_FIT] = 14,
- [BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
- BOOKE_INTERRUPT_DATA_STORAGE,
- BOOKE_INTERRUPT_INST_STORAGE,
- BOOKE_INTERRUPT_ALIGNMENT,
- BOOKE_INTERRUPT_PROGRAM,
- BOOKE_INTERRUPT_FP_UNAVAIL,
- BOOKE_INTERRUPT_SYSCALL,
- BOOKE_INTERRUPT_AP_UNAVAIL,
- BOOKE_INTERRUPT_DTLB_MISS,
- BOOKE_INTERRUPT_ITLB_MISS,
- BOOKE_INTERRUPT_MACHINE_CHECK,
- BOOKE_INTERRUPT_DEBUG,
- BOOKE_INTERRUPT_CRITICAL,
- BOOKE_INTERRUPT_WATCHDOG,
- BOOKE_INTERRUPT_EXTERNAL,
- BOOKE_INTERRUPT_FIT,
- BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
- struct tlbe *tlbe;
- int i;
-
- printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
- printk("| %2s | %3s | %8s | %8s | %8s |\n",
- "nr", "tid", "word0", "word1", "word2");
-
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- tlbe = &vcpu->arch.guest_tlb[i];
- if (tlbe->word0 & PPC44x_TLB_VALID)
- printk(" G%2d | %02X | %08X | %08X | %08X |\n",
- i, tlbe->tid, tlbe->word0, tlbe->word1,
- tlbe->word2);
- }
-
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- tlbe = &vcpu->arch.shadow_tlb[i];
- if (tlbe->word0 & PPC44x_TLB_VALID)
- printk(" S%2d | %02X | %08X | %08X | %08X |\n",
- i, tlbe->tid, tlbe->word0, tlbe->word1,
- tlbe->word2);
- }
-}
-
/* TODO: use vcpu_printf() */
void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
int i;
- printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
- printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
- printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+ printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+ printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+ printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
- printk("gpr%02d: %08x %08x %08x %08x\n", i,
+ printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
vcpu->arch.gpr[i],
vcpu->arch.gpr[i+1],
vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+ unsigned int priority)
{
- int r;
+ set_bit(priority, &vcpu->arch.pending_exceptions);
+}
- switch (interrupt) {
- case BOOKE_INTERRUPT_CRITICAL:
- r = vcpu->arch.msr & MSR_CE;
- break;
- case BOOKE_INTERRUPT_MACHINE_CHECK:
- r = vcpu->arch.msr & MSR_ME;
- break;
- case BOOKE_INTERRUPT_EXTERNAL:
- r = vcpu->arch.msr & MSR_EE;
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+ return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Deliver the interrupt of the corresponding priority, if possible. */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
+{
+ int allowed = 0;
+ ulong msr_mask;
+
+ switch (priority) {
+ case BOOKE_IRQPRIO_PROGRAM:
+ case BOOKE_IRQPRIO_DTLB_MISS:
+ case BOOKE_IRQPRIO_ITLB_MISS:
+ case BOOKE_IRQPRIO_SYSCALL:
+ case BOOKE_IRQPRIO_DATA_STORAGE:
+ case BOOKE_IRQPRIO_INST_STORAGE:
+ case BOOKE_IRQPRIO_FP_UNAVAIL:
+ case BOOKE_IRQPRIO_AP_UNAVAIL:
+ case BOOKE_IRQPRIO_ALIGNMENT:
+ allowed = 1;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
- case BOOKE_INTERRUPT_DECREMENTER:
- r = vcpu->arch.msr & MSR_EE;
+ case BOOKE_IRQPRIO_CRITICAL:
+ case BOOKE_IRQPRIO_WATCHDOG:
+ allowed = vcpu->arch.msr & MSR_CE;
+ msr_mask = MSR_ME;
break;
- case BOOKE_INTERRUPT_FIT:
- r = vcpu->arch.msr & MSR_EE;
+ case BOOKE_IRQPRIO_MACHINE_CHECK:
+ allowed = vcpu->arch.msr & MSR_ME;
+ msr_mask = 0;
break;
- case BOOKE_INTERRUPT_WATCHDOG:
- r = vcpu->arch.msr & MSR_CE;
+ case BOOKE_IRQPRIO_EXTERNAL:
+ case BOOKE_IRQPRIO_DECREMENTER:
+ case BOOKE_IRQPRIO_FIT:
+ allowed = vcpu->arch.msr & MSR_EE;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
- case BOOKE_INTERRUPT_DEBUG:
- r = vcpu->arch.msr & MSR_DE;
+ case BOOKE_IRQPRIO_DEBUG:
+ allowed = vcpu->arch.msr & MSR_DE;
+ msr_mask = MSR_ME;
break;
- default:
- r = 1;
}
- return r;
-}
+ if (allowed) {
+ vcpu->arch.srr0 = vcpu->arch.pc;
+ vcpu->arch.srr1 = vcpu->arch.msr;
+ vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+ kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
- switch (interrupt) {
- case BOOKE_INTERRUPT_DECREMENTER:
- vcpu->arch.tsr |= TSR_DIS;
- break;
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
}
- vcpu->arch.srr0 = vcpu->arch.pc;
- vcpu->arch.srr1 = vcpu->arch.msr;
- vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
- kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+ return allowed;
}
/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
- unsigned int exception;
unsigned int priority;
- priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+ priority = __ffs(*pending);
while (priority <= BOOKE_MAX_INTERRUPT) {
- exception = priority_exception[priority];
- if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
- kvmppc_clear_exception(vcpu, exception);
- kvmppc_deliver_interrupt(vcpu, exception);
+ if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
- }
priority = find_next_bit(pending,
BITS_PER_BYTE * sizeof(*pending),
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
enum emulation_result er;
int r = RESUME_HOST;
+ /* update before a new last_exit_type is rewritten */
+ kvmppc_update_timing_stats(vcpu);
+
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_EXTERNAL:
+ kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+ if (need_resched())
+ cond_resched();
+ r = RESUME_GUEST;
+ break;
+
case BOOKE_INTERRUPT_DECREMENTER:
/* Since we switched IVPR back to the host's value, the host
* handled this interrupt the moment we enabled interrupts.
* Now we just offer it a chance to reschedule the guest. */
-
- /* XXX At this point the TLB still holds our shadow TLB, so if
- * we do reschedule the host will fault over it. Perhaps we
- * should politely restore the host's entries to minimize
- * misses before ceding control. */
+ kvmppc_account_exit(vcpu, DEC_EXITS);
if (need_resched())
cond_resched();
- if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
- vcpu->stat.dec_exits++;
- else
- vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
r = RESUME_GUEST;
+ kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
+ /* don't overwrite subtypes, just account kvm_stats */
+ kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
/* Future optimization: only reload non-volatiles if
* they were actually modified by emulation. */
- vcpu->stat.emulated_inst_exits++;
r = RESUME_GUEST_NV;
break;
case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case EMULATE_FAIL:
/* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
- kvmppc_queue_exception(vcpu, exit_nr);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+ kvmppc_account_exit(vcpu, FP_UNAVAIL);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.dsi_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+ kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.isi_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+ kvmppc_account_exit(vcpu, ISI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_SYSCALL:
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.syscall_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+ kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
+ /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
- struct tlbe *gtlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
+ int gtlb_index;
gfn_t gfn;
/* Check the guest TLB. */
- gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
- if (!gtlbe) {
+ gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+ if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
- kvmppc_queue_exception(vcpu, exit_nr);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- vcpu->stat.dtlb_real_miss_exits++;
+ kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
break;
}
+ gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
- gtlbe->word2);
- vcpu->stat.dtlb_virt_miss_exits++;
+ kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+ gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
r = kvmppc_emulate_mmio(run, vcpu);
+ kvmppc_account_exit(vcpu, MMIO_EXITS);
}
break;
}
+ /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
- struct tlbe *gtlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
+ gpa_t gpaddr;
gfn_t gfn;
+ int gtlb_index;
r = RESUME_GUEST;
/* Check the guest TLB. */
- gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
- if (!gtlbe) {
+ gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+ if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.itlb_real_miss_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+ kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
break;
}
- vcpu->stat.itlb_virt_miss_exits++;
+ kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
- gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+ gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+ gpaddr = tlb_xlate(gtlbe, eaddr);
+ gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
- gtlbe->word2);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+ gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
} else {
/* Guest mapped and leaped at non-RAM! */
- kvmppc_queue_exception(vcpu,
- BOOKE_INTERRUPT_MACHINE_CHECK);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
}
break;
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
mtspr(SPRN_DBSR, dbsr);
run->exit_reason = KVM_EXIT_DEBUG;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
r = RESUME_HOST;
break;
}
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_disable();
- kvmppc_check_and_deliver_interrupts(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
- /* Do some exit accounting. */
- vcpu->stat.sum_exits++;
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
- vcpu->stat.signal_exits++;
- } else {
- vcpu->stat.light_exits++;
- }
- } else {
- switch (run->exit_reason) {
- case KVM_EXIT_MMIO:
- vcpu->stat.mmio_exits++;
- break;
- case KVM_EXIT_DCR:
- vcpu->stat.dcr_exits++;
- break;
- case KVM_EXIT_INTR:
- vcpu->stat.signal_exits++;
- break;
+ kvmppc_account_exit(vcpu, SIGNAL_EXITS);
}
}
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
- tlbe->tid = 0;
- tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
- tlbe->word1 = 0;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
- tlbe++;
- tlbe->tid = 0;
- tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
- tlbe->word1 = 0xef600000;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
- | PPC44x_TLB_I | PPC44x_TLB_G;
-
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
* before it's programmed its own IVPR. */
vcpu->arch.ivpr = 0x55550000;
- /* Since the guest can directly access the timebase, it must know the
- * real timebase frequency. Accordingly, it must see the state of
- * CCR1[TCS]. */
- vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+ kvmppc_init_timing_stats(vcpu);
- return 0;
+ return kvmppc_core_vcpu_setup(vcpu);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
- vcpu->arch.msr = regs->msr;
+ kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- struct tlbe *gtlbe;
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
-
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
-
- index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
- if (index == -1) {
- tr->valid = 0;
- return 0;
- }
+ return kvmppc_core_vcpu_translate(vcpu, tr);
+}
- gtlbe = &vcpu->arch.guest_tlb[index];
+int kvmppc_booke_init(void)
+{
+ unsigned long ivor[16];
+ unsigned long max_ivor = 0;
+ int i;
- tr->physical_address = tlb_xlate(gtlbe, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
+ /* We install our own exception handlers by hijacking IVPR. IVPR must
+ * be 16-bit aligned, so we need a 64KB allocation. */
+ kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ VCPU_SIZE_ORDER);
+ if (!kvmppc_booke_handlers)
+ return -ENOMEM;
+
+ /* XXX make sure our handlers are smaller than Linux's */
+
+ /* Copy our interrupt handlers to match host IVORs. That way we don't
+ * have to swap the IVORs on every guest/host transition. */
+ ivor[0] = mfspr(SPRN_IVOR0);
+ ivor[1] = mfspr(SPRN_IVOR1);
+ ivor[2] = mfspr(SPRN_IVOR2);
+ ivor[3] = mfspr(SPRN_IVOR3);
+ ivor[4] = mfspr(SPRN_IVOR4);
+ ivor[5] = mfspr(SPRN_IVOR5);
+ ivor[6] = mfspr(SPRN_IVOR6);
+ ivor[7] = mfspr(SPRN_IVOR7);
+ ivor[8] = mfspr(SPRN_IVOR8);
+ ivor[9] = mfspr(SPRN_IVOR9);
+ ivor[10] = mfspr(SPRN_IVOR10);
+ ivor[11] = mfspr(SPRN_IVOR11);
+ ivor[12] = mfspr(SPRN_IVOR12);
+ ivor[13] = mfspr(SPRN_IVOR13);
+ ivor[14] = mfspr(SPRN_IVOR14);
+ ivor[15] = mfspr(SPRN_IVOR15);
+
+ for (i = 0; i < 16; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + i * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
return 0;
}
+
+void __exit kvmppc_booke_exit(void)
+{
+ free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+ kvm_exit();
+}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 00000000000..cf7c94ca24b
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include "timing.h"
+
+/* interrupt priortity ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SYSCALL 5
+#define BOOKE_IRQPRIO_AP_UNAVAIL 6
+#define BOOKE_IRQPRIO_DTLB_MISS 7
+#define BOOKE_IRQPRIO_ITLB_MISS 8
+#define BOOKE_IRQPRIO_MACHINE_CHECK 9
+#define BOOKE_IRQPRIO_DEBUG 10
+#define BOOKE_IRQPRIO_CRITICAL 11
+#define BOOKE_IRQPRIO_WATCHDOG 12
+#define BOOKE_IRQPRIO_EXTERNAL 13
+#define BOOKE_IRQPRIO_FIT 14
+#define BOOKE_IRQPRIO_DECREMENTER 15
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+ kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+ vcpu->arch.msr = new_msr;
+
+ if (vcpu->arch.msr & MSR_WE) {
+ kvm_vcpu_block(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+ };
+}
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31..00000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_ppc.h>
-
-unsigned long kvmppc_booke_handlers;
-
-static int kvmppc_booke_init(void)
-{
- unsigned long ivor[16];
- unsigned long max_ivor = 0;
- int i;
-
- /* We install our own exception handlers by hijacking IVPR. IVPR must
- * be 16-bit aligned, so we need a 64KB allocation. */
- kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- VCPU_SIZE_ORDER);
- if (!kvmppc_booke_handlers)
- return -ENOMEM;
-
- /* XXX make sure our handlers are smaller than Linux's */
-
- /* Copy our interrupt handlers to match host IVORs. That way we don't
- * have to swap the IVORs on every guest/host transition. */
- ivor[0] = mfspr(SPRN_IVOR0);
- ivor[1] = mfspr(SPRN_IVOR1);
- ivor[2] = mfspr(SPRN_IVOR2);
- ivor[3] = mfspr(SPRN_IVOR3);
- ivor[4] = mfspr(SPRN_IVOR4);
- ivor[5] = mfspr(SPRN_IVOR5);
- ivor[6] = mfspr(SPRN_IVOR6);
- ivor[7] = mfspr(SPRN_IVOR7);
- ivor[8] = mfspr(SPRN_IVOR8);
- ivor[9] = mfspr(SPRN_IVOR9);
- ivor[10] = mfspr(SPRN_IVOR10);
- ivor[11] = mfspr(SPRN_IVOR11);
- ivor[12] = mfspr(SPRN_IVOR12);
- ivor[13] = mfspr(SPRN_IVOR13);
- ivor[14] = mfspr(SPRN_IVOR14);
- ivor[15] = mfspr(SPRN_IVOR15);
-
- for (i = 0; i < 16; i++) {
- if (ivor[i] > max_ivor)
- max_ivor = ivor[i];
-
- memcpy((void *)kvmppc_booke_handlers + ivor[i],
- kvmppc_handlers_start + i * kvmppc_handler_len,
- kvmppc_handler_len);
- }
- flush_icache_range(kvmppc_booke_handlers,
- kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
- return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
-}
-
-static void __exit kvmppc_booke_exit(void)
-{
- free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
- kvm_exit();
-}
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85..084ebcd7dd8 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
li r6, 1
slw r6, r6, r5
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1:
+ mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ bne 1b
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
/* Save the faulting instruction and all GPRs for emulation. */
andi. r7, r6, NEED_INST_MASK
beq ..skip_inst_copy
@@ -335,54 +347,6 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
- /* Prevent all asynchronous TLB updates. */
- mfmsr r5
- lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
- ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
- andc r6, r5, r6
- mtmsr r6
-
- /* Load the guest mappings, leaving the host's "pinned" kernel mappings
- * in place. */
- mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
- li r5, PPC44x_TLB_SIZE
- lis r5, tlb_44x_hwater@ha
- lwz r5, tlb_44x_hwater@l(r5)
- mtctr r5
- addi r9, r4, VCPU_SHADOW_TLB
- addi r5, r4, VCPU_SHADOW_MOD
- li r3, 0
-1:
- lbzx r7, r3, r5
- cmpwi r7, 0
- beq 3f
-
- /* Load guest entry. */
- mulli r11, r3, TLBE_BYTES
- add r11, r11, r9
- lwz r7, 0(r11)
- mtspr SPRN_MMUCR, r7
- lwz r7, 4(r11)
- tlbwe r7, r3, PPC44x_TLB_PAGEID
- lwz r7, 8(r11)
- tlbwe r7, r3, PPC44x_TLB_XLAT
- lwz r7, 12(r11)
- tlbwe r7, r3, PPC44x_TLB_ATTRIB
-3:
- addi r3, r3, 1 /* Increment index. */
- bdnz 1b
-
- mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
-
- /* Clear bitmap of modified TLB entries */
- li r5, PPC44x_TLB_SIZE>>2
- mtctr r5
- addi r5, r4, VCPU_SHADOW_MOD - 4
- li r6, 0
-1:
- stwu r6, 4(r5)
- bdnz 1b
-
iccci 0, 0 /* XXX hack */
/* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
lwz r3, VCPU_SPRG7(r4)
mtspr SPRN_SPRG7, r3
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r7, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ bne 1b
+ stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
mtctr r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20..d1d38daa93f 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
#include <linux/string.h>
#include <linux/kvm_host.h>
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
+#include <asm/reg.h>
#include <asm/time.h>
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include "timing.h"
-#include "44x_tlb.h"
-
-/* Instruction decoding */
-static inline unsigned int get_op(u32 inst)
-{
- return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
- return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
- return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
- return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
- return inst & 0xffff;
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct tlbe *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
-{
- u64 eaddr;
- u64 raddr;
- u64 asid;
- u32 flags;
- struct tlbe *tlbe;
- unsigned int ra;
- unsigned int rs;
- unsigned int ws;
- unsigned int index;
-
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
-
- index = vcpu->arch.gpr[ra];
- if (index > PPC44x_TLB_SIZE) {
- printk("%s: index %d\n", __func__, index);
- kvmppc_dump_vcpu(vcpu);
- return EMULATE_FAIL;
- }
-
- tlbe = &vcpu->arch.guest_tlb[index];
-
- /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
- if (tlbe->word0 & PPC44x_TLB_VALID) {
- eaddr = get_tlb_eaddr(tlbe);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
- }
-
- switch (ws) {
- case PPC44x_TLB_PAGEID:
- tlbe->tid = vcpu->arch.mmucr & 0xff;
- tlbe->word0 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_XLAT:
- tlbe->word1 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_ATTRIB:
- tlbe->word2 = vcpu->arch.gpr[rs];
- break;
-
- default:
- return EMULATE_FAIL;
- }
-
- if (tlbe_is_host_safe(vcpu, tlbe)) {
- eaddr = get_tlb_eaddr(tlbe);
- raddr = get_tlb_raddr(tlbe);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- flags = tlbe->word2 & 0xffff;
-
- /* Create a 4KB mapping on the host. If the guest wanted a
- * large page, only the first 4KB is mapped here and the rest
- * are mapped on the fly. */
- kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
- }
-
- KVMTRACE_5D(GTLB_WRITE, vcpu, index,
- tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
- handler);
-
- return EMULATE_DONE;
-}
-
-static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
/* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
/* XXX to do:
* lhax
* lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
*
* XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = vcpu->arch.last_inst;
u32 ea;
int ra;
int rb;
- int rc;
int rs;
int rt;
int sprn;
- int dcrn;
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
+ /* this default type might be overwritten by subcategories */
+ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
switch (get_op(inst)) {
- case 3: /* trap */
- printk("trap!\n");
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ case 3: /* trap */
+ vcpu->arch.esr |= ESR_PTR;
+ kvmppc_core_queue_program(vcpu);
advance = 0;
break;
- case 19:
- switch (get_xop(inst)) {
- case 50: /* rfi */
- kvmppc_emul_rfi(vcpu);
- advance = 0;
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
- break;
-
case 31:
switch (get_xop(inst)) {
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 83: /* mfmsr */
- rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
- break;
-
case 87: /* lbzx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 131: /* wrtee */
- rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
- break;
-
- case 146: /* mtmsr */
- rs = get_rs(inst);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
- break;
-
case 151: /* stwx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 1);
break;
- case 163: /* wrteei */
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (inst & MSR_EE);
- break;
-
case 215: /* stbx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 323: /* mfdcr */
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
case 339: /* mfspr */
sprn = get_sprn(inst);
rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
- case SPRN_MMUCR:
- vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
- case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
- case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
- case SPRN_CCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
- case SPRN_CCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
- case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
- case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
- case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
- case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
- case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
- case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
- case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
- case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
- case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
- case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
- case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
- case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
- case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
- case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
- case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
- case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
- case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
- case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
- case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
- case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
-
default:
- printk("mfspr: unknown spr %x\n", sprn);
- vcpu->arch.gpr[rt] = 0;
+ emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+ if (emulated == EMULATE_FAIL) {
+ printk("mfspr: unknown spr %x\n", sprn);
+ vcpu->arch.gpr[rt] = 0;
+ }
break;
}
break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 451: /* mtdcr */
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = vcpu->arch.gpr[rs];
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
case 467: /* mtspr */
sprn = get_sprn(inst);
rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
case SPRN_SRR1:
vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
- case SPRN_PID:
- kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
- case SPRN_CCR0:
- vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_CCR1:
- vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
- case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_emulate_dec(vcpu);
break;
- case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-
- case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
- kvmppc_emulate_dec(vcpu);
- break;
-
case SPRN_SPRG0:
vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SPRG3:
vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
- case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
- case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR0:
- vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR1:
- vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR2:
- vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR3:
- vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR4:
- vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR5:
- vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR6:
- vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR7:
- vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR8:
- vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR9:
- vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR10:
- vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR11:
- vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR12:
- vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR13:
- vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR14:
- vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR15:
- vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
-
default:
- printk("mtspr: unknown spr %x\n", sprn);
- emulated = EMULATE_FAIL;
+ emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+ if (emulated == EMULATE_FAIL)
+ printk("mtspr: unknown spr %x\n", sprn);
break;
}
break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
- case 978: /* tlbwe */
- emulated = kvmppc_emul_tlbwe(vcpu, inst);
- break;
-
- case 914: { /* tlbsx */
- int index;
- unsigned int as = get_mmucr_sts(vcpu);
- unsigned int pid = get_mmucr_stid(vcpu);
-
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
-
- ea = vcpu->arch.gpr[rb];
- if (ra)
- ea += vcpu->arch.gpr[ra];
-
- index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
- if (rc) {
- if (index < 0)
- vcpu->arch.cr &= ~0x20000000;
- else
- vcpu->arch.cr |= 0x20000000;
- }
- vcpu->arch.gpr[rt] = index;
-
- }
- break;
-
case 790: /* lhbrx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 0);
break;
- case 966: /* iccci */
- break;
-
default:
- printk("unknown: op %d xop %d\n", get_op(inst),
- get_xop(inst));
+ /* Attempt core-specific emulation below. */
emulated = EMULATE_FAIL;
- break;
}
break;
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
default:
- printk("unknown op %d\n", get_op(inst));
emulated = EMULATE_FAIL;
- break;
}
- KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+ if (emulated == EMULATE_FAIL) {
+ emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+ if (emulated == EMULATE_FAIL) {
+ advance = 0;
+ printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+ "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+ }
+ }
+
+ KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe..2822c8ccfaa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
+#include "timing.h"
#include "../mm/mmu_decl.h"
-
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
void kvm_arch_check_processor_compat(void *rtn)
{
- int r;
-
- if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
- r = 0;
- else
- r = -ENOTSUPP;
-
- *(int *)rtn = r;
+ *(int *)rtn = kvmppc_core_check_processor_compat();
}
struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
- case KVM_CAP_USER_MEMORY:
- r = 1;
- break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err;
-
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu) {
- err = -ENOMEM;
- goto out;
- }
-
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
+ vcpu = kvmppc_core_vcpu_create(kvm, id);
+ kvmppc_create_vcpu_debugfs(vcpu, id);
return vcpu;
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
+ kvmppc_remove_vcpu_debugfs(vcpu);
+ kvmppc_core_vcpu_free(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-
- return test_bit(priority, &vcpu->arch.pending_exceptions);
+ return kvmppc_core_pending_dec(vcpu);
}
static void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+ kvmppc_core_queue_dec(vcpu);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvmppc_core_destroy_mmu(vcpu);
}
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvmppc_disable_debug_interrupts(void)
-{
- mtmsr(mfmsr() & ~MSR_DE);
-}
-
-static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
-{
- kvmppc_disable_debug_interrupts();
-
- mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
- mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
- mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
- mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
- mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
- mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
- mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
- mtmsr(vcpu->arch.host_msr);
-}
-
-static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
- u32 dbcr0 = 0;
-
- vcpu->arch.host_msr = mfmsr();
- kvmppc_disable_debug_interrupts();
-
- /* Save host debug register state. */
- vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
- vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
- vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
- vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
- vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
- vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
- vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
- /* set registers up for guest */
-
- if (dbg->bp[0]) {
- mtspr(SPRN_IAC1, dbg->bp[0]);
- dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
- }
- if (dbg->bp[1]) {
- mtspr(SPRN_IAC2, dbg->bp[1]);
- dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
- }
- if (dbg->bp[2]) {
- mtspr(SPRN_IAC3, dbg->bp[2]);
- dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
- }
- if (dbg->bp[3]) {
- mtspr(SPRN_IAC4, dbg->bp[3]);
- dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
- }
-
- mtspr(SPRN_DBCR0, dbcr0);
- mtspr(SPRN_DBCR1, 0);
- mtspr(SPRN_DBCR2, 0);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- int i;
-
if (vcpu->guest_debug.enabled)
- kvmppc_load_guest_debug_registers(vcpu);
+ kvmppc_core_load_guest_debugstate(vcpu);
- /* Mark every guest entry in the shadow TLB entry modified, so that they
- * will all be reloaded on the next vcpu run (instead of being
- * demand-faulted). */
- for (i = 0; i <= tlb_44x_hwater; i++)
- kvmppc_tlbe_set_modified(vcpu, i);
+ kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_debug.enabled)
- kvmppc_restore_host_debug_state(vcpu);
+ kvmppc_core_load_host_debugstate(vcpu);
/* Don't leave guest TLB entries resident when being de-scheduled. */
/* XXX It would be nice to differentiate between heavyweight exit and
* sched_out here, since we could avoid the TLB flush for heavyweight
* exits. */
_tlbil_all();
+ kvmppc_core_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
*gpr = run->dcr.data;
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
if (run->mmio.len > sizeof(*gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.dcr_needed = 0;
}
- kvmppc_check_and_deliver_interrupts(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
local_irq_disable();
kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+ kvmppc_core_queue_external(vcpu, irq);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 00000000000..47ee603f558
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ /* pause guest execution to avoid concurrent updates */
+ local_irq_disable();
+ mutex_lock(&vcpu->mutex);
+
+ vcpu->arch.last_exit_type = 0xDEAD;
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+ vcpu->arch.timing_count_type[i] = 0;
+ vcpu->arch.timing_max_duration[i] = 0;
+ vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+ vcpu->arch.timing_sum_duration[i] = 0;
+ vcpu->arch.timing_sum_quad_duration[i] = 0;
+ }
+ vcpu->arch.timing_last_exit = 0;
+ vcpu->arch.timing_exit.tv64 = 0;
+ vcpu->arch.timing_last_enter.tv64 = 0;
+
+ mutex_unlock(&vcpu->mutex);
+ local_irq_enable();
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+ u64 old;
+
+ do_div(duration, tb_ticks_per_usec);
+ if (unlikely(duration > 0xFFFFFFFF)) {
+ printk(KERN_ERR"%s - duration too big -> overflow"
+ " duration %lld type %d exit #%d\n",
+ __func__, duration, type,
+ vcpu->arch.timing_count_type[type]);
+ return;
+ }
+
+ vcpu->arch.timing_count_type[type]++;
+
+ /* sum */
+ old = vcpu->arch.timing_sum_duration[type];
+ vcpu->arch.timing_sum_duration[type] += duration;
+ if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old, vcpu->arch.timing_sum_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* square sum */
+ old = vcpu->arch.timing_sum_quad_duration[type];
+ vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+ if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of squared durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old,
+ vcpu->arch.timing_sum_quad_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* set min/max */
+ if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+ vcpu->arch.timing_min_duration[type] = duration;
+ if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+ vcpu->arch.timing_max_duration[type] = duration;
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+ u64 exit = vcpu->arch.timing_last_exit;
+ u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+ /* save exit time, used next exit when the reenter time is known */
+ vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+ if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+ return; /* skip incomplete cycle (e.g. after reset) */
+
+ /* update statistics for average and standard deviation */
+ add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+ /* enter -> timing_last_exit is time spent in guest - log this too */
+ add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+ TIMEINGUEST);
+}
+
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+ [MMIO_EXITS] = "MMIO",
+ [DCR_EXITS] = "DCR",
+ [SIGNAL_EXITS] = "SIGNAL",
+ [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
+ [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
+ [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
+ [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
+ [SYSCALL_EXITS] = "SYSCALL",
+ [ISI_EXITS] = "ISI",
+ [DSI_EXITS] = "DSI",
+ [EMULATED_INST_EXITS] = "EMULINST",
+ [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
+ [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
+ [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
+ [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
+ [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
+ [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
+ [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
+ [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
+ [EMULATED_RFI_EXITS] = "EMUL_RFI",
+ [DEC_EXITS] = "DEC",
+ [EXT_INTR_EXITS] = "EXTINT",
+ [HALT_WAKEUP] = "HALT",
+ [USR_PR_INST] = "USR_PR_INST",
+ [FP_UNAVAIL] = "FP_UNAVAIL",
+ [DEBUG_EXITS] = "DEBUG",
+ [TIMEINGUEST] = "TIMEINGUEST"
+};
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+ struct kvm_vcpu *vcpu = m->private;
+ int i;
+
+ seq_printf(m, "%s", "type count min max sum sum_squared\n");
+
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+ seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
+ kvm_exit_names[i],
+ vcpu->arch.timing_count_type[i],
+ vcpu->arch.timing_min_duration[i],
+ vcpu->arch.timing_max_duration[i],
+ vcpu->arch.timing_sum_duration[i],
+ vcpu->arch.timing_sum_quad_duration[i]);
+ }
+ return 0;
+}
+
+/* Write 'c' to clear the timing statistics. */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int err = -EINVAL;
+ char c;
+
+ if (count > 1) {
+ goto done;
+ }
+
+ if (get_user(c, user_buf)) {
+ err = -EFAULT;
+ goto done;
+ }
+
+ if (c == 'c') {
+ struct seq_file *seqf = (struct seq_file *)file->private_data;
+ struct kvm_vcpu *vcpu = seqf->private;
+ /* Write does not affect our buffers previously generated with
+ * show. seq_file is locked here to prevent races of init with
+ * a show call */
+ mutex_lock(&seqf->lock);
+ kvmppc_init_timing_stats(vcpu);
+ mutex_unlock(&seqf->lock);
+ err = count;
+ }
+
+done:
+ return err;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static struct file_operations kvmppc_exit_timing_fops = {
+ .owner = THIS_MODULE,
+ .open = kvmppc_exit_timing_open,
+ .read = seq_read,
+ .write = kvmppc_exit_timing_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+ static char dbg_fname[50];
+ struct dentry *debugfs_file;
+
+ snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
+ current->pid, id);
+ debugfs_file = debugfs_create_file(dbg_fname, 0666,
+ kvm_debugfs_dir, vcpu,
+ &kvmppc_exit_timing_fops);
+
+ if (!debugfs_file) {
+ printk(KERN_ERR"%s: error creating debugfs file %s\n",
+ __func__, dbg_fname);
+ return;
+ }
+
+ vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.debugfs_exit_timing) {
+ debugfs_remove(vcpu->arch.debugfs_exit_timing);
+ vcpu->arch.debugfs_exit_timing = NULL;
+ }
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 00000000000..bb13b1f3cd5
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+ vcpu->arch.last_exit_type = type;
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+ unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* account the exit in kvm_stats */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+ /* type has to be known at build time for optimization */
+ BUILD_BUG_ON(__builtin_constant_p(type));
+ switch (type) {
+ case EXT_INTR_EXITS:
+ vcpu->stat.ext_intr_exits++;
+ break;
+ case DEC_EXITS:
+ vcpu->stat.dec_exits++;
+ break;
+ case EMULATED_INST_EXITS:
+ vcpu->stat.emulated_inst_exits++;
+ break;
+ case DCR_EXITS:
+ vcpu->stat.dcr_exits++;
+ break;
+ case DSI_EXITS:
+ vcpu->stat.dsi_exits++;
+ break;
+ case ISI_EXITS:
+ vcpu->stat.isi_exits++;
+ break;
+ case SYSCALL_EXITS:
+ vcpu->stat.syscall_exits++;
+ break;
+ case DTLB_REAL_MISS_EXITS:
+ vcpu->stat.dtlb_real_miss_exits++;
+ break;
+ case DTLB_VIRT_MISS_EXITS:
+ vcpu->stat.dtlb_virt_miss_exits++;
+ break;
+ case MMIO_EXITS:
+ vcpu->stat.mmio_exits++;
+ break;
+ case ITLB_REAL_MISS_EXITS:
+ vcpu->stat.itlb_real_miss_exits++;
+ break;
+ case ITLB_VIRT_MISS_EXITS:
+ vcpu->stat.itlb_virt_miss_exits++;
+ break;
+ case SIGNAL_EXITS:
+ vcpu->stat.signal_exits++;
+ break;
+ }
+}
+
+/* wrapper to set exit time and account for it in kvm_stats */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+ kvmppc_set_exit_type(vcpu, type);
+ kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 201c7a5486c..9920d6a7cf2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -512,6 +512,13 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+ unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
+ return 1UL << mmu_psize_to_shift(psize);
+}
+
/*
* Called by asm hashtable.S for doing lazy icache flush
*/
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06ebb3f2..f00f09a77f1 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones;
- return __add_pages(zone, start_pfn, nr_pages);
+ return __add_pages(nid, zone, start_pfn, nr_pages);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index d0bb69dc627..d1f9c62dc17 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -30,11 +30,11 @@
#if defined(CONFIG_40x) || defined(CONFIG_8xx)
static inline void _tlbil_all(void)
{
- asm volatile ("sync; tlbia; isync" : : : "memory")
+ asm volatile ("sync; tlbia; isync" : : : "memory");
}
static inline void _tlbil_pid(unsigned int pid)
{
- asm volatile ("sync; tlbia; isync" : : : "memory")
+ asm volatile ("sync; tlbia; isync" : : : "memory");
}
#else /* CONFIG_40x || CONFIG_8xx */
extern void _tlbil_all(void);
@@ -47,7 +47,7 @@ extern void _tlbil_pid(unsigned int pid);
#ifdef CONFIG_8xx
static inline void _tlbil_va(unsigned long address, unsigned int pid)
{
- asm volatile ("tlbie %0; sync" : : "r" (address) : "memory")
+ asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
}
#else /* CONFIG_8xx */
extern void _tlbil_va(unsigned long address, unsigned int pid);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf81049e1e5..7393bd76d69 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -822,42 +822,50 @@ static void __init dump_numa_memory_topology(void)
* required. nid is the preferred node and end is the physical address of
* the highest address in the node.
*
- * Returns the physical address of the memory.
+ * Returns the virtual address of the memory.
*/
-static void __init *careful_allocation(int nid, unsigned long size,
+static void __init *careful_zallocation(int nid, unsigned long size,
unsigned long align,
unsigned long end_pfn)
{
+ void *ret;
int new_nid;
- unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+ unsigned long ret_paddr;
+
+ ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
/* retry over all memory */
- if (!ret)
- ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+ if (!ret_paddr)
+ ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
- if (!ret)
- panic("numa.c: cannot allocate %lu bytes on node %d",
+ if (!ret_paddr)
+ panic("numa.c: cannot allocate %lu bytes for node %d",
size, nid);
+ ret = __va(ret_paddr);
+
/*
- * If the memory came from a previously allocated node, we must
- * retry with the bootmem allocator.
+ * We initialize the nodes in numeric order: 0, 1, 2...
+ * and hand over control from the LMB allocator to the
+ * bootmem allocator. If this function is called for
+ * node 5, then we know that all nodes <5 are using the
+ * bootmem allocator instead of the LMB allocator.
+ *
+ * So, check the nid from which this allocation came
+ * and double check to see if we need to use bootmem
+ * instead of the LMB. We don't free the LMB memory
+ * since it would be useless.
*/
- new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+ new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
if (new_nid < nid) {
- ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
+ ret = __alloc_bootmem_node(NODE_DATA(new_nid),
size, align, 0);
- if (!ret)
- panic("numa.c: cannot allocate %lu bytes on node %d",
- size, new_nid);
-
- ret = __pa(ret);
-
- dbg("alloc_bootmem %lx %lx\n", ret, size);
+ dbg("alloc_bootmem %p %lx\n", ret, size);
}
- return (void *)ret;
+ memset(ret, 0, size);
+ return ret;
}
static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -952,7 +960,7 @@ void __init do_init_bootmem(void)
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
- unsigned long bootmem_paddr;
+ void *bootmem_vaddr;
unsigned long bootmap_pages;
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -964,11 +972,9 @@ void __init do_init_bootmem(void)
* previous nodes' bootmem to be initialized and have
* all reserved areas marked.
*/
- NODE_DATA(nid) = careful_allocation(nid,
+ NODE_DATA(nid) = careful_zallocation(nid,
sizeof(struct pglist_data),
SMP_CACHE_BYTES, end_pfn);
- NODE_DATA(nid) = __va(NODE_DATA(nid));
- memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
dbg("node %d\n", nid);
dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -984,20 +990,20 @@ void __init do_init_bootmem(void)
dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmem_paddr = (unsigned long)careful_allocation(nid,
+ bootmem_vaddr = careful_zallocation(nid,
bootmap_pages << PAGE_SHIFT,
PAGE_SIZE, end_pfn);
- memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
- dbg("bootmap_paddr = %lx\n", bootmem_paddr);
+ dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
- init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+ init_bootmem_node(NODE_DATA(nid),
+ __pa(bootmem_vaddr) >> PAGE_SHIFT,
start_pfn, end_pfn);
free_bootmem_with_active_regions(nid, end_pfn);
/*
* Be very careful about moving this around. Future
- * calls to careful_allocation() depend on this getting
+ * calls to careful_zallocation() depend on this getting
* done correctly.
*/
mark_reserved_regions_for_nid(nid);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 38ff35f2142..22972cd83cc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -266,7 +266,8 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
/* The PTE should never be already set nor present in the
* hash table
*/
- BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
+ BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
+ flags);
set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 803a64c02b0..39ac22b13c7 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -189,8 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
_tlbil_pid(0);
preempt_enable();
-#endif
+#else
_tlbil_pid(0);
+#endif
}
EXPORT_SYMBOL(flush_tlb_kernel_range);
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 628009c0195..964b93974d8 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -30,6 +30,10 @@
extern struct delayed_work spu_work;
extern int spu_prof_running;
+#define TRACE_ARRAY_SIZE 1024
+
+extern spinlock_t oprof_spu_smpl_arry_lck;
+
struct spu_overlay_info { /* map of sections within an SPU overlay */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
@@ -79,7 +83,7 @@ struct spu_buffer {
* the vma-to-fileoffset map.
*/
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
- u64 objectid);
+ unsigned long objectid);
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
unsigned int vma, const struct spu *aSpu,
int *grd_val);
@@ -89,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
* Entry point for SPU profiling.
* cycles_reset is the SPU_CYCLES count value specified by the user.
*/
-int start_spu_profiling(unsigned int cycles_reset);
-
-void stop_spu_profiling(void);
+int start_spu_profiling_cycles(unsigned int cycles_reset);
+void start_spu_profiling_events(void);
+void stop_spu_profiling_cycles(void);
+void stop_spu_profiling_events(void);
/* add the necessary profiling hooks */
int spu_sync_start(void);
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index dd499c3e9da..9305ddaac51 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -18,11 +18,21 @@
#include <asm/cell-pmu.h>
#include "pr_util.h"
-#define TRACE_ARRAY_SIZE 1024
#define SCALE_SHIFT 14
static u32 *samples;
+/* spu_prof_running is a flag used to indicate if spu profiling is enabled
+ * or not. It is set by the routines start_spu_profiling_cycles() and
+ * start_spu_profiling_events(). The flag is cleared by the routines
+ * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These
+ * routines are called via global_start() and global_stop() which are called in
+ * op_powerpc_start() and op_powerpc_stop(). These routines are called once
+ * per system as a result of the user starting/stopping oprofile. Hence, only
+ * one CPU per user at a time will be changing the value of spu_prof_running.
+ * In general, OProfile does not protect against multiple users trying to run
+ * OProfile at a time.
+ */
int spu_prof_running;
static unsigned int profiling_interval;
@@ -31,8 +41,8 @@ static unsigned int profiling_interval;
#define SPU_PC_MASK 0xFFFF
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
@@ -49,7 +59,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese
* of precision. This is close enough for the purpose at hand.
*
* The value of the timeout should be small enough that the hw
- * trace buffer will not get more then about 1/3 full for the
+ * trace buffer will not get more than about 1/3 full for the
* maximum user specified (the LFSR value) hw sampling frequency.
* This is to ensure the trace buffer will never fill even if the
* kernel thread scheduling varies under a heavy system load.
@@ -145,13 +155,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
* sample array must be loaded and then processed for a given
* cpu. The sample array is not per cpu.
*/
- spin_lock_irqsave(&sample_array_lock,
- sample_array_lock_flags);
+ spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
num_samples = cell_spu_pc_collection(cpu);
if (num_samples == 0) {
- spin_unlock_irqrestore(&sample_array_lock,
- sample_array_lock_flags);
+ spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
continue;
}
@@ -162,8 +172,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
num_samples);
}
- spin_unlock_irqrestore(&sample_array_lock,
- sample_array_lock_flags);
+ spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
}
smp_wmb(); /* insure spu event buffer updates are written */
@@ -182,13 +192,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
static struct hrtimer timer;
/*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
* NOTE: SPU profiling is done system-wide, not per-CPU.
*
* cycles_reset is the count value specified by the user when
* setting up OProfile to count SPU_CYCLES.
*/
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
{
ktime_t kt;
@@ -212,10 +222,30 @@ int start_spu_profiling(unsigned int cycles_reset)
return 0;
}
-void stop_spu_profiling(void)
+/*
+ * Entry point for SPU event profiling.
+ * NOTE: SPU profiling is done system-wide, not per-CPU.
+ *
+ * cycles_reset is the count value specified by the user when
+ * setting up OProfile to count SPU_CYCLES.
+ */
+void start_spu_profiling_events(void)
+{
+ spu_prof_running = 1;
+ schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+
+ return;
+}
+
+void stop_spu_profiling_cycles(void)
{
spu_prof_running = 0;
hrtimer_cancel(&timer);
kfree(samples);
- pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+ pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
+}
+
+void stop_spu_profiling_events(void)
+{
+ spu_prof_running = 0;
}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 17807acb05d..21f16edf6c8 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -132,6 +132,28 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
+#ifdef CONFIG_OPROFILE_CELL
+ /* create a file the user tool can check to see what level of profiling
+ * support exits with this kernel. Initialize bit mask to indicate
+ * what support the kernel has:
+ * bit 0 - Supports SPU event profiling in addition to PPU
+ * event and cycles; and SPU cycle profiling
+ * bits 1-31 - Currently unused.
+ *
+ * If the file does not exist, then the kernel only supports SPU
+ * cycle profiling, PPU event and cycle profiling.
+ */
+ oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
+ sys.cell_support = 0x1; /* Note, the user OProfile tool must check
+ * that this bit is set before attempting to
+ * user SPU event profiling. Older kernels
+ * will not have this file, hence the user
+ * tool is not allowed to do SPU event
+ * profiling on older kernels. Older kernels
+ * will accept SPU events but collected data
+ * is garbage.
+ */
+#endif
#endif
for (i = 0; i < model->num_counters; ++i) {
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 25a4ec2514a..ae06c6236d9 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -40,14 +40,15 @@
#include "../platforms/cell/interrupt.h"
#include "cell/pr_util.h"
-static void cell_global_stop_spu(void);
+#define PPU_PROFILING 0
+#define SPU_PROFILING_CYCLES 1
+#define SPU_PROFILING_EVENTS 2
-/*
- * spu_cycle_reset is the number of cycles between samples.
- * This variable is used for SPU profiling and should ONLY be set
- * at the beginning of cell_reg_setup; otherwise, it's read-only.
- */
-static unsigned int spu_cycle_reset;
+#define SPU_EVENT_NUM_START 4100
+#define SPU_EVENT_NUM_STOP 4399
+#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */
+#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */
+#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */
#define NUM_SPUS_PER_NODE 8
#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
@@ -66,6 +67,21 @@ static unsigned int spu_cycle_reset;
#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
+/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle.
+ * To configure counter to send value every N cycles set counter to
+ * 2^32 - 1 - N.
+ */
+#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10
+
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+static unsigned int profiling_mode;
+static int spu_evnt_phys_spu_indx;
+
struct pmc_cntrl_data {
unsigned long vcntr;
unsigned long evnts;
@@ -105,6 +121,8 @@ struct pm_cntrl {
u16 trace_mode;
u16 freeze;
u16 count_mode;
+ u16 spu_addr_trace;
+ u8 trace_buf_ovflw;
};
static struct {
@@ -122,7 +140,7 @@ static struct {
#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
-
+static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
/*
@@ -152,6 +170,7 @@ static u32 hdw_thread;
static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;
+static struct timer_list timer_spu_event_swap;
/*
* pm_signal needs to be global since it is initialized in
@@ -165,7 +184,7 @@ static int spu_rtas_token; /* token for SPU cycle profiling */
static u32 reset_value[NR_PHYS_CTRS];
static int num_counters;
static int oprofile_running;
-static DEFINE_SPINLOCK(virt_cntr_lock);
+static DEFINE_SPINLOCK(cntr_lock);
static u32 ctr_enabled;
@@ -336,13 +355,13 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
if (bus_word & (1 << i)) {
pm_regs.debug_bus_control |=
- (bus_type << (30 - (2 * i)));
+ (bus_type << (30 - (2 * i)));
for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
if (input_bus[j] == 0xff) {
input_bus[j] = i;
pm_regs.group_control |=
- (i << (30 - (2 * j)));
+ (i << (30 - (2 * j)));
break;
}
@@ -367,12 +386,16 @@ static void write_pm_cntrl(int cpu)
if (pm_regs.pm_cntrl.stop_at_max == 1)
val |= CBE_PM_STOP_AT_MAX;
- if (pm_regs.pm_cntrl.trace_mode == 1)
+ if (pm_regs.pm_cntrl.trace_mode != 0)
val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
+ if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
+ val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
if (pm_regs.pm_cntrl.freeze == 1)
val |= CBE_PM_FREEZE_ALL_CTRS;
+ val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
+
/*
* Routine set_count_mode must be called previously to set
* the count mode based on the user selection of user and kernel.
@@ -441,7 +464,7 @@ static void cell_virtual_cntr(unsigned long data)
* not both playing with the counters on the same node.
*/
- spin_lock_irqsave(&virt_cntr_lock, flags);
+ spin_lock_irqsave(&cntr_lock, flags);
prev_hdw_thread = hdw_thread;
@@ -480,7 +503,7 @@ static void cell_virtual_cntr(unsigned long data)
cbe_disable_pm_interrupts(cpu);
for (i = 0; i < num_counters; i++) {
per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
- = cbe_read_ctr(cpu, i);
+ = cbe_read_ctr(cpu, i);
if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
== 0xFFFFFFFF)
@@ -527,7 +550,7 @@ static void cell_virtual_cntr(unsigned long data)
cbe_enable_pm(cpu);
}
- spin_unlock_irqrestore(&virt_cntr_lock, flags);
+ spin_unlock_irqrestore(&cntr_lock, flags);
mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
}
@@ -541,38 +564,146 @@ static void start_virt_cntrs(void)
add_timer(&timer_virt_cntr);
}
-/* This function is called once for all cpus combined */
-static int cell_reg_setup(struct op_counter_config *ctr,
+static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
struct op_system_config *sys, int num_ctrs)
{
- int i, j, cpu;
- spu_cycle_reset = 0;
+ spu_cycle_reset = ctr[0].count;
- if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
- spu_cycle_reset = ctr[0].count;
+ /*
+ * Each node will need to make the rtas call to start
+ * and stop SPU profiling. Get the token once and store it.
+ */
+ spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+
+ if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+ printk(KERN_ERR
+ "%s: rtas token ibm,cbe-spu-perftools unknown\n",
+ __func__);
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Unfortunately, the hardware will only support event profiling
+ * on one SPU per node at a time. Therefore, we must time slice
+ * the profiling across all SPUs in the node. Note, we do this
+ * in parallel for each node. The following routine is called
+ * periodically based on kernel timer to switch which SPU is
+ * being monitored in a round robbin fashion.
+ */
+static void spu_evnt_swap(unsigned long data)
+{
+ int node;
+ int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
+ unsigned long flags;
+ int cpu;
+ int ret;
+ u32 interrupt_mask;
+
+
+ /* enable interrupts on cntr 0 */
+ interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
+
+ hdw_thread = 0;
+
+ /* Make sure spu event interrupt handler and spu event swap
+ * don't access the counters simultaneously.
+ */
+ spin_lock_irqsave(&cntr_lock, flags);
+
+ cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
+
+ if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
+ spu_evnt_phys_spu_indx = 0;
+
+ pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
+ pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
+ pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
+
+ /* switch the SPU being profiled on each node */
+ for_each_online_cpu(cpu) {
+ if (cbe_get_hw_thread_id(cpu))
+ continue;
+
+ node = cbe_cpu_to_node(cpu);
+ cur_phys_spu = (node * NUM_SPUS_PER_NODE)
+ + cur_spu_evnt_phys_spu_indx;
+ nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
+ + spu_evnt_phys_spu_indx;
/*
- * Each node will need to make the rtas call to start
- * and stop SPU profiling. Get the token once and store it.
+ * stop counters, save counter values, restore counts
+ * for previous physical SPU
*/
- spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+ cbe_disable_pm(cpu);
+ cbe_disable_pm_interrupts(cpu);
- if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
- printk(KERN_ERR
- "%s: rtas token ibm,cbe-spu-perftools unknown\n",
- __func__);
- return -EIO;
- }
+ spu_pm_cnt[cur_phys_spu]
+ = cbe_read_ctr(cpu, 0);
+
+ /* restore previous count for the next spu to sample */
+ /* NOTE, hardware issue, counter will not start if the
+ * counter value is at max (0xFFFFFFFF).
+ */
+ if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
+ cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
+ else
+ cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
+
+ pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+ /* setup the debug bus measure the one event and
+ * the two events to route the next SPU's PC on
+ * the debug bus
+ */
+ ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
+ if (ret)
+ printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
+ "SPU event swap\n", __func__);
+
+ /* clear the trace buffer, don't want to take PC for
+ * previous SPU*/
+ cbe_write_pm(cpu, trace_address, 0);
+
+ enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
+
+ /* Enable interrupts on the CPU thread that is starting */
+ cbe_enable_pm_interrupts(cpu, hdw_thread,
+ interrupt_mask);
+ cbe_enable_pm(cpu);
}
- pm_rtas_token = rtas_token("ibm,cbe-perftools");
+ spin_unlock_irqrestore(&cntr_lock, flags);
+ /* swap approximately every 0.1 seconds */
+ mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
+}
+
+static void start_spu_event_swap(void)
+{
+ init_timer(&timer_spu_event_swap);
+ timer_spu_event_swap.function = spu_evnt_swap;
+ timer_spu_event_swap.data = 0UL;
+ timer_spu_event_swap.expires = jiffies + HZ / 25;
+ add_timer(&timer_spu_event_swap);
+}
+
+static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
+ struct op_system_config *sys, int num_ctrs)
+{
+ int i;
+
+ /* routine is called once for all nodes */
+
+ spu_evnt_phys_spu_indx = 0;
/*
- * For all events excetp PPU CYCLEs, each node will need to make
+ * For all events except PPU CYCLEs, each node will need to make
* the rtas cbe-perftools call to setup and reset the debug bus.
* Make the token lookup call once and store it in the global
* variable pm_rtas_token.
*/
+ pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
printk(KERN_ERR
"%s: rtas token ibm,cbe-perftools unknown\n",
@@ -580,6 +711,58 @@ static int cell_reg_setup(struct op_counter_config *ctr,
return -EIO;
}
+ /* setup the pm_control register settings,
+ * settings will be written per node by the
+ * cell_cpu_setup() function.
+ */
+ pm_regs.pm_cntrl.trace_buf_ovflw = 1;
+
+ /* Use the occurrence trace mode to have SPU PC saved
+ * to the trace buffer. Occurrence data in trace buffer
+ * is not used. Bit 2 must be set to store SPU addresses.
+ */
+ pm_regs.pm_cntrl.trace_mode = 2;
+
+ pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus
+ event 2 & 3 */
+
+ /* setup the debug bus event array with the SPU PC routing events.
+ * Note, pm_signal[0] will be filled in by set_pm_event() call below.
+ */
+ pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
+ pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
+ pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
+ pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
+
+ pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
+ pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
+ pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
+ pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
+
+ /* Set the user selected spu event to profile on,
+ * note, only one SPU profiling event is supported
+ */
+ num_counters = 1; /* Only support one SPU event at a time */
+ set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
+
+ reset_value[0] = 0xFFFFFFFF - ctr[0].count;
+
+ /* global, used by cell_cpu_setup */
+ ctr_enabled |= 1;
+
+ /* Initialize the count for each SPU to the reset value */
+ for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
+ spu_pm_cnt[i] = reset_value[0];
+
+ return 0;
+}
+
+static int cell_reg_setup_ppu(struct op_counter_config *ctr,
+ struct op_system_config *sys, int num_ctrs)
+{
+ /* routine is called once for all nodes */
+ int i, j, cpu;
+
num_counters = num_ctrs;
if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
@@ -589,14 +772,6 @@ static int cell_reg_setup(struct op_counter_config *ctr,
__func__);
return -EIO;
}
- pm_regs.group_control = 0;
- pm_regs.debug_bus_control = 0;
-
- /* setup the pm_control register */
- memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl));
- pm_regs.pm_cntrl.stop_at_max = 1;
- pm_regs.pm_cntrl.trace_mode = 0;
- pm_regs.pm_cntrl.freeze = 1;
set_count_mode(sys->enable_kernel, sys->enable_user);
@@ -665,6 +840,63 @@ static int cell_reg_setup(struct op_counter_config *ctr,
}
+/* This function is called once for all cpus combined */
+static int cell_reg_setup(struct op_counter_config *ctr,
+ struct op_system_config *sys, int num_ctrs)
+{
+ int ret=0;
+ spu_cycle_reset = 0;
+
+ /* initialize the spu_arr_trace value, will be reset if
+ * doing spu event profiling.
+ */
+ pm_regs.group_control = 0;
+ pm_regs.debug_bus_control = 0;
+ pm_regs.pm_cntrl.stop_at_max = 1;
+ pm_regs.pm_cntrl.trace_mode = 0;
+ pm_regs.pm_cntrl.freeze = 1;
+ pm_regs.pm_cntrl.trace_buf_ovflw = 0;
+ pm_regs.pm_cntrl.spu_addr_trace = 0;
+
+ /*
+ * For all events except PPU CYCLEs, each node will need to make
+ * the rtas cbe-perftools call to setup and reset the debug bus.
+ * Make the token lookup call once and store it in the global
+ * variable pm_rtas_token.
+ */
+ pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
+ if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+ printk(KERN_ERR
+ "%s: rtas token ibm,cbe-perftools unknown\n",
+ __func__);
+ return -EIO;
+ }
+
+ if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
+ profiling_mode = SPU_PROFILING_CYCLES;
+ ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
+ } else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
+ (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
+ profiling_mode = SPU_PROFILING_EVENTS;
+ spu_cycle_reset = ctr[0].count;
+
+ /* for SPU event profiling, need to setup the
+ * pm_signal array with the events to route the
+ * SPU PC before making the FW call. Note, only
+ * one SPU event for profiling can be specified
+ * at a time.
+ */
+ cell_reg_setup_spu_events(ctr, sys, num_ctrs);
+ } else {
+ profiling_mode = PPU_PROFILING;
+ ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
+ }
+
+ return ret;
+}
+
+
/* This function is called once for each cpu */
static int cell_cpu_setup(struct op_counter_config *cntr)
@@ -672,8 +904,13 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
u32 cpu = smp_processor_id();
u32 num_enabled = 0;
int i;
+ int ret;
- if (spu_cycle_reset)
+ /* Cycle based SPU profiling does not use the performance
+ * counters. The trace array is configured to collect
+ * the data.
+ */
+ if (profiling_mode == SPU_PROFILING_CYCLES)
return 0;
/* There is one performance monitor per processor chip (i.e. node),
@@ -686,7 +923,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
cbe_disable_pm(cpu);
cbe_disable_pm_interrupts(cpu);
- cbe_write_pm(cpu, pm_interval, 0);
cbe_write_pm(cpu, pm_start_stop, 0);
cbe_write_pm(cpu, group_control, pm_regs.group_control);
cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
@@ -703,7 +939,20 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
* The pm_rtas_activate_signals will return -EIO if the FW
* call failed.
*/
- return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
+ if (profiling_mode == SPU_PROFILING_EVENTS) {
+ /* For SPU event profiling also need to setup the
+ * pm interval timer
+ */
+ ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
+ num_enabled+2);
+ /* store PC from debug bus to Trace buffer as often
+ * as possible (every 10 cycles)
+ */
+ cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
+ return ret;
+ } else
+ return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
+ num_enabled);
}
#define ENTRIES 303
@@ -885,7 +1134,122 @@ static struct notifier_block cpu_freq_notifier_block = {
};
#endif
-static int cell_global_start_spu(struct op_counter_config *ctr)
+/*
+ * Note the generic OProfile stop calls do not support returning
+ * an error on stop. Hence, will not return an error if the FW
+ * calls fail on stop. Failure to reset the debug bus is not an issue.
+ * Failure to disable the SPU profiling is not an issue. The FW calls
+ * to enable the performance counters and debug bus will work even if
+ * the hardware was not cleanly reset.
+ */
+static void cell_global_stop_spu_cycles(void)
+{
+ int subfunc, rtn_value;
+ unsigned int lfsr_value;
+ int cpu;
+
+ oprofile_running = 0;
+ smp_wmb();
+
+#ifdef CONFIG_CPU_FREQ
+ cpufreq_unregister_notifier(&cpu_freq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+ for_each_online_cpu(cpu) {
+ if (cbe_get_hw_thread_id(cpu))
+ continue;
+
+ subfunc = 3; /*
+ * 2 - activate SPU tracing,
+ * 3 - deactivate
+ */
+ lfsr_value = 0x8f100000;
+
+ rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
+ subfunc, cbe_cpu_to_node(cpu),
+ lfsr_value);
+
+ if (unlikely(rtn_value != 0)) {
+ printk(KERN_ERR
+ "%s: rtas call ibm,cbe-spu-perftools " \
+ "failed, return = %d\n",
+ __func__, rtn_value);
+ }
+
+ /* Deactivate the signals */
+ pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+ }
+
+ stop_spu_profiling_cycles();
+}
+
+static void cell_global_stop_spu_events(void)
+{
+ int cpu;
+ oprofile_running = 0;
+
+ stop_spu_profiling_events();
+ smp_wmb();
+
+ for_each_online_cpu(cpu) {
+ if (cbe_get_hw_thread_id(cpu))
+ continue;
+
+ cbe_sync_irq(cbe_cpu_to_node(cpu));
+ /* Stop the counters */
+ cbe_disable_pm(cpu);
+ cbe_write_pm07_control(cpu, 0, 0);
+
+ /* Deactivate the signals */
+ pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+ /* Deactivate interrupts */
+ cbe_disable_pm_interrupts(cpu);
+ }
+ del_timer_sync(&timer_spu_event_swap);
+}
+
+static void cell_global_stop_ppu(void)
+{
+ int cpu;
+
+ /*
+ * This routine will be called once for the system.
+ * There is one performance monitor per node, so we
+ * only need to perform this function once per node.
+ */
+ del_timer_sync(&timer_virt_cntr);
+ oprofile_running = 0;
+ smp_wmb();
+
+ for_each_online_cpu(cpu) {
+ if (cbe_get_hw_thread_id(cpu))
+ continue;
+
+ cbe_sync_irq(cbe_cpu_to_node(cpu));
+ /* Stop the counters */
+ cbe_disable_pm(cpu);
+
+ /* Deactivate the signals */
+ pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+ /* Deactivate interrupts */
+ cbe_disable_pm_interrupts(cpu);
+ }
+}
+
+static void cell_global_stop(void)
+{
+ if (profiling_mode == PPU_PROFILING)
+ cell_global_stop_ppu();
+ else if (profiling_mode == SPU_PROFILING_EVENTS)
+ cell_global_stop_spu_events();
+ else
+ cell_global_stop_spu_cycles();
+}
+
+static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
{
int subfunc;
unsigned int lfsr_value;
@@ -951,18 +1315,18 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
/* start profiling */
ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
- cbe_cpu_to_node(cpu), lfsr_value);
+ cbe_cpu_to_node(cpu), lfsr_value);
if (unlikely(ret != 0)) {
printk(KERN_ERR
- "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
- __func__, ret);
+ "%s: rtas call ibm,cbe-spu-perftools failed, " \
+ "return = %d\n", __func__, ret);
rtas_error = -EIO;
goto out;
}
}
- rtas_error = start_spu_profiling(spu_cycle_reset);
+ rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
if (rtas_error)
goto out_stop;
@@ -970,11 +1334,74 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
return 0;
out_stop:
- cell_global_stop_spu(); /* clean up the PMU/debug bus */
+ cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */
out:
return rtas_error;
}
+static int cell_global_start_spu_events(struct op_counter_config *ctr)
+{
+ int cpu;
+ u32 interrupt_mask = 0;
+ int rtn = 0;
+
+ hdw_thread = 0;
+
+ /* spu event profiling, uses the performance counters to generate
+ * an interrupt. The hardware is setup to store the SPU program
+ * counter into the trace array. The occurrence mode is used to
+ * enable storing data to the trace buffer. The bits are set
+ * to send/store the SPU address in the trace buffer. The debug
+ * bus must be setup to route the SPU program counter onto the
+ * debug bus. The occurrence data in the trace buffer is not used.
+ */
+
+ /* This routine gets called once for the system.
+ * There is one performance monitor per node, so we
+ * only need to perform this function once per node.
+ */
+
+ for_each_online_cpu(cpu) {
+ if (cbe_get_hw_thread_id(cpu))
+ continue;
+
+ /*
+ * Setup SPU event-based profiling.
+ * Set perf_mon_control bit 0 to a zero before
+ * enabling spu collection hardware.
+ *
+ * Only support one SPU event on one SPU per node.
+ */
+ if (ctr_enabled & 1) {
+ cbe_write_ctr(cpu, 0, reset_value[0]);
+ enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
+ interrupt_mask |=
+ CBE_PM_CTR_OVERFLOW_INTR(0);
+ } else {
+ /* Disable counter */
+ cbe_write_pm07_control(cpu, 0, 0);
+ }
+
+ cbe_get_and_clear_pm_interrupts(cpu);
+ cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
+ cbe_enable_pm(cpu);
+
+ /* clear the trace buffer */
+ cbe_write_pm(cpu, trace_address, 0);
+ }
+
+ /* Start the timer to time slice collecting the event profile
+ * on each of the SPUs. Note, can collect profile on one SPU
+ * per node at a time.
+ */
+ start_spu_event_swap();
+ start_spu_profiling_events();
+ oprofile_running = 1;
+ smp_wmb();
+
+ return rtn;
+}
+
static int cell_global_start_ppu(struct op_counter_config *ctr)
{
u32 cpu, i;
@@ -994,8 +1421,7 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
if (ctr_enabled & (1 << i)) {
cbe_write_ctr(cpu, i, reset_value[i]);
enable_ctr(cpu, i, pm_regs.pm07_cntrl);
- interrupt_mask |=
- CBE_PM_CTR_OVERFLOW_INTR(i);
+ interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
} else {
/* Disable counter */
cbe_write_pm07_control(cpu, i, 0);
@@ -1024,99 +1450,162 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
static int cell_global_start(struct op_counter_config *ctr)
{
- if (spu_cycle_reset)
- return cell_global_start_spu(ctr);
+ if (profiling_mode == SPU_PROFILING_CYCLES)
+ return cell_global_start_spu_cycles(ctr);
+ else if (profiling_mode == SPU_PROFILING_EVENTS)
+ return cell_global_start_spu_events(ctr);
else
return cell_global_start_ppu(ctr);
}
-/*
- * Note the generic OProfile stop calls do not support returning
- * an error on stop. Hence, will not return an error if the FW
- * calls fail on stop. Failure to reset the debug bus is not an issue.
- * Failure to disable the SPU profiling is not an issue. The FW calls
- * to enable the performance counters and debug bus will work even if
- * the hardware was not cleanly reset.
+
+/* The SPU interrupt handler
+ *
+ * SPU event profiling works as follows:
+ * The pm_signal[0] holds the one SPU event to be measured. It is routed on
+ * the debug bus using word 0 or 1. The value of pm_signal[1] and
+ * pm_signal[2] contain the necessary events to route the SPU program
+ * counter for the selected SPU onto the debug bus using words 2 and 3.
+ * The pm_interval register is setup to write the SPU PC value into the
+ * trace buffer at the maximum rate possible. The trace buffer is configured
+ * to store the PCs, wrapping when it is full. The performance counter is
+ * intialized to the max hardware count minus the number of events, N, between
+ * samples. Once the N events have occured, a HW counter overflow occurs
+ * causing the generation of a HW counter interrupt which also stops the
+ * writing of the SPU PC values to the trace buffer. Hence the last PC
+ * written to the trace buffer is the SPU PC that we want. Unfortunately,
+ * we have to read from the beginning of the trace buffer to get to the
+ * last value written. We just hope the PPU has nothing better to do then
+ * service this interrupt. The PC for the specific SPU being profiled is
+ * extracted from the trace buffer processed and stored. The trace buffer
+ * is cleared, interrupts are cleared, the counter is reset to max - N.
+ * A kernel timer is used to periodically call the routine spu_evnt_swap()
+ * to switch to the next physical SPU in the node to profile in round robbin
+ * order. This way data is collected for all SPUs on the node. It does mean
+ * that we need to use a relatively small value of N to ensure enough samples
+ * on each SPU are collected each SPU is being profiled 1/8 of the time.
+ * It may also be necessary to use a longer sample collection period.
*/
-static void cell_global_stop_spu(void)
+static void cell_handle_interrupt_spu(struct pt_regs *regs,
+ struct op_counter_config *ctr)
{
- int subfunc, rtn_value;
- unsigned int lfsr_value;
- int cpu;
+ u32 cpu, cpu_tmp;
+ u64 trace_entry;
+ u32 interrupt_mask;
+ u64 trace_buffer[2];
+ u64 last_trace_buffer;
+ u32 sample;
+ u32 trace_addr;
+ unsigned long sample_array_lock_flags;
+ int spu_num;
+ unsigned long flags;
- oprofile_running = 0;
+ /* Make sure spu event interrupt handler and spu event swap
+ * don't access the counters simultaneously.
+ */
+ cpu = smp_processor_id();
+ spin_lock_irqsave(&cntr_lock, flags);
-#ifdef CONFIG_CPU_FREQ
- cpufreq_unregister_notifier(&cpu_freq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-#endif
+ cpu_tmp = cpu;
+ cbe_disable_pm(cpu);
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
+ interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
- subfunc = 3; /*
- * 2 - activate SPU tracing,
- * 3 - deactivate
- */
- lfsr_value = 0x8f100000;
+ sample = 0xABCDEF;
+ trace_entry = 0xfedcba;
+ last_trace_buffer = 0xdeadbeaf;
- rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
- subfunc, cbe_cpu_to_node(cpu),
- lfsr_value);
+ if ((oprofile_running == 1) && (interrupt_mask != 0)) {
+ /* disable writes to trace buff */
+ cbe_write_pm(cpu, pm_interval, 0);
- if (unlikely(rtn_value != 0)) {
- printk(KERN_ERR
- "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
- __func__, rtn_value);
+ /* only have one perf cntr being used, cntr 0 */
+ if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
+ && ctr[0].enabled)
+ /* The SPU PC values will be read
+ * from the trace buffer, reset counter
+ */
+
+ cbe_write_ctr(cpu, 0, reset_value[0]);
+
+ trace_addr = cbe_read_pm(cpu, trace_address);
+
+ while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
+ /* There is data in the trace buffer to process
+ * Read the buffer until you get to the last
+ * entry. This is the value we want.
+ */
+
+ cbe_read_trace_buffer(cpu, trace_buffer);
+ trace_addr = cbe_read_pm(cpu, trace_address);
}
- /* Deactivate the signals */
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
- }
+ /* SPU Address 16 bit count format for 128 bit
+ * HW trace buffer is used for the SPU PC storage
+ * HDR bits 0:15
+ * SPU Addr 0 bits 16:31
+ * SPU Addr 1 bits 32:47
+ * unused bits 48:127
+ *
+ * HDR: bit4 = 1 SPU Address 0 valid
+ * HDR: bit5 = 1 SPU Address 1 valid
+ * - unfortunately, the valid bits don't seem to work
+ *
+ * Note trace_buffer[0] holds bits 0:63 of the HW
+ * trace buffer, trace_buffer[1] holds bits 64:127
+ */
- stop_spu_profiling();
-}
+ trace_entry = trace_buffer[0]
+ & 0x00000000FFFF0000;
-static void cell_global_stop_ppu(void)
-{
- int cpu;
+ /* only top 16 of the 18 bit SPU PC address
+ * is stored in trace buffer, hence shift right
+ * by 16 -2 bits */
+ sample = trace_entry >> 14;
+ last_trace_buffer = trace_buffer[0];
- /*
- * This routine will be called once for the system.
- * There is one performance monitor per node, so we
- * only need to perform this function once per node.
- */
- del_timer_sync(&timer_virt_cntr);
- oprofile_running = 0;
- smp_wmb();
+ spu_num = spu_evnt_phys_spu_indx
+ + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
+ /* make sure only one process at a time is calling
+ * spu_sync_buffer()
+ */
+ spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+ sample_array_lock_flags);
+ spu_sync_buffer(spu_num, &sample, 1);
+ spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+ sample_array_lock_flags);
- cbe_sync_irq(cbe_cpu_to_node(cpu));
- /* Stop the counters */
- cbe_disable_pm(cpu);
+ smp_wmb(); /* insure spu event buffer updates are written
+ * don't want events intermingled... */
- /* Deactivate the signals */
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+ /* The counters were frozen by the interrupt.
+ * Reenable the interrupt and restart the counters.
+ */
+ cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
+ cbe_enable_pm_interrupts(cpu, hdw_thread,
+ virt_cntr_inter_mask);
- /* Deactivate interrupts */
- cbe_disable_pm_interrupts(cpu);
- }
-}
+ /* clear the trace buffer, re-enable writes to trace buff */
+ cbe_write_pm(cpu, trace_address, 0);
+ cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
-static void cell_global_stop(void)
-{
- if (spu_cycle_reset)
- cell_global_stop_spu();
- else
- cell_global_stop_ppu();
+ /* The writes to the various performance counters only writes
+ * to a latch. The new values (interrupt setting bits, reset
+ * counter value etc.) are not copied to the actual registers
+ * until the performance monitor is enabled. In order to get
+ * this to work as desired, the permormance monitor needs to
+ * be disabled while writing to the latches. This is a
+ * HW design issue.
+ */
+ write_pm_cntrl(cpu);
+ cbe_enable_pm(cpu);
+ }
+ spin_unlock_irqrestore(&cntr_lock, flags);
}
-static void cell_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
+static void cell_handle_interrupt_ppu(struct pt_regs *regs,
+ struct op_counter_config *ctr)
{
u32 cpu;
u64 pc;
@@ -1132,7 +1621,7 @@ static void cell_handle_interrupt(struct pt_regs *regs,
* routine are not running at the same time. See the
* cell_virtual_cntr() routine for additional comments.
*/
- spin_lock_irqsave(&virt_cntr_lock, flags);
+ spin_lock_irqsave(&cntr_lock, flags);
/*
* Need to disable and reenable the performance counters
@@ -1185,7 +1674,16 @@ static void cell_handle_interrupt(struct pt_regs *regs,
*/
cbe_enable_pm(cpu);
}
- spin_unlock_irqrestore(&virt_cntr_lock, flags);
+ spin_unlock_irqrestore(&cntr_lock, flags);
+}
+
+static void cell_handle_interrupt(struct pt_regs *regs,
+ struct op_counter_config *ctr)
+{
+ if (profiling_mode == PPU_PROFILING)
+ cell_handle_interrupt_ppu(regs, ctr);
+ else
+ cell_handle_interrupt_spu(regs, ctr);
}
/*
@@ -1195,7 +1693,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
*/
static int cell_sync_start(void)
{
- if (spu_cycle_reset)
+ if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+ (profiling_mode == SPU_PROFILING_EVENTS))
return spu_sync_start();
else
return DO_GENERIC_SYNC;
@@ -1203,7 +1702,8 @@ static int cell_sync_start(void)
static int cell_sync_stop(void)
{
- if (spu_cycle_reset)
+ if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+ (profiling_mode == SPU_PROFILING_EVENTS))
return spu_sync_stop();
else
return 1;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index ae7c34f37e1..98367a0255f 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -42,7 +42,7 @@ static struct of_device_id mpc52xx_bus_ids[] __initdata = {
* from interrupt context while node mapping (which calls ioremap())
* cannot be used at such point.
*/
-static spinlock_t mpc52xx_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mpc52xx_lock);
static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 2e67bd840e0..35b1ec49271 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -44,8 +44,8 @@ static DEFINE_SPINLOCK(beat_htab_lock);
static inline unsigned int beat_read_mask(unsigned hpte_group)
{
- unsigned long hpte_v[5];
unsigned long rmask = 0;
+ u64 hpte_v[5];
beat_read_htab_entries(0, hpte_group + 0, hpte_v);
if (!(hpte_v[0] & HPTE_V_BOLTED))
@@ -93,8 +93,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
int psize, int ssize)
{
unsigned long lpar_rc;
- unsigned long slot;
- unsigned long hpte_v, hpte_r;
+ u64 hpte_v, hpte_r, slot;
/* same as iseries */
if (vflags & HPTE_V_SECONDARY)
@@ -153,8 +152,9 @@ static long beat_lpar_hpte_remove(unsigned long hpte_group)
static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
{
- unsigned long dword0, dword[5];
+ unsigned long dword0;
unsigned long lpar_rc;
+ u64 dword[5];
lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
@@ -170,7 +170,7 @@ static void beat_lpar_hptab_clear(void)
unsigned long size_bytes = 1UL << ppc64_pft_size;
unsigned long hpte_count = size_bytes >> 4;
int i;
- unsigned long dummy0, dummy1;
+ u64 dummy0, dummy1;
/* TODO: Use bulk call */
for (i = 0; i < hpte_count; i++)
@@ -189,7 +189,8 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
int psize, int ssize, int local)
{
unsigned long lpar_rc;
- unsigned long dummy0, dummy1, want_v;
+ u64 dummy0, dummy1;
+ unsigned long want_v;
want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
@@ -255,7 +256,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
unsigned long ea,
int psize, int ssize)
{
- unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1;
+ unsigned long lpar_rc, slot, vsid, va;
+ u64 dummy0, dummy1;
vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
va = (vsid << 28) | (ea & 0x0fffffff);
@@ -276,7 +278,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
{
unsigned long want_v;
unsigned long lpar_rc;
- unsigned long dummy1, dummy2;
+ u64 dummy1, dummy2;
unsigned long flags;
DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
@@ -315,8 +317,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
int psize, int ssize)
{
unsigned long lpar_rc;
- unsigned long slot;
- unsigned long hpte_v, hpte_r;
+ u64 hpte_v, hpte_r, slot;
/* same as iseries */
if (vflags & HPTE_V_SECONDARY)
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
index 6b418f6b617..350735bc888 100644
--- a/arch/powerpc/platforms/cell/beat_udbg.c
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
@@ -40,8 +40,8 @@ static void udbg_putc_beat(char c)
}
/* Buffered chars getc */
-static long inbuflen;
-static long inbuf[2]; /* must be 2 longs */
+static u64 inbuflen;
+static u64 inbuf[2]; /* must be 2 u64s */
static int udbg_getc_poll_beat(void)
{
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
index ec7c8f45a21..e6506cd0ff9 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -118,7 +118,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->cur = cbe_freqs[cur_pmode].frequency;
#ifdef CONFIG_SMP
- policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
+ cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
#endif
cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
index 70fa7aef5ed..20472e487b6 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -54,7 +54,7 @@ int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
{
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
- u64 flags;
+ unsigned long flags;
u64 value;
#ifdef DEBUG
long time;
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index a3c6c01bd6d..968c1c0b4d5 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -110,7 +110,7 @@ static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
}
/* initialize spu_gov_info for all affected cpus */
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu(i, policy->cpus) {
affected_info = &per_cpu(spu_gov_info, i);
affected_info->policy = policy;
}
@@ -127,7 +127,7 @@ static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
spu_gov_cancel_work(info);
/* clean spu_gov_info for all affected cpus */
- for_each_cpu_mask (i, policy->cpus) {
+ for_each_cpu (i, policy->cpus) {
info = &per_cpu(spu_gov_info, i);
info->policy = NULL;
}
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d5bb22d6c0..28c04dab263 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void)
iic = &__get_cpu_var(iic);
*(unsigned long *) &pending =
- in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+ in_be64((u64 __iomem *) &iic->regs->pending_destr);
if (!(pending.flags & CBE_IIC_IRQ_VALID))
return NO_IRQ;
virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index b5f84e8f089..059cad6c3f6 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -130,14 +130,14 @@ static const struct ppc_pci_io __devinitconst iowa_pci_io = {
};
-static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap(addr, size, flags);
int busno;
- bus = iowa_pci_find(0, addr);
+ bus = iowa_pci_find(0, (unsigned long)addr);
if (bus != NULL) {
busno = bus - iowa_busses;
PCI_SET_ADDR_TOKEN(res, busno + 1);
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 86db4dd170a..88d94b59a7c 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -150,8 +150,8 @@ static int cbe_nr_iommus;
static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
long n_ptes)
{
- unsigned long __iomem *reg;
- unsigned long val;
+ u64 __iomem *reg;
+ u64 val;
long n;
reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
index 906a0a2a9fe..1410443731e 100644
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu)
u64 route;
if (nr_cpus_node(spu->node)) {
- cpumask_t spumask = node_to_cpumask(spu->node);
- cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+ const struct cpumask *spumask = cpumask_of_node(spu->node),
+ *cpumask = cpumask_of_node(cpu_to_node(cpu));
- if (!cpus_intersects(spumask, cpumask))
+ if (!cpumask_intersects(spumask, cpumask))
return;
}
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 6296bfd9cb0..e309ef70a53 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
out:
return inode;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2ad914c4749..6a0ad196aeb 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx)
static int __node_allowed(struct spu_context *ctx, int node)
{
if (nr_cpus_node(node)) {
- cpumask_t mask = node_to_cpumask(node);
+ const struct cpumask *mask = cpumask_of_node(node);
- if (cpus_intersects(mask, ctx->cpus_allowed))
+ if (cpumask_intersects(mask, &ctx->cpus_allowed))
return 1;
}
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 15c62d3ca12..3bf908e2873 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -314,7 +314,7 @@ extern char *isolated_loader;
* we need to call spu_release(ctx) before sleeping, and
* then spu_acquire(ctx) when awoken.
*
- * Returns with state_mutex re-acquired when successfull or
+ * Returns with state_mutex re-acquired when successful or
* with -ERESTARTSYS and the state_mutex dropped when interrupted.
*/
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index ed3753d8c10..7ddd0a2c802 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -10,18 +10,21 @@ menu "iSeries device drivers"
config VIODASD
tristate "iSeries Virtual I/O disk support"
depends on BLOCK
+ select VIOPATH
help
If you are running on an iSeries system and you want to use
virtual disks created and managed by OS/400, say Y.
config VIOCD
tristate "iSeries Virtual I/O CD support"
+ select VIOPATH
help
If you are running Linux on an IBM iSeries system and you want to
read a CD drive owned by OS/400, say Y here.
config VIOTAPE
tristate "iSeries Virtual Tape Support"
+ select VIOPATH
help
If you are running Linux on an iSeries system and you want Linux
to read and/or write a tape drive owned by OS/400, say Y here.
@@ -30,5 +33,3 @@ endmenu
config VIOPATH
bool
- depends on VIODASD || VIOCD || VIOTAPE || ISERIES_VETH
- default y
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 70b688c1aef..24519b96d6a 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -23,6 +23,7 @@
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
+#include <linux/kexec.h>
#include <linux/major.h>
#include <linux/root_dev.h>
#include <linux/kernel.h>
@@ -638,6 +639,13 @@ static int __init iseries_probe(void)
return 1;
}
+#ifdef CONFIG_KEXEC
+static int iseries_kexec_prepare(struct kimage *image)
+{
+ return -ENOSYS;
+}
+#endif
+
define_machine(iseries) {
.name = "iSeries",
.setup_arch = iSeries_setup_arch,
@@ -658,6 +666,9 @@ define_machine(iseries) {
.probe = iseries_probe,
.ioremap = iseries_ioremap,
.iounmap = iseries_iounmap,
+#ifdef CONFIG_KEXEC
+ .machine_kexec_prepare = iseries_kexec_prepare,
+#endif
/* XXX Implement enable_pmcs for iSeries */
};
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
index 58556b028a4..be2527a516e 100644
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ b/arch/powerpc/platforms/pasemi/cpufreq.c
@@ -112,7 +112,7 @@ static int get_gizmo_latency(void)
static void set_astate(int cpu, unsigned int astate)
{
- u64 flags;
+ unsigned long flags;
/* Return if called before init has run */
if (unlikely(!sdcasr_mapbase))
@@ -213,7 +213,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
pr_debug("current astate is at %d\n",cur_astate);
policy->cur = pas_freqs[cur_astate].frequency;
- policy->cpus = cpu_online_map;
+ cpumask_copy(policy->cpus, &cpu_online_map);
ppc_proc_freq = policy->cur * 1000ul;
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 217af321b0c..a6152d92224 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -509,7 +509,7 @@ fallback:
*/
int pasemi_dma_init(void)
{
- static spinlock_t init_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(init_lock);
struct pci_dev *iob_pdev;
struct pci_dev *pdev;
struct resource res;
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index 4dfb4bc242b..beb38333b6d 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -362,7 +362,7 @@ static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* secondary CPUs are tied to the primary one by the
* cpufreq core if in the secondary policy we tell it that
* it actually must be one policy together with all others. */
- policy->cpus = cpu_online_map;
+ cpumask_copy(policy->cpus, &cpu_online_map);
cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
return cpufreq_frequency_table_cpuinfo(policy,
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 54b7b76ed4f..04cdd32624d 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -661,6 +661,7 @@ static void __init init_second_ohare(void)
pci_find_hose_for_OF_device(np);
if (!hose) {
printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+ of_node_put(np);
return;
}
early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
@@ -669,6 +670,7 @@ static void __init init_second_ohare(void)
early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
}
has_second_ohare = 1;
+ of_node_put(np);
}
/*
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 59eb840d8ce..1810e4226e5 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -265,12 +265,15 @@ int __init via_calibrate_decr(void)
struct resource rsrc;
vias = of_find_node_by_name(NULL, "via-cuda");
- if (vias == 0)
+ if (vias == NULL)
vias = of_find_node_by_name(NULL, "via-pmu");
- if (vias == 0)
+ if (vias == NULL)
vias = of_find_node_by_name(NULL, "via");
- if (vias == 0 || of_address_to_resource(vias, 0, &rsrc))
+ if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
+ of_node_put(vias);
return 0;
+ }
+ of_node_put(vias);
via = ioremap(rsrc.start, rsrc.end - rsrc.start + 1);
if (via == NULL) {
printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
@@ -297,7 +300,7 @@ int __init via_calibrate_decr(void)
ppc_tb_freq = (dstart - dend) * 100 / 6;
iounmap(via);
-
+
return 1;
}
#endif
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index dbc124e0564..ca71a12b764 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -518,6 +518,41 @@ fail_device_register:
return result;
}
+static int __init ps3_register_ramdisk_device(void)
+{
+ int result;
+ struct layout {
+ struct ps3_system_bus_device dev;
+ } *p;
+
+ pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+ p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+ if (!p)
+ return -ENOMEM;
+
+ p->dev.match_id = PS3_MATCH_ID_GPU;
+ p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
+ p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+ result = ps3_system_bus_device_register(&p->dev);
+
+ if (result) {
+ pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+ __func__, __LINE__);
+ goto fail_device_register;
+ }
+
+ pr_debug(" <- %s:%d\n", __func__, __LINE__);
+ return 0;
+
+fail_device_register:
+ kfree(p);
+ pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+ return result;
+}
+
/**
* ps3_setup_dynamic_device - Setup a dynamic device from the repository
*/
@@ -946,6 +981,8 @@ static int __init ps3_register_devices(void)
ps3_register_lpm_devices();
+ ps3_register_ramdisk_device();
+
pr_debug(" <- %s:%d\n", __func__, __LINE__);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index f7a69021b7b..84e058f1e1c 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
lpar_xirr_info_set((0xff << 24) | irq);
}
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
{
unsigned int irq;
int status;
@@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void)
/* Reset affinity to all cpus */
irq_desc[virq].affinity = CPU_MASK_ALL;
- desc->chip->set_affinity(virq, CPU_MASK_ALL);
+ desc->chip->set_affinity(virq, cpu_all_mask);
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index c82babb7007..3e0d89dcdba 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq)
#endif /* CONFIG_SMP */
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = mpic_irq_to_hw(irq);
@@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
} else {
cpumask_t tmp;
- cpus_and(tmp, cpumask, cpu_online_map);
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62a426..3cef2af10f4 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
#endif /* _POWERPC_SYSDEV_MPIC_H */