aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig36
-rw-r--r--arch/powerpc/Kconfig.debug10
-rw-r--r--arch/powerpc/configs/g5_defconfig261
-rw-r--r--arch/powerpc/kernel/Makefile13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cpu_setup_power4.S233
-rw-r--r--arch/powerpc/kernel/cputable.c33
-rw-r--r--arch/powerpc/kernel/firmware.c45
-rw-r--r--arch/powerpc/kernel/fpu.S24
-rw-r--r--arch/powerpc/kernel/head_64.S385
-rw-r--r--arch/powerpc/kernel/ioctl32.c49
-rw-r--r--arch/powerpc/kernel/irq.c478
-rw-r--r--arch/powerpc/kernel/lparcfg.c612
-rw-r--r--arch/powerpc/kernel/lparmap.c2
-rw-r--r--arch/powerpc/kernel/misc_32.S23
-rw-r--r--arch/powerpc/kernel/misc_64.S78
-rw-r--r--arch/powerpc/kernel/paca.c142
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c126
-rw-r--r--arch/powerpc/kernel/process.c10
-rw-r--r--arch/powerpc/kernel/prom.c128
-rw-r--r--arch/powerpc/kernel/prom_init.c196
-rw-r--r--arch/powerpc/kernel/ptrace.c43
-rw-r--r--arch/powerpc/kernel/rtas-proc.c2
-rw-r--r--arch/powerpc/kernel/rtas.c10
-rw-r--r--arch/powerpc/kernel/rtas_pci.c513
-rw-r--r--arch/powerpc/kernel/setup-common.c77
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c19
-rw-r--r--arch/powerpc/kernel/setup_64.c170
-rw-r--r--arch/powerpc/kernel/signal_32.c2
-rw-r--r--arch/powerpc/kernel/signal_64.c1
-rw-r--r--arch/powerpc/kernel/smp.c11
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/sysfs.c384
-rw-r--r--arch/powerpc/kernel/time.c37
-rw-r--r--arch/powerpc/kernel/traps.c17
-rw-r--r--arch/powerpc/kernel/vio.c27
-rw-r--r--arch/powerpc/lib/bitops.c2
-rw-r--r--arch/powerpc/lib/copypage_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_64.S4
-rw-r--r--arch/powerpc/lib/locks.c1
-rw-r--r--arch/powerpc/mm/fault.c17
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c570
-rw-r--r--arch/powerpc/mm/hugetlbpage.c140
-rw-r--r--arch/powerpc/mm/init_32.c3
-rw-r--r--arch/powerpc/mm/init_64.c33
-rw-r--r--arch/powerpc/mm/mem.c62
-rw-r--r--arch/powerpc/mm/numa.c1
-rw-r--r--arch/powerpc/mm/pgtable_64.c24
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S229
-rw-r--r--arch/powerpc/mm/stab.c47
-rw-r--r--arch/powerpc/mm/tlb_64.c32
-rw-r--r--arch/powerpc/oprofile/Kconfig6
-rw-r--r--arch/powerpc/oprofile/op_model_power4.c27
-rw-r--r--arch/powerpc/platforms/chrp/setup.c4
-rw-r--r--arch/powerpc/platforms/iseries/htab.c65
-rw-r--r--arch/powerpc/platforms/iseries/hvlog.c4
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c74
-rw-r--r--arch/powerpc/platforms/iseries/irq.c29
-rw-r--r--arch/powerpc/platforms/iseries/misc.S1
-rw-r--r--arch/powerpc/platforms/iseries/pci.c37
-rw-r--r--arch/powerpc/platforms/iseries/setup.c58
-rw-r--r--arch/powerpc/platforms/iseries/smp.c1
-rw-r--r--arch/powerpc/platforms/iseries/vio.c39
-rw-r--r--arch/powerpc/platforms/iseries/viopath.c16
-rw-r--r--arch/powerpc/platforms/maple/pci.c3
-rw-r--r--arch/powerpc/platforms/powermac/Makefile3
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_32.c (renamed from arch/powerpc/platforms/powermac/cpufreq.c)15
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_64.c323
-rw-r--r--arch/powerpc/platforms/powermac/pci.c3
-rw-r--r--arch/powerpc/platforms/powermac/pic.c3
-rw-r--r--arch/powerpc/platforms/powermac/setup.c13
-rw-r--r--arch/powerpc/platforms/powermac/smp.c51
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c1212
-rw-r--r--arch/powerpc/platforms/pseries/eeh_event.c155
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c5
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c118
-rw-r--r--arch/powerpc/platforms/pseries/pci.c3
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h10
-rw-r--r--arch/powerpc/platforms/pseries/ras.c2
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c8
-rw-r--r--arch/powerpc/platforms/pseries/rtasd.c8
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c235
-rw-r--r--arch/powerpc/platforms/pseries/setup.c42
-rw-r--r--arch/powerpc/platforms/pseries/smp.c5
-rw-r--r--arch/powerpc/platforms/pseries/xics.c7
-rw-r--r--arch/powerpc/sysdev/i8259.c5
-rw-r--r--arch/powerpc/sysdev/u3_iommu.c3
-rw-r--r--arch/powerpc/xmon/Makefile2
-rw-r--r--arch/powerpc/xmon/nonstdio.c134
-rw-r--r--arch/powerpc/xmon/nonstdio.h28
-rw-r--r--arch/powerpc/xmon/setjmp.S176
-rw-r--r--arch/powerpc/xmon/start_32.c235
-rw-r--r--arch/powerpc/xmon/start_64.c167
-rw-r--r--arch/powerpc/xmon/start_8xx.c255
-rw-r--r--arch/powerpc/xmon/subr_prf.c54
-rw-r--r--arch/powerpc/xmon/xmon.c50
103 files changed, 7900 insertions, 2283 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fb..ed31062029f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -404,6 +404,14 @@ config CPU_FREQ_PMAC
this currently includes some models of iBook & Titanium
PowerBook.
+config CPU_FREQ_PMAC64
+ bool "Support for some Apple G5s"
+ depends on CPU_FREQ && PMAC_SMU && PPC64
+ select CPU_FREQ_TABLE
+ help
+ This adds support for frequency switching on Apple iMac G5,
+ and some of the more recent desktop G5 machines as well.
+
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
depends on 6xx && (PPC_PREP || PPC_PMAC)
@@ -484,6 +492,7 @@ source "fs/Kconfig.binfmt"
config FORCE_MAX_ZONEORDER
int
depends on PPC64
+ default "9" if PPC_64K_PAGES
default "13"
config MATH_EMULATION
@@ -590,6 +599,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
depends on NEED_MULTIPLE_NODES
+config ARCH_MEMORY_PROBE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
# between a node's start and end pfns, it may not
@@ -603,6 +616,16 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_64K_PAGES
+ bool "64k page size"
+ depends on PPC64
+ help
+ This option changes the kernel logical page size to 64k. On machines
+ without processor support for 64k pages, the kernel will simulate
+ them by loading each individual 4k page on demand transparently,
+ while on hardware with such support, it will be used to map
+ normal application pages.
+
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on PPC64 && SMP
@@ -907,8 +930,21 @@ source "arch/powerpc/platforms/iseries/Kconfig"
source "lib/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/powerpc/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 0baf64ec80d..30a30bf559e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -9,16 +9,6 @@ config DEBUG_STACKOVERFLOW
This option will cause messages to be printed if free stack space
drops below a certain limit.
-config KPROBES
- bool "Kprobes"
- depends on DEBUG_KERNEL && PPC64
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL && PPC64
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 6323065fbf2..e76854f8c12 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -1,18 +1,32 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-rc4
-# Thu Oct 20 08:30:23 2005
+# Linux kernel version: 2.6.14
+# Mon Nov 7 13:37:59 2005
#
+CONFIG_PPC64=y
CONFIG_64BIT=y
+CONFIG_PPC_MERGE=y
CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC=y
CONFIG_EARLY_PRINTK=y
CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
+
+#
+# Processor support
+#
+CONFIG_POWER4_ONLY=y
+CONFIG_POWER4=y
+CONFIG_PPC_FPU=y
+CONFIG_ALTIVEC=y
+CONFIG_PPC_STD_MMU=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
#
# Code maturity level options
@@ -67,30 +81,60 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
-CONFIG_SYSVIPC_COMPAT=y
#
# Platform support
#
-# CONFIG_PPC_ISERIES is not set
CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_ISERIES is not set
+# CONFIG_EMBEDDED6xx is not set
+# CONFIG_APUS is not set
# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_BPA is not set
CONFIG_PPC_PMAC=y
+CONFIG_PPC_PMAC64=y
# CONFIG_PPC_MAPLE is not set
-CONFIG_PPC=y
-CONFIG_PPC64=y
+# CONFIG_PPC_CELL is not set
CONFIG_PPC_OF=y
-CONFIG_MPIC=y
-CONFIG_ALTIVEC=y
-CONFIG_KEXEC=y
CONFIG_U3_DART=y
-CONFIG_PPC_PMAC64=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_POWER4_ONLY=y
+CONFIG_MPIC=y
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_PMAC64=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Kernel options
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_FORCE_MAX_ZONEORDER=13
CONFIG_IOMMU_VMERGE=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_NUMA is not set
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
@@ -100,28 +144,21 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_NUMA is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PPC_64K_PAGES is not set
# CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_PREEMPT_BKL is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_SECCOMP=y
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_HOTPLUG_CPU is not set
CONFIG_PROC_DEVICETREE=y
# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+CONFIG_SECCOMP=y
CONFIG_ISA_DMA_API=y
#
-# Bus Options
+# Bus options
#
+CONFIG_GENERIC_ISA_DMA=y
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_INDIRECT_PCI is not set
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_LEGACY_PROC=y
@@ -136,6 +173,7 @@ CONFIG_PCI_LEGACY_PROC=y
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
+CONFIG_KERNEL_START=0xc000000000000000
#
# Networking
@@ -276,6 +314,10 @@ CONFIG_LLC=y
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
CONFIG_NET_CLS_ROUTE=y
@@ -348,6 +390,11 @@ CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
# CONFIG_ATA_OVER_ETH is not set
#
@@ -449,6 +496,7 @@ CONFIG_SCSI_SPI_ATTRS=y
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
@@ -465,10 +513,12 @@ CONFIG_SCSI_SATA_SVW=y
# CONFIG_SCSI_ATA_PIIX is not set
# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_PDC_ADMA is not set
# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_SX4 is not set
# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
@@ -567,6 +617,9 @@ CONFIG_IEEE1394_RAWIO=y
CONFIG_ADB_PMU=y
CONFIG_PMAC_SMU=y
CONFIG_THERM_PM72=y
+CONFIG_WINDFARM=y
+CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM91=y
#
# Network device support
@@ -603,6 +656,7 @@ CONFIG_SUNGEM=y
# CONFIG_NET_TULIP is not set
# CONFIG_HP100 is not set
# CONFIG_NET_PCI is not set
+# CONFIG_FEC_8XX is not set
#
# Ethernet (1000 Mbit)
@@ -768,6 +822,7 @@ CONFIG_MAX_RAW_DEVS=256
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
@@ -820,6 +875,7 @@ CONFIG_I2C_PMAC_SMU=y
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_RTC_X1205_I2C is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -876,10 +932,9 @@ CONFIG_FB_OF=y
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_NVIDIA is not set
-CONFIG_FB_RIVA=y
-# CONFIG_FB_RIVA_I2C is not set
-# CONFIG_FB_RIVA_DEBUG is not set
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+# CONFIG_FB_RIVA is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=y
@@ -924,7 +979,96 @@ CONFIG_LCD_DEVICE=y
#
# Sound
#
-# CONFIG_SOUND is not set
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_GENERIC_DRIVER=y
+
+#
+# Generic devices
+#
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# PCI devices
+#
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CS4281 is not set
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_EMU10K1X is not set
+# CONFIG_SND_CA0106 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_HDSPM is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_AD1889 is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+# CONFIG_SND_FM801 is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VIA82XX_MODEM is not set
+# CONFIG_SND_VX222 is not set
+# CONFIG_SND_HDA_INTEL is not set
+
+#
+# ALSA PowerMac devices
+#
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_POWERMAC_AUTO_DRC=y
+
+#
+# USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
#
# USB support
@@ -958,12 +1102,16 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=y
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1074,6 +1222,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
+# CONFIG_USB_SERIAL_NOKIA_DKU2 is not set
CONFIG_USB_SERIAL_PL2303=m
# CONFIG_USB_SERIAL_HP4X is not set
CONFIG_USB_SERIAL_SAFE=m
@@ -1311,6 +1460,20 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+
+#
# Profiling support
#
CONFIG_PROFILING=y
@@ -1331,12 +1494,14 @@ CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUGGER is not set
-# CONFIG_PPCDBG is not set
CONFIG_IRQSTACKS=y
+CONFIG_BOOTX_TEXT=y
#
# Security options
@@ -1376,17 +1541,3 @@ CONFIG_CRYPTO_TEST=m
#
# Hardware crypto devices
#
-
-#
-# Library routines
-#
-CONFIG_CRC_CCITT=m
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b3ae2993efb..c04bbd32059 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,7 @@
ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
+CFLAGS_ioctl32.o += -Ifs/
endif
ifeq ($(CONFIG_PPC32),y)
CFLAGS_prom_init.o += -fPIC
@@ -11,15 +12,21 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
- signal_32.o pmc.o
+ irq.o signal_32.o pmc.o
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
- signal_64.o ptrace32.o systbl.o
+ signal_64.o ptrace32.o systbl.o \
+ paca.o ioctl32.o cpu_setup_power4.o \
+ firmware.o sysfs.o
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o
+procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc0..8793102711a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -106,7 +106,6 @@ int main(void)
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
- DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
/* paca */
@@ -125,6 +124,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_64K_PAGES
+ DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
+#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S
new file mode 100644
index 00000000000..cca942fe611
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power4.S
@@ -0,0 +1,233 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__970_cpu_preinit)
+ /*
+ * Do nothing if not running in HV mode
+ */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ /*
+ * Deal only with PPC970 and PPC970FX.
+ */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+1:
+
+ /* Make sure HID4:rm_ci is off before MMU is turned off, that large
+ * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+ * HID5:DCBZ32_ill
+ */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ mfspr r3,SPRN_HID5
+ rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
+ sync
+ mtspr SPRN_HID5,r3
+ isync
+ sync
+
+ /* Setup some basic HID1 features */
+ mfspr r0,SPRN_HID1
+ li r3,0x1200 /* enable i-fetch cacheability */
+ sldi r3,r3,44 /* and prefetch */
+ or r0,r0,r3
+ mtspr SPRN_HID1,r0
+ mtspr SPRN_HID1,r0
+ isync
+
+ /* Clear HIOR */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
+ isync
+ blr
+
+_GLOBAL(__setup_cpu_power4)
+ blr
+
+_GLOBAL(__setup_cpu_be)
+ /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
+ addi r3, 0, 0
+ ori r3, r3, HID6_LB
+ sldi r3, r3, 32
+ nor r3, r3, r3
+ mfspr r4, SPRN_HID6
+ and r4, r4, r3
+ addi r3, 0, 0x02000
+ sldi r3, r3, 32
+ or r4, r4, r3
+ mtspr SPRN_HID6, r4
+ blr
+
+_GLOBAL(__setup_cpu_ppc970)
+ mfspr r0,SPRN_HID0
+ li r11,5 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,8 /* set NAP and DPM */
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ sync
+ isync
+ blr
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 8
+#define CS_HID4 16
+#define CS_HID5 24
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES,0
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, HID4, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bne 2f
+
+1: /* Save HID0,1,4 and 5 */
+ mfspr r3,SPRN_HID0
+ std r3,CS_HID0(r5)
+ mfspr r3,SPRN_HID1
+ std r3,CS_HID1(r5)
+ mfspr r3,SPRN_HID4
+ std r3,CS_HID4(r5)
+ mfspr r3,SPRN_HID5
+ std r3,CS_HID5(r5)
+
+2:
+ mtcr r7
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+ /* Get storage ptr (FIXME when using anton reloc as we
+ * are running with translation disabled here
+ */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+
+1: /* Before accessing memory, we make sure rm_ci is clear */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+
+ /* Clear interrupt prefix */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0
+ isync
+
+ /* Restore HID0 */
+ ld r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ sync
+ isync
+
+ /* Restore HID1 */
+ ld r3,CS_HID1(r5)
+ sync
+ isync
+ mtspr SPRN_HID1,r3
+ mtspr SPRN_HID1,r3
+ sync
+ isync
+
+ /* Restore HID4 */
+ ld r3,CS_HID4(r5)
+ sync
+ isync
+ mtspr SPRN_HID4,r3
+ sync
+ isync
+
+ /* Restore HID5 */
+ ld r3,CS_HID5(r5)
+ sync
+ isync
+ mtspr SPRN_HID5,r3
+ sync
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa080..1d85cedbbb7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -52,6 +52,9 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
PPC_FEATURE_HAS_MMU)
#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS)
/* We only set the spe features if the kernel was compiled with
@@ -160,7 +163,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00350000,
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -175,7 +178,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00380000,
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -190,7 +193,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00390000,
.cpu_name = "PPC970",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -212,7 +215,7 @@ struct cpu_spec cpu_specs[] = {
#else
.cpu_features = CPU_FTRS_PPC970,
#endif
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -230,7 +233,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00440000,
.cpu_name = "PPC970MP",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -240,12 +243,12 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GR */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -255,12 +258,12 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -276,7 +279,7 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "Cell Broadband Engine",
.cpu_features = CPU_FTRS_CELL,
.cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
+ PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
.cpu_setup = __setup_cpu_be,
@@ -929,6 +932,16 @@ struct cpu_spec cpu_specs[] = {
.icache_bsize = 32,
.dcache_bsize = 32,
},
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xff000fff,
+ .pvr_value = 0x53000890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ },
#endif /* CONFIG_44x */
#ifdef CONFIG_FSL_BOOKE
{ /* e200z5 */
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 00000000000..65eae752a52
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,45 @@
+/*
+ * Extracted from cputable.c
+ *
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
+};
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4d6001fa1cf..b780b42c95f 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -41,20 +41,20 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
LOADBASE(r3, last_task_used_math)
toreal(r3)
- LDL r4,OFF(last_task_used_math)(r3)
- CMPI 0,r4,0
+ PPC_LL r4,OFF(last_task_used_math)(r3)
+ PPC_LCMPI 0,r4,0
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
SAVE_32FPRS(0, r4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
- LDL r5,PT_REGS(r4)
+ PPC_LL r5,PT_REGS(r4)
toreal(r5)
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r10,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r10 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
/* enable use of FP after return */
@@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
- STL r4,OFF(last_task_used_math)(r3)
+ PPC_STL r4,OFF(last_task_used_math)(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
@@ -97,24 +97,24 @@ _GLOBAL(giveup_fpu)
MTMSRD(r5) /* enable use of fpu now */
SYNC_601
isync
- CMPI 0,r3,0
+ PPC_LCMPI 0,r3,0
beqlr- /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
- LDL r5,PT_REGS(r3)
- CMPI 0,r5,0
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
SAVE_32FPRS(0, r3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r3,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r3 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
li r5,0
LOADBASE(r4,last_task_used_math)
- STL r5,OFF(last_task_used_math)(r4)
+ PPC_STL r5,OFF(last_task_used_math)(r4)
#endif /* CONFIG_SMP */
blr
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987..8a8bf79ef04 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -28,7 +28,6 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/systemcfg.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
@@ -195,11 +194,11 @@ exception_marker:
#define EX_R12 24
#define EX_R13 32
#define EX_SRR0 40
-#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
#define EX_DAR 48
-#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
+#define EX_R3 64
+#define EX_LR 72
#define EXCEPTION_PROLOG_PSERIES(area, label) \
mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +418,22 @@ data_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_DAR
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -440,17 +444,22 @@ instruction_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries)
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
/*
+ * We have some room here we use that to put
+ * the peries slb miss user trampoline code so it's reasonably
+ * away from slb_miss_user_common to avoid problems with rfid
+ *
+ * This is used for when the SLB miss handler has to go virtual,
+ * which doesn't happen for now anymore but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+#ifdef __DISABLED__
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+/*
* Vectors for the FWNMI option. Share common code.
*/
.globl system_reset_fwnmi
@@ -559,22 +600,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
.globl data_access_slb_iSeries
data_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
mfspr r3,SPRN_DAR
- b .do_slb_miss
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
.globl instruction_access_slb_iSeries
instruction_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
- ld r3,PACALPPACA+LPPACASRR0(r13)
- b .do_slb_miss
+ ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge .slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
+
+#ifdef __DISABLED__
+slb_miss_user_iseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ EXCEPTION_PROLOG_ISERIES_2
+ b slb_miss_user_common
+#endif
MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +887,126 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+/*
+ * Here is the common SLB miss user that is used when going to virtual
+ * mode for SLB misses, that is currently not used
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b .handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+ ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_slb
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+#ifdef CONFIG_PPC_ISERIES
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
@@ -1139,62 +1337,6 @@ _GLOBAL(do_stab_bolted)
b . /* prevent speculative execution */
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(do_slb_miss)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate /* handle it */
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-/*
* Space for CPU0's segment table.
*
* On iSeries, the hypervisor must fill in at least one entry before
@@ -1554,22 +1696,14 @@ _GLOBAL(pmac_secondary_start)
* SPRG3 = paca virtual address
*/
_GLOBAL(__secondary_start)
+ /* Set thread priority to MEDIUM */
+ HMT_MEDIUM
- HMT_MEDIUM /* Set thread priority to MEDIUM */
-
+ /* Load TOC */
ld r2,PACATOC(r13)
- li r6,0
- stb r6,PACAPROCENABLED(r13)
-
-#ifndef CONFIG_PPC_ISERIES
- /* Initialize the page table pointer register. */
- LOADADDR(r6,_SDR1)
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-#endif
- /* Initialize the first segment table (or SLB) entry */
- ld r3,PACASTABVIRT(r13) /* get addr of segment table */
- bl .stab_initialize
+
+ /* Do early setup for that CPU (stab, slb, hash table pointer) */
+ bl .early_setup_secondary
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
@@ -1578,37 +1712,7 @@ _GLOBAL(__secondary_start)
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
std r1,PACAKSAVE(r13)
- ld r3,PACASTABREAL(r13) /* get raddr of segment table */
- ori r4,r3,1 /* turn on valid bit */
-
-#ifdef CONFIG_PPC_ISERIES
- li r0,-1 /* hypervisor call */
- li r3,1
- sldi r3,r3,63 /* 0x8000000000000000 */
- ori r3,r3,4 /* 0x8000000000000004 */
- sc /* HvCall_setASR */
-#else
- /* set the ASR */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
-#endif
+ /* Clear backchain so we get nice backtraces */
li r7,0
mtlr r7
@@ -1631,6 +1735,7 @@ _GLOBAL(start_secondary_prolog)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
+ b .
#endif
/*
@@ -1750,40 +1855,6 @@ _STATIC(start_here_multiplatform)
mr r3,r31
bl .early_setup
- /* set the ASR */
- ld r3,PACASTABREAL(r13)
- ori r4,r3,1 /* turn on valid bit */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
- /* Set SDR1 (hash table pointer) */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- /* Test if bit 0 is set (LPAR bit) */
- andi. r3,r3,PLATFORM_LPAR
- bne 98f /* branch if result is !0 */
- LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
- add r6,r6,r26
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-98:
LOADADDR(r3,.start_here_common)
SET_REG_TO_CONST(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
new file mode 100644
index 00000000000..3fa6a93adbd
--- /dev/null
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -0,0 +1,49 @@
+/*
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Based on sparc64 ioctl32.c by:
+ *
+ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * ppc64 changes:
+ *
+ * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
+ * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * ioctls.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define INCLUDES
+#include "compat_ioctl.c"
+#include <linux/syscalls.h>
+
+#define CODE
+#include "compat_ioctl.c"
+
+#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+
+#define IOCTL_TABLE_START \
+ struct ioctl_trans ioctl_start[] = {
+#define IOCTL_TABLE_END \
+ };
+
+IOCTL_TABLE_START
+#include <linux/compat_ioctl.h>
+#define DECLARES
+#include "compat_ioctl.c"
+
+/* Little p (/dev/rtc, /dev/envctrl, etc.) */
+COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
+COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
+
+IOCTL_TABLE_END
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 00000000000..4b7940693f3
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,478 @@
+/*
+ * arch/ppc/kernel/irq.c
+ *
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
+ * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask. I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#ifdef CONFIG_PPC64
+#include <linux/kallsyms.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#ifdef CONFIG_PPC64
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+#endif
+
+static int ppc_spurious_interrupts;
+
+#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
+extern void iSeries_smp_message_recv(struct pt_regs *);
+#endif
+
+#ifdef CONFIG_PPC32
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+
+unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+extern int tau_interrupts(int);
+#endif
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+extern atomic_t ipi_recv;
+extern atomic_t ipi_sent;
+#endif
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(irq_desc);
+
+int distribute_irqs = 1;
+int __irq_offset_value;
+u64 ppc64_interrupt_controller;
+#endif /* CONFIG_PPC64 */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *)v, j;
+ struct irqaction *action;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ", j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ desc = get_irq_desc(i);
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
+ if (!action || !action->handler)
+ goto skip;
+ seq_printf(p, "%3d: ", i);
+#ifdef CONFIG_SMP
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#else
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+ if (desc->handler)
+ seq_printf(p, " %s ", desc->handler->typename);
+ else
+ seq_puts(p, " None ");
+ seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
+ seq_printf(p, " %s", action->name);
+ for (action = action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ } else if (i == NR_IRQS) {
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_TAU_INT
+ if (tau_initialized){
+ seq_puts(p, "TAU: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+ /* should this be per processor send/receive? */
+ seq_printf(p, "IPI (recv/sent): %10u/%u\n",
+ atomic_read(&ipi_recv), atomic_read(&ipi_sent));
+#endif
+#endif /* CONFIG_PPC32 */
+ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for_each_irq(irq) {
+ cpumask_t mask;
+
+ if (irq_desc[irq].status & IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+}
+#endif
+
+#ifdef CONFIG_PPC_ISERIES
+void do_IRQ(struct pt_regs *regs)
+{
+ struct paca_struct *lpaca;
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ lpaca = get_paca();
+#ifdef CONFIG_SMP
+ if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
+ lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
+ iSeries_smp_message_recv(regs);
+ }
+#endif /* CONFIG_SMP */
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
+
+ irq_exit();
+
+ if (lpaca->lppaca.int_dword.fields.decr_int) {
+ lpaca->lppaca.int_dword.fields.decr_int = 0;
+ /* Signal a fake decrementer interrupt */
+ timer_interrupt(regs);
+ }
+}
+
+#else /* CONFIG_PPC_ISERIES */
+
+void do_IRQ(struct pt_regs *regs)
+{
+ int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ /*
+ * Every platform is required to implement ppc_md.get_irq.
+ * This function will either return an irq number or -1 to
+ * indicate there are no more pending.
+ * The value -2 is for buggy hardware and means that this IRQ
+ * has already been handled. -- Tom
+ */
+ irq = ppc_md.get_irq(regs);
+
+ if (irq >= 0) {
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regs, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regs);
+ } else
+#ifdef CONFIG_PPC32
+ if (irq != -2)
+#endif
+ /* That's not SMP safe ... but who cares ? */
+ ppc_spurious_interrupts++;
+ irq_exit();
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+void __init init_IRQ(void)
+{
+#ifdef CONFIG_PPC64
+ static int once = 0;
+
+ if (once)
+ return;
+
+ once++;
+
+#endif
+ ppc_md.init_IRQ();
+#ifdef CONFIG_PPC64
+ irq_ctx_init();
+#endif
+}
+
+#ifdef CONFIG_PPC64
+#ifndef CONFIG_PPC_ISERIES
+/*
+ * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
+ */
+
+#define UNDEFINED_IRQ 0xffffffff
+unsigned int virt_irq_to_real_map[NR_IRQS];
+
+/*
+ * Don't use virtual irqs 0, 1, 2 for devices.
+ * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
+ * and 2 is the XICS IPI interrupt.
+ * We limit virtual irqs to 17 less than NR_IRQS so that when we
+ * offset them by 16 (to reserve the first 16 for ISA interrupts)
+ * we don't end up with an interrupt number >= NR_IRQS.
+ */
+#define MIN_VIRT_IRQ 3
+#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
+#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
+
+void
+virt_irq_init(void)
+{
+ int i;
+ for (i = 0; i < NR_IRQS; i++)
+ virt_irq_to_real_map[i] = UNDEFINED_IRQ;
+}
+
+/* Create a mapping for a real_irq if it doesn't already exist.
+ * Return the virtual irq as a convenience.
+ */
+int virt_irq_create_mapping(unsigned int real_irq)
+{
+ unsigned int virq, first_virq;
+ static int warned;
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC)
+ return real_irq; /* no mapping for openpic (for now) */
+
+ if (ppc64_interrupt_controller == IC_CELL_PIC)
+ return real_irq; /* no mapping for iic either */
+
+ /* don't map interrupts < MIN_VIRT_IRQ */
+ if (real_irq < MIN_VIRT_IRQ) {
+ virt_irq_to_real_map[real_irq] = real_irq;
+ return real_irq;
+ }
+
+ /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
+ virq = real_irq;
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ /* search for this number or a free slot */
+ first_virq = virq;
+ while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+ if (++virq > MAX_VIRT_IRQ)
+ virq = MIN_VIRT_IRQ;
+ if (virq == first_virq)
+ goto nospace; /* oops, no free slots */
+ }
+
+ virt_irq_to_real_map[virq] = real_irq;
+ return virq;
+
+ nospace:
+ if (!warned) {
+ printk(KERN_CRIT "Interrupt table is full\n");
+ printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
+ "in your kernel sources and rebuild.\n", NR_IRQS);
+ warned = 1;
+ }
+ return NO_IRQ;
+}
+
+/*
+ * In most cases will get a hit on the very first slot checked in the
+ * virt_irq_to_real_map. Only when there are a large number of
+ * IRQs will this be expensive.
+ */
+unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
+{
+ unsigned int virq;
+ unsigned int first_virq;
+
+ virq = real_irq;
+
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ first_virq = virq;
+
+ do {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+
+ virq++;
+
+ if (virq >= MAX_VIRT_IRQ)
+ virq = 0;
+
+ } while (first_virq != virq);
+
+ return NO_IRQ;
+
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_IRQSTACKS
+struct thread_info *softirq_ctx[NR_CPUS];
+struct thread_info *hardirq_ctx[NR_CPUS];
+
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = SOFTIRQ_OFFSET;
+
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+ }
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curtp, *irqtp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+ }
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
+
+#endif /* CONFIG_IRQSTACKS */
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000000..5e954fae031
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,612 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/* find a better place for this function... */
+void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct paca_struct *lpaca;
+
+ for_each_cpu(cpu) {
+ lpaca = paca + cpu;
+ sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+ printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+ cpu, lpaca->lppaca.emulated_time_base);
+#endif
+ }
+ return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ unsigned long pool_id, lp_index;
+ int shared, entitled_capacity, max_entitled_capacity;
+ int processors, max_processors;
+ struct paca_struct *lpaca = get_paca();
+ unsigned long purr = get_purr();
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ shared = (int)(lpaca->lppaca_ptr->shared_proc);
+ seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+ e2a(xItExtVpdPanel.mfgID[2]),
+ e2a(xItExtVpdPanel.mfgID[3]),
+ e2a(xItExtVpdPanel.systemSerial[1]),
+ e2a(xItExtVpdPanel.systemSerial[2]),
+ e2a(xItExtVpdPanel.systemSerial[3]),
+ e2a(xItExtVpdPanel.systemSerial[4]),
+ e2a(xItExtVpdPanel.systemSerial[5]));
+
+ seq_printf(m, "system_type=%c%c%c%c\n",
+ e2a(xItExtVpdPanel.machineType[0]),
+ e2a(xItExtVpdPanel.machineType[1]),
+ e2a(xItExtVpdPanel.machineType[2]),
+ e2a(xItExtVpdPanel.machineType[3]));
+
+ lp_index = HvLpConfig_getLpIndex();
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ seq_printf(m, "system_active_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ processors = (int)HvLpConfig_getPhysicalProcessors();
+ seq_printf(m, "partition_active_processors=%d\n", processors);
+
+ max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+ seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+ if (shared) {
+ entitled_capacity = HvLpConfig_getSharedProcUnits();
+ max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+ } else {
+ entitled_capacity = processors * 100;
+ max_entitled_capacity = max_processors * 100;
+ }
+ seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ max_entitled_capacity);
+
+ if (shared) {
+ pool_id = HvLpConfig_getSharedPoolIndex();
+ seq_printf(m, "pool=%d\n", (int)pool_id);
+ seq_printf(m, "pool_capacity=%d\n",
+ (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+ 100));
+ seq_printf(m, "purr=%ld\n", purr);
+ }
+
+ seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+ unsigned long *unallocated,
+ unsigned long *aggregation,
+ unsigned long *resource)
+{
+ unsigned long rc;
+ rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+ aggregation, resource);
+
+ log_plpar_hcall_return(rc, "H_GET_PPP");
+
+ return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long dummy;
+ rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+ log_plpar_hcall_return(rc, "H_PIC");
+}
+
+static unsigned long get_purr(void);
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications */
+
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct cpu_usage *cu;
+
+ for_each_cpu(cpu) {
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf));
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __FUNCTION__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+ splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+static int lparcfg_count_active_processors(void);
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ unsigned int *lp_index_ptr, lp_index = 0;
+ struct device_node *rtas_node;
+ int *lrdrp;
+
+ rootdn = find_path_device("/");
+ if (rootdn) {
+ model = get_property(rootdn, "model", NULL);
+ system_id = get_property(rootdn, "system-id", NULL);
+ lp_index_ptr = (unsigned int *)
+ get_property(rootdn, "ibm,partition-no", NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ }
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ seq_printf(m, "serial_number=%s\n", system_id);
+
+ seq_printf(m, "system_type=%s\n", model);
+
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ rtas_node = find_path_device("/rtas");
+ lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = _systemcfg->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ unsigned long h_entitled, h_unallocated;
+ unsigned long h_aggregation, h_resource;
+ unsigned long pool_idle_time, pool_procs;
+ unsigned long purr;
+
+ h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+ &h_resource);
+
+ seq_printf(m, "R4=0x%lx\n", h_entitled);
+ seq_printf(m, "R5=0x%lx\n", h_unallocated);
+ seq_printf(m, "R6=0x%lx\n", h_aggregation);
+ seq_printf(m, "R7=0x%lx\n", h_resource);
+
+ purr = get_purr();
+
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+ seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+ seq_printf(m, "system_active_processors=%ld\n",
+ (h_resource >> 0 * 8) & 0xffff);
+
+ /* pool related entries are apropriate for shared configs */
+ if (paca[0].lppaca.shared_proc) {
+
+ h_pic(&pool_idle_time, &pool_procs);
+
+ seq_printf(m, "pool=%ld\n",
+ (h_aggregation >> 0 * 8) & 0xffff);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%ld\n",
+ ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%ld\n",
+ (h_resource >> 4 * 8) & 0xFF);
+
+ seq_printf(m, "capacity_weight=%ld\n",
+ (h_resource >> 5 * 8) & 0xFF);
+
+ seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+ seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+ seq_printf(m, "purr=%ld\n", purr);
+
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+ return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ char *kbuf;
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+
+ unsigned long current_entitled; /* parameters for h_get_ppp */
+ unsigned long dummy;
+ unsigned long resource;
+ u8 current_weight;
+
+ ssize_t retval = -ENOMEM;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
+
+ retval = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+
+ retval = -EINVAL;
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ goto out;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_weight_ptr = &current_weight;
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_entitled_ptr = &current_entitled;
+ } else
+ goto out;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+ if (retval) {
+ retval = -EIO;
+ goto out;
+ }
+
+ current_weight = (resource >> 5 * 8) & 0xFF;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+ __FUNCTION__, current_entitled, current_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+ __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+ retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+ *new_weight_ptr);
+
+ if (retval == H_Success || retval == H_Constrained) {
+ retval = count;
+ } else if (retval == H_Busy) {
+ retval = -EBUSY;
+ } else if (retval == H_Hardware) {
+ retval = -EIO;
+ } else if (retval == H_Parameter) {
+ retval = -EINVAL;
+ } else {
+ printk(KERN_WARNING "%s: received unknown hv return code %ld",
+ __FUNCTION__, retval);
+ retval = -EIO;
+ }
+
+ out:
+ kfree(kbuf);
+ return retval;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .open = lparcfg_open,
+ .release = single_release,
+};
+
+int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ lparcfg_fops.write = lparcfg_write;
+ mode |= S_IWUSR;
+ }
+
+ ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+ if (ent) {
+ ent->proc_fops = &lparcfg_fops;
+ ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR
+ "Failed to allocate buffer for lparcfg\n");
+ remove_proc_entry("lparcfg", ent->parent);
+ return -ENOMEM;
+ }
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg) {
+ kfree(proc_ppc64_lparcfg->data);
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+ }
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf..5a05a797485 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
.xRanges = {
{ .xPages = HvPagesToMap,
.xOffset = 0,
- .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+ .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
},
},
};
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 3bedb532aed..f6d84a75ed2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -519,7 +519,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(flush_icache_range)
+_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
@@ -607,27 +607,6 @@ _GLOBAL(invalidate_dcache_range)
sync /* wait for dcbi's to get to ram */
blr
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * 40x cores have 8K or 16K dcache and 32 byte line size.
- * 44x has a 32K dcache and 32 byte line size.
- * 8xx has 1, 2, 4, 8K variants.
- * For now, cover the worst case of the 44x.
- * Must be called with external interrupts disabled.
- */
-#define CACHE_NWAYS 64
-#define CACHE_NLINES 16
-
-_GLOBAL(flush_dcache_all)
- li r4, (2 * CACHE_NWAYS * CACHE_NLINES)
- mtctr r4
- lis r5, KERNELBASE@h
-1: lwz r3, 0(r5) /* Load one word from every line */
- addi r5, r5, L1_CACHE_BYTES
- bdnz 1b
- blr
-#endif /* CONFIG_NOT_COHERENT_CACHE */
-
/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b3e95ff0dba..ae48a002f81 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -89,12 +89,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_IRQ_event)
+_GLOBAL(call___do_IRQ)
mflr r0
std r0,16(r1)
- stdu r1,THREAD_SIZE-112(r6)
- mr r1,r6
- bl .handle_IRQ_event
+ stdu r1,THREAD_SIZE-112(r5)
+ mr r1,r5
+ bl .__do_IRQ
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
@@ -604,6 +604,76 @@ _GLOBAL(real_writeb)
#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
/*
+ * SCOM access functions for 970 (FX only for now)
+ *
+ * unsigned long scom970_read(unsigned int address);
+ * void scom970_write(unsigned int address, unsigned long value);
+ *
+ * The address passed in is the 24 bits register address. This code
+ * is 970 specific and will not check the status bits, so you should
+ * know what you are doing.
+ */
+_GLOBAL(scom970_read)
+ /* interrupts off */
+ mfmsr r4
+ ori r0,r4,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd,
+ * and finally or in RW bit
+ */
+ rlwinm r3,r3,8,0,15
+ ori r3,r3,0x8000
+
+ /* do the actual scom read */
+ sync
+ mtspr SPRN_SCOMC,r3
+ isync
+ mfspr r3,SPRN_SCOMD
+ isync
+ mfspr r0,SPRN_SCOMC
+ isync
+
+ /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah
+ * that's the best we can do). Not implemented yet as we don't use
+ * the scom on any of the bogus CPUs yet, but may have to be done
+ * ultimately
+ */
+
+ /* restore interrupts */
+ mtmsrd r4,1
+ blr
+
+
+_GLOBAL(scom970_write)
+ /* interrupts off */
+ mfmsr r5
+ ori r0,r5,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd.
+ */
+
+ rlwinm r3,r3,8,0,15
+
+ sync
+ mtspr SPRN_SCOMD,r4 /* write data */
+ isync
+ mtspr SPRN_SCOMC,r3 /* write command */
+ isync
+ mfspr 3,SPRN_SCOMC
+ isync
+
+ /* restore interrupts */
+ mtmsrd r5,1
+ blr
+
+
+/*
* Create a kernel thread
* kernel_thread(fn, arg, flags)
*/
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 00000000000..3cf2517c5f9
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,142 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/systemcfg.h>
+#include <asm/lppaca.h>
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+
+static union {
+ struct systemcfg data;
+ u8 page[PAGE_SIZE];
+} systemcfg_store __attribute__((__section__(".data.page.aligned")));
+struct systemcfg *_systemcfg = &systemcfg_store.data;
+
+
+/* This symbol is provided by the linker - let it fill in the paca
+ * field correctly */
+extern unsigned long __toc_start;
+
+/* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux. Each also contains an ItLpRegSave area which
+ * is used by the hypervisor to save registers.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor. The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors. The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+#define PACA_INIT_COMMON(number, start, asrr, asrv) \
+ .lock_token = 0x8000, \
+ .paca_index = (number), /* Paca Index */ \
+ .default_decr = 0x00ff0000, /* Initial Decr */ \
+ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
+ .stab_real = (asrr), /* Real pointer to segment table */ \
+ .stab_addr = (asrv), /* Virt pointer to segment table */ \
+ .cpu_start = (start), /* Processor start */ \
+ .hw_cpu_id = 0xffff, \
+ .lppaca = { \
+ .desc = 0xd397d781, /* "LpPa" */ \
+ .size = sizeof(struct lppaca), \
+ .dyn_proc_status = 2, \
+ .decr_val = 0x00ff0000, \
+ .fpregs_in_use = 1, \
+ .end_of_quantum = 0xfffffffffffffffful, \
+ .slb_count = 64, \
+ .vmxregs_in_use = 0, \
+ }, \
+
+#ifdef CONFIG_PPC_ISERIES
+#define PACA_INIT_ISERIES(number) \
+ .lppaca_ptr = &paca[number].lppaca, \
+ .reg_save_ptr = &paca[number].reg_save, \
+ .reg_save = { \
+ .xDesc = 0xd397d9e2, /* "LpRS" */ \
+ .xSize = sizeof(struct ItLpRegSave) \
+ }
+
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#else
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \
+}
+#endif
+
+struct paca_struct paca[] = {
+ BOOTCPU_PACA_INIT(0),
+#if NR_CPUS > 1
+ PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
+#if NR_CPUS > 4
+ PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
+#if NR_CPUS > 8
+ PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
+ PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
+ PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
+ PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
+ PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
+ PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
+#if NR_CPUS > 32
+ PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
+ PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
+ PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
+ PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
+ PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
+ PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
+ PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
+ PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
+#if NR_CPUS > 64
+ PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
+ PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
+ PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
+ PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
+ PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
+ PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
+ PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
+ PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
+ PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
+ PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
+ PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
+ PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
+ PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
+ PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
+ PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
+ PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
+#endif
+#endif
+#endif
+#endif
+#endif
+};
+EXPORT_SYMBOL(paca);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 47d6f7e2ea9..5dcf4ba05ee 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -44,6 +44,7 @@
#include <asm/cputable.h>
#include <asm/btext.h>
#include <asm/div64.h>
+#include <asm/signal.h>
#ifdef CONFIG_8xx
#include <asm/commproc.h>
@@ -56,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
extern void alignment_exception(struct pt_regs *regs);
extern void program_check_exception(struct pt_regs *regs);
extern void single_step_exception(struct pt_regs *regs);
-extern int do_signal(sigset_t *, struct pt_regs *);
extern int pmac_newworld;
extern int sys_sigreturn(struct pt_regs *regs);
@@ -188,9 +188,6 @@ EXPORT_SYMBOL(adb_try_handler_change);
EXPORT_SYMBOL(cuda_request);
EXPORT_SYMBOL(cuda_poll);
#endif /* CONFIG_ADB_CUDA */
-#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(_machine);
-#endif
#ifdef CONFIG_PPC_PMAC
EXPORT_SYMBOL(sys_ctrler);
#endif
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 00000000000..a1c19502fe8
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/systemcfg.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos);
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("ppc64", NULL);
+ if (!root)
+ return 1;
+
+ if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ if (!pde)
+ return 1;
+ pde->nlink = 1;
+ pde->data = _systemcfg;
+ pde->size = PAGE_SIZE;
+ pde->proc_fops = &page_map_fops;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96843211cc5..de69fb37c73 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -46,10 +46,10 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/prom.h>
+#include <asm/machdep.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/time.h>
-#include <asm/machdep.h>
#endif
extern unsigned long _get_SP(void);
@@ -203,10 +203,8 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
int set_dabr(unsigned long dabr)
{
-#ifdef CONFIG_PPC64
if (ppc_md.set_dabr)
return ppc_md.set_dabr(dabr);
-#endif
mtspr(SPRN_DABR, dabr);
return 0;
@@ -554,12 +552,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
unsigned long sp_vsid = get_kernel_vsid(sp);
+ unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
sp_vsid <<= SLB_VSID_SHIFT;
- sp_vsid |= SLB_VSID_KERNEL;
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- sp_vsid |= SLB_VSID_L;
-
+ sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da69550..6a5b468edb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -48,9 +48,6 @@
#include <asm/machdep.h>
#include <asm/pSeries_reconfig.h>
#include <asm/pci-bridge.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
#ifdef DEBUG
#define DBG(fmt...) printk(KERN_ERR fmt)
@@ -74,10 +71,6 @@ struct isa_reg_property {
typedef int interpret_func(struct device_node *, unsigned long *,
int, int, int);
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-
static int __initdata dt_root_addr_cells;
static int __initdata dt_root_size_cells;
@@ -391,7 +384,7 @@ static int __devinit finish_node_interrupts(struct device_node *np,
#ifdef CONFIG_PPC64
/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
- if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) {
+ if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
char *name = get_property(ic->parent, "name", NULL);
if (name && !strcmp(name, "u3"))
np->intrs[intrcount].line += 128;
@@ -724,10 +717,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
-static int __init scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data)
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
@@ -784,8 +777,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-static void* __init get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size)
{
unsigned long p = node;
@@ -1087,27 +1080,14 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
- unsigned long size = 0;
+ unsigned long size;
+ char *type = of_get_flat_dt_prop(node, "device_type", &size);
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
-#ifdef CONFIG_PPC_PSERIES
- /* On LPAR, look for the first ibm,pft-size property for the hash table size
- */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
- u32 *pft_size;
- pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (pft_size != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = pft_size[1];
- }
- }
-#endif
-
boot_cpuid = 0;
boot_cpuid_phys = 0;
if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1097,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
} else {
/* Check if it's the boot-cpu, set it's hw index now */
- if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
- prop = get_flat_dt_prop(node, "reg", NULL);
+ if (of_get_flat_dt_prop(node,
+ "linux,boot-cpu", NULL) != NULL) {
+ prop = of_get_flat_dt_prop(node, "reg", NULL);
if (prop != NULL)
boot_cpuid_phys = *prop;
}
@@ -1127,14 +1108,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
- prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1128,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
- prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,34 +1151,30 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
- prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
-#ifdef CONFIG_PPC64
- systemcfg->platform = *prop;
-#else
#ifdef CONFIG_PPC_MULTIPLATFORM
_machine = *prop;
#endif
-#endif
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
- if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
- if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
#endif
- lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (lprop)
memory_limit = *lprop;
#ifdef CONFIG_PPC64
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
if (lprop)
tce_alloc_start = *lprop;
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (lprop)
tce_alloc_end = *lprop;
#endif
@@ -1209,9 +1186,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
- basep = get_flat_dt_prop(node, "linux,rtas-base", NULL);
- entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL);
- prop = get_flat_dt_prop(node, "linux,rtas-size", NULL);
+ basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1232,11 +1209,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
- prop = get_flat_dt_prop(node, "#size-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
- prop = get_flat_dt_prop(node, "#address-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
@@ -1271,15 +1248,22 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
/* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
+ if (type == NULL) {
+ /*
+ * The longtrail doesn't have a device_type on the
+ * /memory node, so look for the node called /memory@0.
+ */
+ if (depth != 1 || strcmp(uname, "memory@0") != 0)
+ return 0;
+ } else if (strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
+ reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
@@ -1343,17 +1327,14 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- scan_flat_dt(early_init_dt_scan_chosen, NULL);
+ of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
- scan_flat_dt(early_init_dt_scan_root, NULL);
- scan_flat_dt(early_init_dt_scan_memory, NULL);
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
-#ifdef CONFIG_PPC64
- systemcfg->physicalMemorySize = lmb_phys_mem_size();
-#endif
lmb_reserve(0, __pa(klimit));
DBG("Phys. mem: %lx\n", lmb_phys_mem_size());
@@ -1363,10 +1344,10 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
- /* Retreive hash table size from flattened tree plus other
- * CPU related informations (altivec support, boot CPU ID, ...)
+ /* Retreive CPU related informations from the flat tree
+ * (altivec support, boot CPU ID, ...)
*/
- scan_flat_dt(early_init_dt_scan_cpus, NULL);
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG(" <- early_init_devtree()\n");
}
@@ -1920,7 +1901,7 @@ static int of_finish_dynamic_node(struct device_node *node,
/* We don't support that function on PowerMac, at least
* not yet
*/
- if (systemcfg->platform == PLATFORM_POWERMAC)
+ if (_machine == PLATFORM_POWERMAC)
return -ENODEV;
/* fix up new node's linux_phandle field */
@@ -1986,14 +1967,31 @@ EXPORT_SYMBOL(get_property);
/*
* Add a property to a node
*/
-void prom_add_property(struct device_node* np, struct property* prop)
+int prom_add_property(struct device_node* np, struct property* prop)
{
- struct property **next = &np->properties;
+ struct property **next;
prop->next = NULL;
- while (*next)
+ write_lock(&devtree_lock);
+ next = &np->properties;
+ while (*next) {
+ if (strcmp(prop->name, (*next)->name) == 0) {
+ /* duplicate ! don't insert it */
+ write_unlock(&devtree_lock);
+ return -1;
+ }
next = &(*next)->next;
+ }
*next = prop;
+ write_unlock(&devtree_lock);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ /* try to add to proc as well if it was initialized */
+ if (np->pde)
+ proc_device_tree_add_prop(np->pde, prop);
+#endif /* CONFIG_PROC_DEVICETREE */
+
+ return 0;
}
/* I quickly hacked that one, check against spec ! */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index c758b6624d7..4ce0105c308 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -94,11 +94,17 @@ extern const struct linux_logo logo_linux_clut224;
#ifdef CONFIG_PPC64
#define RELOC(x) (*PTRRELOC(&(x)))
#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x))
+#define OF_WORKAROUNDS 0
#else
#define RELOC(x) (x)
#define ADDR(x) (u32) (x)
+#define OF_WORKAROUNDS of_workarounds
+int of_workarounds;
#endif
+#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
+
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
RELOC(__FILE__), __LINE__); \
@@ -111,11 +117,6 @@ extern const struct linux_logo logo_linux_clut224;
#define prom_debug(x...)
#endif
-#ifdef CONFIG_PPC32
-#define PLATFORM_POWERMAC _MACH_Pmac
-#define PLATFORM_CHRP _MACH_chrp
-#endif
-
typedef u32 prom_arg_t;
@@ -128,10 +129,11 @@ struct prom_args {
struct prom_t {
ihandle root;
- ihandle chosen;
+ phandle chosen;
int cpu;
ihandle stdout;
ihandle mmumap;
+ ihandle memory;
};
struct mem_map_entry {
@@ -360,16 +362,36 @@ static void __init prom_printf(const char *format, ...)
static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
unsigned long align)
{
- int ret;
struct prom_t *_prom = &RELOC(prom);
- ret = call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
- (prom_arg_t)align);
- if (ret != -1 && _prom->mmumap != 0)
- /* old pmacs need us to map as well */
+ if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) {
+ /*
+ * Old OF requires we claim physical and virtual separately
+ * and then map explicitly (assuming virtual mode)
+ */
+ int ret;
+ prom_arg_t result;
+
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->memory,
+ align, size, virt);
+ if (ret != 0 || result == -1)
+ return -1;
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->mmumap,
+ align, size, virt);
+ if (ret != 0) {
+ call_prom("call-method", 4, 1, ADDR("release"),
+ _prom->memory, size, virt);
+ return -1;
+ }
+ /* the 0x12 is M (coherence) + PP == read/write */
call_prom("call-method", 6, 1,
- ADDR("map"), _prom->mmumap, 0, size, virt, virt);
- return ret;
+ ADDR("map"), _prom->mmumap, 0x12, size, virt, virt);
+ return virt;
+ }
+ return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
+ (prom_arg_t)align);
}
static void __init __attribute__((noreturn)) prom_panic(const char *reason)
@@ -403,23 +425,64 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int __init prom_getprop(phandle node, const char *pname,
+static int inline prom_getprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int __init prom_getproplen(phandle node, const char *pname)
+static int inline prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static int __init prom_setprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static void add_string(char **str, const char *q)
{
- return call_prom("setprop", 4, 1, node, ADDR(pname),
- (u32)(unsigned long) value, (u32) valuelen);
+ char *p = *str;
+
+ while (*q)
+ *p++ = *q++;
+ *p++ = ' ';
+ *str = p;
+}
+
+static char *tohex(unsigned int x)
+{
+ static char digits[] = "0123456789abcdef";
+ static char result[9];
+ int i;
+
+ result[8] = 0;
+ i = 8;
+ do {
+ --i;
+ result[i] = digits[x & 0xf];
+ x >>= 4;
+ } while (x != 0 && i > 0);
+ return &result[i];
+}
+
+static int __init prom_setprop(phandle node, const char *nodename,
+ const char *pname, void *value, size_t valuelen)
+{
+ char cmd[256], *p;
+
+ if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+ return call_prom("setprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+
+ /* gah... setprop doesn't work on longtrail, have to use interpret */
+ p = cmd;
+ add_string(&p, "dev");
+ add_string(&p, nodename);
+ add_string(&p, tohex((u32)(unsigned long) value));
+ add_string(&p, tohex(valuelen));
+ add_string(&p, tohex(ADDR(pname)));
+ add_string(&p, tohex(strlen(RELOC(pname))));
+ add_string(&p, "property");
+ *p = 0;
+ return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
}
/* We can't use the standard versions because of RELOC headaches. */
@@ -980,7 +1043,7 @@ static void __init prom_instantiate_rtas(void)
rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
if (!IHANDLE_VALID(rtas_inst)) {
- prom_printf("opening rtas package failed");
+ prom_printf("opening rtas package failed (%x)\n", rtas_inst);
return;
}
@@ -988,7 +1051,7 @@ static void __init prom_instantiate_rtas(void)
if (call_prom_ret("call-method", 3, 2, &entry,
ADDR("instantiate-rtas"),
- rtas_inst, base) == PROM_ERROR
+ rtas_inst, base) != 0
|| entry == 0) {
prom_printf(" failed\n");
return;
@@ -997,8 +1060,10 @@ static void __init prom_instantiate_rtas(void)
reserve_mem(base, size);
- prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base));
- prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+ &base, sizeof(base));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+ &entry, sizeof(entry));
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
@@ -1089,10 +1154,6 @@ static void __init prom_initialize_tce_table(void)
if (base < local_alloc_bottom)
local_alloc_bottom = base;
- /* Save away the TCE table attributes for later use. */
- prom_setprop(node, "linux,tce-base", &base, sizeof(base));
- prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
-
/* It seems OF doesn't null-terminate the path :-( */
memset(path, 0, sizeof(path));
/* Call OF to setup the TCE hardware */
@@ -1101,6 +1162,10 @@ static void __init prom_initialize_tce_table(void)
prom_printf("package-to-path failed\n");
}
+ /* Save away the TCE table attributes for later use. */
+ prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+ prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
prom_debug("TCE table: %s\n", path);
prom_debug("\tnode = 0x%x\n", node);
prom_debug("\tbase = 0x%x\n", base);
@@ -1342,6 +1407,7 @@ static void __init prom_init_client_services(unsigned long pp)
/*
* For really old powermacs, we need to map things we claim.
* For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
*/
static void __init prom_find_mmu(void)
{
@@ -1355,12 +1421,19 @@ static void __init prom_find_mmu(void)
if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
return;
version[sizeof(version) - 1] = 0;
- prom_printf("OF version is '%s'\n", version);
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") != 0)
+ if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ of_workarounds = OF_WA_CLAIM;
+ else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+ call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
+ } else
return;
+ _prom->memory = call_prom("open", 1, 1, ADDR("/memory"));
prom_getprop(_prom->chosen, "mmu", &_prom->mmumap,
sizeof(_prom->mmumap));
+ if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap))
+ of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
}
#else
#define prom_find_mmu()
@@ -1382,16 +1455,17 @@ static void __init prom_init_stdout(void)
memset(path, 0, 256);
call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
val = call_prom("instance-to-package", 1, 1, _prom->stdout);
- prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package",
+ &val, sizeof(val));
prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
- prom_setprop(_prom->chosen, "linux,stdout-path",
- RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path",
+ path, strlen(path) + 1);
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(val, "device_type", type, sizeof(type));
if (strcmp(type, RELOC("display")) == 0)
- prom_setprop(val, "linux,boot-display", NULL, 0);
+ prom_setprop(val, path, "linux,boot-display", NULL, 0);
}
static void __init prom_close_stdin(void)
@@ -1408,8 +1482,9 @@ static int __init prom_find_machine_type(void)
struct prom_t *_prom = &RELOC(prom);
char compat[256];
int len, i = 0;
+#ifdef CONFIG_PPC64
phandle rtas;
-
+#endif
len = prom_getprop(_prom->root, "compatible",
compat, sizeof(compat)-1);
if (len > 0) {
@@ -1513,7 +1588,7 @@ static void __init prom_check_displays(void)
/* Success */
prom_printf("done\n");
- prom_setprop(node, "linux,opened", NULL, 0);
+ prom_setprop(node, path, "linux,opened", NULL, 0);
/* Setup a usable color table when the appropriate
* method is available. Should update this to set-colors */
@@ -1872,7 +1947,7 @@ static void __init fixup_device_tree(void)
if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
== PROM_ERROR)
return;
- if (u3_rev != 0x35 && u3_rev != 0x37)
+ if (u3_rev < 0x35 || u3_rev > 0x39)
return;
/* does it need fixup ? */
if (prom_getproplen(i2c, "interrupts") > 0)
@@ -1883,9 +1958,11 @@ static void __init fixup_device_tree(void)
/* interrupt on this revision of u3 is number 0 and level */
interrupts[0] = 0;
interrupts[1] = 1;
- prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+ &interrupts, sizeof(interrupts));
parent = (u32)mpic;
- prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+ &parent, sizeof(parent));
#endif
}
@@ -1921,11 +1998,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
val = RELOC(prom_initrd_start);
- prom_setprop(_prom->chosen, "linux,initrd-start", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start",
+ &val, sizeof(val));
val = RELOC(prom_initrd_end);
- prom_setprop(_prom->chosen, "linux,initrd-end", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end",
+ &val, sizeof(val));
reserve_mem(RELOC(prom_initrd_start),
RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
@@ -1968,14 +2045,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_init_client_services(pp);
/*
- * Init prom stdout device
+ * See if this OF is old enough that we need to do explicit maps
+ * and other workarounds
*/
- prom_init_stdout();
+ prom_find_mmu();
/*
- * See if this OF is old enough that we need to do explicit maps
+ * Init prom stdout device
*/
- prom_find_mmu();
+ prom_init_stdout();
/*
* Check for an initrd
@@ -1988,14 +2066,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
RELOC(of_platform) = prom_find_machine_type();
getprop_rval = RELOC(of_platform);
- prom_setprop(_prom->chosen, "linux,platform",
+ prom_setprop(_prom->chosen, "/chosen", "linux,platform",
&getprop_rval, sizeof(getprop_rval));
#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
- if (RELOC(of_platform) & PLATFORM_PSERIES)
+ if (RELOC(of_platform) == PLATFORM_PSERIES ||
+ RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
prom_send_capabilities();
#endif
@@ -2049,21 +2128,23 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* Fill in some infos for use by the kernel later on
*/
if (RELOC(prom_memory_limit))
- prom_setprop(_prom->chosen, "linux,memory-limit",
+ prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit",
&RELOC(prom_memory_limit),
sizeof(prom_memory_limit));
#ifdef CONFIG_PPC64
if (RELOC(ppc64_iommu_off))
- prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off",
+ NULL, 0);
if (RELOC(iommu_force_on))
- prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on",
+ NULL, 0);
if (RELOC(prom_tce_alloc_start)) {
- prom_setprop(_prom->chosen, "linux,tce-alloc-start",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start",
&RELOC(prom_tce_alloc_start),
sizeof(prom_tce_alloc_start));
- prom_setprop(_prom->chosen, "linux,tce-alloc-end",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end",
&RELOC(prom_tce_alloc_end),
sizeof(prom_tce_alloc_end));
}
@@ -2080,8 +2161,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_printf("copying OF device tree ...\n");
flatten_device_tree();
- /* in case stdin is USB and still active on IBM machines... */
- prom_close_stdin();
+ /*
+ * in case stdin is USB and still active on IBM machines...
+ * Unfortunately quiesce crashes on some powermacs if we have
+ * closed stdin already (in particular the powerbook 101).
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ prom_close_stdin();
/*
* Call OF "quiesce" method to shut down pending DMA's from
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 568ea335d61..3d2abd95c7a 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -248,46 +248,10 @@ void ptrace_disable(struct task_struct *child)
clear_single_step(child);
}
-long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret = -EPERM;
- lock_kernel();
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -540,10 +504,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
}
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 5bdd5b079d9..ae1a36449cc 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -259,7 +259,7 @@ static int __init proc_rtas_init(void)
{
struct proc_dir_entry *entry;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR)
return 1;
rtas_node = of_find_node_by_name(NULL, "rtas");
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b7fc2d88495..4283fa33f78 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -17,6 +17,7 @@
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/delay.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -28,9 +29,6 @@
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/lmb.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
struct rtas_t rtas = {
.lock = SPIN_LOCK_UNLOCKED
@@ -83,7 +81,7 @@ void call_rtas_display_status_delay(unsigned char c)
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
- udelay(500000);
+ mdelay(500);
pending_newline = 1;
} else {
if (pending_newline) {
@@ -608,7 +606,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
return 0;
}
-#ifdef CONFIG_SMP
/* This version can't take the spinlock, because it never returns */
struct rtas_args rtas_stop_self_args = {
@@ -633,7 +630,6 @@ void rtas_stop_self(void)
panic("Alas, I survived.\n");
}
-#endif
/*
* Call early during boot, before mem init or bootmem, to retreive the RTAS
@@ -672,7 +668,7 @@ void __init rtas_initialize(void)
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (_machine == PLATFORM_PSERIES_LPAR)
rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
#endif
rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 00000000000..0e5a8e11665
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,513 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+ char * status;
+
+ status = get_property(dn, "status", NULL);
+
+ if (!status)
+ return 1;
+
+ if (!strcmp(status, "okay"))
+ return 1;
+
+ return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+ rtas_pci_read_config,
+ rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+ char *model = (char *)get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+ unsigned int addr_size_words,
+ struct reg_property64 *reg)
+{
+ unsigned int *ui_ptr = NULL, len;
+
+ /* Found a PHB, now figure out where his registers are mapped. */
+ ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+ if (ui_ptr == NULL)
+ return 1;
+
+ if (addr_size_words == 1) {
+ reg->address = ((struct reg_property32 *)ui_ptr)->address;
+ reg->size = ((struct reg_property32 *)ui_ptr)->size;
+ } else {
+ *reg = *((struct reg_property64 *)ui_ptr);
+ }
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct reg_property64 reg_struct;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+ return;
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ int addr_cells;
+ unsigned int *buid_vals;
+ unsigned int len;
+ unsigned long buid;
+
+ if (ibm_read_pci_config == -1) return 0;
+
+ /* PHB's will always be children of the root node,
+ * or so it is promised by the current firmware. */
+ if (phb->parent == NULL)
+ return 0;
+ if (phb->parent->parent)
+ return 0;
+
+ buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+ if (buid_vals == NULL)
+ return 0;
+
+ addr_cells = prom_n_addr_cells(phb);
+ if (addr_cells == 1) {
+ buid = (unsigned long) buid_vals[0];
+ } else {
+ buid = (((unsigned long)buid_vals[0]) << 32UL) |
+ (((unsigned long)buid_vals[1]) & 0xffffffff);
+ }
+ return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ int *bus_range;
+ unsigned int len;
+
+ bus_range = (int *) get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+ struct pci_controller *phb,
+ unsigned int addr_size_words)
+{
+ pci_setup_pci_controller(phb);
+
+ if (is_python(dev))
+ python_countermeasures(dev, addr_size_words);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->arch_data = dev;
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb,
+ struct property *of_prop)
+{
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(phb->global_number);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+ prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+ struct property *of_prop;
+
+ phb = alloc_bootmem(sizeof(struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+
+ of_prop = alloc_bootmem(sizeof(struct property) +
+ sizeof(phb->global_number));
+ if (!of_prop)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ add_linux_pci_domain(dev, phb, of_prop);
+
+ return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+
+ phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+ GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ phb->is_dynamic = 1;
+
+ /* TODO: linux,pci-domain? */
+
+ return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ unsigned int root_size_cells = 0;
+ unsigned int index;
+ unsigned int *opprop = NULL;
+ struct device_node *root = of_find_node_by_path("/");
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+ opprop = (unsigned int *)get_property(root,
+ "platform-open-pic", NULL);
+ }
+
+ root_size_cells = prom_n_size_cells(root);
+
+ index = 0;
+
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+ if (node->type == NULL || strcmp(node->type, "pci") != 0)
+ continue;
+
+ phb = alloc_phb(node, root_size_cells);
+ if (!phb)
+ continue;
+
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+ int addr = root_size_cells * (index + 2) - 1;
+ mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+ }
+#endif
+ index++;
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /*
+ * pci_probe_only and pci_assign_all_buses can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ int *prop;
+
+ prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+ NULL);
+ if (prop)
+ pci_probe_only = *prop;
+
+ prop = (int *)get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop)
+ pci_assign_all_buses = *prop;
+ }
+
+ return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ unsigned int root_size_cells = 0;
+ struct pci_controller *phb;
+ int primary;
+
+ root_size_cells = prom_n_size_cells(root);
+
+ primary = list_empty(&hose_list);
+ phb = alloc_phb_dynamic(dn, root_size_cells);
+ if (!phb)
+ return NULL;
+
+ pci_process_bridge_OF_ranges(phb, dn, primary);
+
+ pci_setup_phb_io_dynamic(phb, primary);
+ of_node_put(root);
+
+ pci_devs_phb_init_dynamic(phb);
+ scan_phb(phb);
+
+ return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct resource *res;
+ int rc, i;
+
+ res = b->resource[0];
+ if (!res->flags) {
+ printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+ b->name);
+ return 1;
+ }
+
+ rc = unmap_bus_range(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ if (release_resource(res)) {
+ printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ for (i = 1; i < 3; ++i) {
+ res = b->resource[i];
+ if (!res->flags && i == 0) {
+ printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+ if (res->flags && release_resource(res)) {
+ printk(KERN_ERR
+ "%s: failed to release IO %d on bus %s\n",
+ __FUNCTION__, i, b->name);
+ return 1;
+ }
+ }
+
+ list_del(&phb->list_node);
+ if (phb->is_dynamic)
+ kfree(phb);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d43fa8c0e5a..bae4bff138f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
+#include <asm/systemcfg.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -51,6 +52,9 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/lmb.h>
+#include <asm/xmon.h>
+
+#include "setup.h"
#undef DEBUG
@@ -60,6 +64,13 @@
#define DBG(fmt...)
#endif
+#ifdef CONFIG_PPC_MULTIPLATFORM
+int _machine = 0;
+EXPORT_SYMBOL(_machine);
+#endif
+
+unsigned long klimit = (unsigned long) _end;
+
/*
* This still seems to be needed... -- paulus
*/
@@ -405,6 +416,46 @@ static int __init set_preferred_console(void)
console_initcall(set_preferred_console);
#endif /* CONFIG_PPC_MULTIPLATFORM */
+void __init check_for_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long *prop;
+
+ DBG(" -> check_for_initrd()\n");
+
+ if (of_chosen) {
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-start", NULL);
+ if (prop != NULL) {
+ initrd_start = (unsigned long)__va(*prop);
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-end", NULL);
+ if (prop != NULL) {
+ initrd_end = (unsigned long)__va(*prop);
+ initrd_below_start_ok = 1;
+ } else
+ initrd_start = 0;
+ }
+ }
+
+ /* If we were passed an initrd, set the ROOT_DEV properly if the values
+ * look sensible. If not, clear initrd reference.
+ */
+ if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
+ initrd_end > initrd_start)
+ ROOT_DEV = Root_RAM0;
+ else {
+ printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end);
+ initrd_start = initrd_end = 0;
+ }
+
+ if (initrd_start)
+ printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+
+ DBG(" <- check_for_initrd()\n");
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
#ifdef CONFIG_SMP
/**
@@ -470,8 +521,8 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR &&
- (dn = of_find_node_by_path("/rtas"))) {
+ if (_machine == PLATFORM_PSERIES_LPAR &&
+ (dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
unsigned int *ireg;
@@ -515,7 +566,27 @@ void __init smp_setup_cpu_maps(void)
cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
}
- systemcfg->processorCount = num_present_cpus();
+ _systemcfg->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_XMON
+static int __init early_xmon(char *p)
+{
+ /* ensure xmon is enabled */
+ if (p) {
+ if (strncmp(p, "on", 2) == 0)
+ xmon_init(1);
+ if (strncmp(p, "off", 3) == 0)
+ xmon_init(0);
+ if (strncmp(p, "early", 5) != 0)
+ return 0;
+ }
+ xmon_init(1);
+ debugger(NULL);
+
+ return 0;
+}
+early_param("xmon", early_xmon);
+#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 00000000000..2ebba755272
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,6 @@
+#ifndef _POWERPC_KERNEL_SETUP_H
+#define _POWERPC_KERNEL_SETUP_H
+
+void check_for_initrd(void);
+
+#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b45eedbb4b3..c98cfcc9cd9 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,6 +40,8 @@
#include <asm/xmon.h>
#include <asm/time.h>
+#include "setup.h"
+
#define DBG(fmt...)
#if defined CONFIG_KGDB
@@ -70,8 +72,6 @@ unsigned int DMA_MODE_WRITE;
int have_of = 1;
#ifdef CONFIG_PPC_MULTIPLATFORM
-int _machine = 0;
-
extern void prep_init(void);
extern void pmac_init(void);
extern void chrp_init(void);
@@ -279,13 +279,13 @@ arch_initcall(ppc_init);
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
- extern char *klimit;
extern void do_init_bootmem(void);
/* so udelay does something sensible, assume <= 1000 bogomips */
loops_per_jiffy = 500000000 / HZ;
unflatten_device_tree();
+ check_for_initrd();
finish_device_tree();
smp_setup_cpu_maps();
@@ -302,14 +302,9 @@ void __init setup_arch(char **cmdline_p)
pmac_feature_init(); /* New cool way */
#endif
-#ifdef CONFIG_XMON
- xmon_map_scc();
- if (strstr(cmd_line, "xmon")) {
- xmon_init(1);
- debugger(NULL);
- }
-#endif /* CONFIG_XMON */
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+#ifdef CONFIG_XMON_DEFAULT
+ xmon_init(1);
+#endif
#if defined(CONFIG_KGDB)
if (ppc_md.kgdb_map_scc)
@@ -342,7 +337,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) klimit;
+ init_mm.brk = klimit;
/* Save unparsed command line copy for /proc/cmdline */
strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6b52cce872b..6791668213e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -41,7 +41,6 @@
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/paca.h>
-#include <asm/ppcdebug.h>
#include <asm/time.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -60,6 +59,9 @@
#include <asm/firmware.h>
#include <asm/systemcfg.h>
#include <asm/xmon.h>
+#include <asm/udbg.h>
+
+#include "setup.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -94,15 +96,6 @@ extern void udbg_init_maple_realmode(void);
do { udbg_putc = call_rtas_display_status_delay; } while(0)
#endif
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void stab_initialize(unsigned long stab);
-extern void htab_initialize(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-
int have_of = 1;
int boot_cpuid = 0;
int boot_cpuid_phys = 0;
@@ -244,12 +237,6 @@ void __init early_setup(unsigned long dt_ptr)
DBG(" -> early_setup()\n");
/*
- * Fill the default DBG level (do we want to keep
- * that old mecanism around forever ?)
- */
- ppcdbg_initialize();
-
- /*
* Do early initializations using the flattened device
* tree, like retreiving the physical memory map or
* calculating/retreiving the hash table size
@@ -260,11 +247,10 @@ void __init early_setup(unsigned long dt_ptr)
* Iterate all ppc_md structures until we find the proper
* one for the current machine type
*/
- DBG("Probing machine type for platform %x...\n",
- systemcfg->platform);
+ DBG("Probing machine type for platform %x...\n", _machine);
for (mach = machines; *mach; mach++) {
- if ((*mach)->probe(systemcfg->platform))
+ if ((*mach)->probe(_machine))
break;
}
/* What can we do if we didn't find ? */
@@ -277,20 +263,47 @@ void __init early_setup(unsigned long dt_ptr)
DBG("Found, Initializing memory management...\n");
/*
- * Initialize stab / SLB management
+ * Initialize the MMU Hash table and create the linear mapping
+ * of memory. Has to be done before stab/slb initialization as
+ * this is currently where the page size encoding is obtained
*/
- if (!firmware_has_feature(FW_FEATURE_ISERIES))
- stab_initialize(lpaca->stab_real);
+ htab_initialize();
/*
- * Initialize the MMU Hash table and create the linear mapping
- * of memory
+ * Initialize stab / SLB management except on iSeries
*/
- htab_initialize();
+ if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_real);
+ }
DBG(" <- early_setup()\n");
}
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+ struct paca_struct *lpaca = get_paca();
+
+ /* Mark enabled in PACA */
+ lpaca->proc_enabled = 0;
+
+ /* Initialize hash table for that CPU */
+ htab_initialize_secondary();
+
+ /* Initialize STAB/SLB. We use a virtual address as it works
+ * in real mode on pSeries and we want a virutal address on
+ * iSeries anyway
+ */
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_addr);
+}
+
+#endif /* CONFIG_SMP */
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
void smp_release_cpus(void)
@@ -316,7 +329,8 @@ void smp_release_cpus(void)
#endif /* CONFIG_SMP || CONFIG_KEXEC */
/*
- * Initialize some remaining members of the ppc64_caches and systemcfg structures
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
* (at least until we get rid of them completely). This is mostly some
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
@@ -341,7 +355,7 @@ static void __init initialize_cache_info(void)
const char *dc, *ic;
/* Then read cache informations */
- if (systemcfg->platform == PLATFORM_POWERMAC) {
+ if (_machine == PLATFORM_POWERMAC) {
dc = "d-cache-block-size";
ic = "i-cache-block-size";
} else {
@@ -361,8 +375,8 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->dcache_size = ppc64_caches.dsize = size;
- systemcfg->dcache_line_size =
+ _systemcfg->dcache_size = ppc64_caches.dsize = size;
+ _systemcfg->dcache_line_size =
ppc64_caches.dline_size = lsize;
ppc64_caches.log_dline_size = __ilog2(lsize);
ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
@@ -379,8 +393,8 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->icache_size = ppc64_caches.isize = size;
- systemcfg->icache_line_size =
+ _systemcfg->icache_size = ppc64_caches.isize = size;
+ _systemcfg->icache_line_size =
ppc64_caches.iline_size = lsize;
ppc64_caches.log_iline_size = __ilog2(lsize);
ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
@@ -388,51 +402,16 @@ static void __init initialize_cache_info(void)
}
/* Add an eye catcher and the systemcfg layout version number */
- strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
- systemcfg->version.major = SYSTEMCFG_MAJOR;
- systemcfg->version.minor = SYSTEMCFG_MINOR;
- systemcfg->processor = mfspr(SPRN_PVR);
+ strcpy(_systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
+ _systemcfg->version.major = SYSTEMCFG_MAJOR;
+ _systemcfg->version.minor = SYSTEMCFG_MINOR;
+ _systemcfg->processor = mfspr(SPRN_PVR);
+ _systemcfg->platform = _machine;
+ _systemcfg->physicalMemorySize = lmb_phys_mem_size();
DBG(" <- initialize_cache_info()\n");
}
-static void __init check_for_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
- u64 *prop;
-
- DBG(" -> check_for_initrd()\n");
-
- if (of_chosen) {
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-start", NULL);
- if (prop != NULL) {
- initrd_start = (unsigned long)__va(*prop);
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-end", NULL);
- if (prop != NULL) {
- initrd_end = (unsigned long)__va(*prop);
- initrd_below_start_ok = 1;
- } else
- initrd_start = 0;
- }
- }
-
- /* If we were passed an initrd, set the ROOT_DEV properly if the values
- * look sensible. If not, clear initrd reference.
- */
- if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
- initrd_end > initrd_start)
- ROOT_DEV = Root_RAM0;
- else
- initrd_start = initrd_end = 0;
-
- if (initrd_start)
- printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
-
- DBG(" <- check_for_initrd()\n");
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
/*
* Do some initial setup of the system. The parameters are those which
@@ -516,12 +495,11 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
- printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch);
printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
- printk("systemcfg = 0x%p\n", systemcfg);
- printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
- printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
- printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
+ printk("systemcfg = 0x%p\n", _systemcfg);
+ printk("systemcfg->platform = 0x%x\n", _systemcfg->platform);
+ printk("systemcfg->processorCount = 0x%lx\n", _systemcfg->processorCount);
+ printk("systemcfg->physicalMemorySize = 0x%lx\n", _systemcfg->physicalMemorySize);
printk("ppc64_caches.dcache_line_size = 0x%x\n",
ppc64_caches.dline_size);
printk("ppc64_caches.icache_line_size = 0x%x\n",
@@ -552,10 +530,12 @@ static void __init irqstack_early_init(void)
* SLB misses on them.
*/
for_each_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
- hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
+ softirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
}
}
#else
@@ -583,8 +563,8 @@ static void __init emergency_stack_init(void)
limit = min(0x10000000UL, lmb.rmo_size);
for_each_cpu(i)
- paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
- limit)) + PAGE_SIZE;
+ paca[i].emergency_sp =
+ __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
@@ -601,12 +581,12 @@ void __init setup_syscall_map(void)
for (i = 0; i < __NR_syscalls; i++) {
if (sys_call_table[i*2] != sys_ni_syscall) {
count64++;
- systemcfg->syscall_map_64[i >> 5] |=
+ _systemcfg->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
}
if (sys_call_table[i*2+1] != sys_ni_syscall) {
count32++;
- systemcfg->syscall_map_32[i >> 5] |=
+ _systemcfg->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
}
}
@@ -895,26 +875,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-#ifdef CONFIG_XMON
-static int __init early_xmon(char *p)
-{
- /* ensure xmon is enabled */
- if (p) {
- if (strncmp(p, "on", 2) == 0)
- xmon_init(1);
- if (strncmp(p, "off", 3) == 0)
- xmon_init(0);
- if (strncmp(p, "early", 5) != 0)
- return 0;
- }
- xmon_init(1);
- debugger(NULL);
-
- return 0;
-}
-early_param("xmon", early_xmon);
-#endif
-
void cpu_die(void)
{
if (ppc_md.cpu_die)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 876c57c1136..a7c4515f320 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -42,9 +42,9 @@
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/sigcontext.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
-#include <asm/ppcdebug.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
#else
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ec9d0984b6a..58194e15071 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -33,7 +33,6 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/ppcdebug.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm/vdso.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1794a694a92..e28a139c29d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -40,11 +40,11 @@
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
-#include <asm/xmon.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/mpic.h>
+#include <asm/systemcfg.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
@@ -369,9 +369,11 @@ int generic_cpu_disable(void)
if (cpu == boot_cpuid)
return -EBUSY;
- systemcfg->processorCount--;
cpu_clear(cpu, cpu_online_map);
+#ifdef CONFIG_PPC64
+ _systemcfg->processorCount--;
fixup_irqs(cpu_online_map);
+#endif
return 0;
}
@@ -389,9 +391,11 @@ int generic_cpu_enable(unsigned int cpu)
while (!cpu_online(cpu))
cpu_relax();
+#ifdef CONFIG_PPC64
fixup_irqs(cpu_online_map);
/* counter the irq disable in fixup_irqs */
local_irq_enable();
+#endif
return 0;
}
@@ -420,7 +424,9 @@ void generic_mach_cpu_die(void)
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
+#ifdef CONFIG_PPC64
flush_tlb_pending();
+#endif
cpu_set(cpu, cpu_online_map);
local_irq_enable();
}
@@ -511,6 +517,7 @@ int __devinit start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
+ preempt_disable();
cpu_callin_map[cpu] = 1;
smp_ops->setup_cpu(cpu);
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index a8210ed5c68..9c921d1c408 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -52,7 +52,6 @@
#include <asm/semaphore.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
-#include <asm/systemcfg.h>
#include <asm/ppc-pci.h>
/* readdir & getdents */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 00000000000..850af198fb5
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,384 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+ ssize_t ret;
+ unsigned long snooze;
+
+ ret = sscanf(buf, "%lu", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+ struct device_node *options;
+ unsigned int *val;
+ unsigned int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ options = find_path_device("/options");
+ if (!options)
+ return 1;
+
+ val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+ NULL);
+ if (!smt_snooze_cmdline && val) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = *val;
+ }
+
+ return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ int snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ smt_snooze_cmdline = 1;
+
+ if (get_option(&str, &snooze)) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+ }
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+ unsigned long (*func)(unsigned long),
+ unsigned long arg)
+{
+ cpumask_t old_affinity = current->cpus_allowed;
+ unsigned long ret;
+
+ /* should return -EINVAL to userspace */
+ if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+ return 0;
+
+ ret = func(arg);
+
+ set_cpus_allowed(current, old_affinity);
+
+ return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+ return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+ ppc64_enable_pmcs(); \
+ mtspr(ADDRESS, val); \
+ return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+ store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+ return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_create_file(s, &attr_mmcr0);
+ sysdev_create_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_create_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_create_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_create_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_create_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_create_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_create_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_create_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_create_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_create_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+ BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_remove_file(s, &attr_mmcr0);
+ sysdev_remove_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_remove_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_remove_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_remove_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_remove_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_remove_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_remove_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_remove_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_remove_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_remove_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (node_online(i)) {
+ int p_node = parent_node(i);
+ struct node *parent = NULL;
+
+ if (p_node != i)
+ parent = &node_devices[p_node];
+
+ register_node(&node_devices[i], i, parent);
+ }
+ }
+}
+#else
+static void register_nodes(void)
+{
+ return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+ struct node *parent = NULL;
+
+ register_nodes();
+
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
+#endif
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (!ppc_md.cpu_die)
+ c->no_control = 1;
+
+ if (cpu_online(cpu) || (c->no_control == 0)) {
+ register_cpu(c, cpu, parent);
+
+ sysdev_create_file(&c->sysdev, &attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+
+ return 0;
+}
+__initcall(topology_init);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6996a593dcb..260b6ecd26a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -61,6 +61,7 @@
#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
+#include <asm/smp.h>
#ifdef CONFIG_PPC64
#include <asm/systemcfg.h>
#include <asm/firmware.h>
@@ -69,6 +70,7 @@
#include <asm/iseries/it_lp_queue.h>
#include <asm/iseries/hv_call_xm.h>
#endif
+#include <asm/smp.h>
/* keep track of when we need to update the rtc */
time_t last_rtc_update;
@@ -118,10 +120,6 @@ static unsigned adjusting_time = 0;
unsigned long ppc_proc_freq;
unsigned long ppc_tb_freq;
-#ifdef CONFIG_PPC32 /* XXX for now */
-#define boot_cpuid 0
-#endif
-
u64 tb_last_jiffy __cacheline_aligned_in_smp;
unsigned long tb_last_stamp;
@@ -273,13 +271,13 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
*/
- ++(systemcfg->tb_update_count);
+ ++(_systemcfg->tb_update_count);
smp_wmb();
- systemcfg->tb_orig_stamp = new_tb_stamp;
- systemcfg->stamp_xsec = new_stamp_xsec;
- systemcfg->tb_to_xs = new_tb_to_xs;
+ _systemcfg->tb_orig_stamp = new_tb_stamp;
+ _systemcfg->stamp_xsec = new_stamp_xsec;
+ _systemcfg->tb_to_xs = new_tb_to_xs;
smp_wmb();
- ++(systemcfg->tb_update_count);
+ ++(_systemcfg->tb_update_count);
#endif
}
@@ -359,8 +357,9 @@ static void iSeries_tb_recal(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
do_gtod.varp->tb_to_xs = tb_to_xs;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->tb_to_xs = tb_to_xs;
+ _systemcfg->tb_ticks_per_sec =
+ tb_ticks_per_sec;
+ _systemcfg->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -485,6 +484,8 @@ void __init smp_space_timers(unsigned int max_cpus)
unsigned long offset = tb_ticks_per_jiffy / max_cpus;
unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
+ /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
+ previous_tb -= tb_ticks_per_jiffy;
for_each_cpu(i) {
if (i != boot_cpuid) {
previous_tb += offset;
@@ -561,8 +562,8 @@ int do_settimeofday(struct timespec *tv)
update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
#ifdef CONFIG_PPC64
- systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
- systemcfg->tz_dsttime = sys_tz.tz_dsttime;
+ _systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
+ _systemcfg->tz_dsttime = sys_tz.tz_dsttime;
#endif
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -713,11 +714,11 @@ void __init time_init(void)
do_gtod.varp->tb_to_xs = tb_to_xs;
do_gtod.tb_to_us = tb_to_us;
#ifdef CONFIG_PPC64
- systemcfg->tb_orig_stamp = tb_last_jiffy;
- systemcfg->tb_update_count = 0;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
- systemcfg->tb_to_xs = tb_to_xs;
+ _systemcfg->tb_orig_stamp = tb_last_jiffy;
+ _systemcfg->tb_update_count = 0;
+ _systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
+ _systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
+ _systemcfg->tb_to_xs = tb_to_xs;
#endif
time_freq = 0;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 07e5ee40b87..2020bb7648f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -39,7 +39,6 @@
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
-#include <asm/xmon.h>
#include <asm/pmc.h>
#ifdef CONFIG_PPC32
#include <asm/reg.h>
@@ -130,7 +129,7 @@ int die(const char *str, struct pt_regs *regs, long err)
nl = 1;
#endif
#ifdef CONFIG_PPC64
- switch (systemcfg->platform) {
+ switch (_machine) {
case PLATFORM_PSERIES:
printk("PSERIES ");
nl = 1;
@@ -748,22 +747,12 @@ static int check_bug_trap(struct pt_regs *regs)
return 0;
if (bug->line & BUG_WARNING_TRAP) {
/* this is a WARN_ON rather than BUG/BUG_ON */
-#ifdef CONFIG_XMON
- xmon_printf(KERN_ERR "Badness in %s at %s:%ld\n",
- bug->function, bug->file,
- bug->line & ~BUG_WARNING_TRAP);
-#endif /* CONFIG_XMON */
printk(KERN_ERR "Badness in %s at %s:%ld\n",
bug->function, bug->file,
bug->line & ~BUG_WARNING_TRAP);
dump_stack();
return 1;
}
-#ifdef CONFIG_XMON
- xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
- bug->function, bug->file, bug->line);
- xmon(regs);
-#endif /* CONFIG_XMON */
printk(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
bug->function, bug->file, bug->line);
@@ -898,10 +887,6 @@ void altivec_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
}
-#ifdef CONFIG_PPC64
-extern perf_irq_t perf_irq;
-#endif
-
#if defined(CONFIG_PPC64) || defined(CONFIG_E500)
void performance_monitor_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 97082a4203a..71a6addf9f7 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -21,6 +21,7 @@
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/vio.h>
+#include <asm/prom.h>
static const struct vio_device_id *vio_match_device(
const struct vio_device_id *, const struct vio_dev *);
@@ -265,7 +266,33 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
}
+static int vio_hotplug(struct device *dev, char **envp, int num_envp,
+ char *buffer, int buffer_size)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ char *cp;
+ int length;
+
+ if (!num_envp)
+ return -ENOMEM;
+
+ if (!vio_dev->dev.platform_data)
+ return -ENODEV;
+ cp = (char *)get_property(vio_dev->dev.platform_data, "compatible", &length);
+ if (!cp)
+ return -ENODEV;
+
+ envp[0] = buffer;
+ length = scnprintf(buffer, buffer_size, "MODALIAS=vio:T%sS%s",
+ vio_dev->type, cp);
+ if (buffer_size - length <= 0)
+ return -ENOMEM;
+ envp[1] = NULL;
+ return 0;
+}
+
struct bus_type vio_bus_type = {
.name = "vio",
+ .hotplug = vio_hotplug,
.match = vio_bus_match,
};
diff --git a/arch/powerpc/lib/bitops.c b/arch/powerpc/lib/bitops.c
index b67ce3004eb..f68ad71a018 100644
--- a/arch/powerpc/lib/bitops.c
+++ b/arch/powerpc/lib/bitops.c
@@ -41,7 +41,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
tmp = *p;
found_first:
- tmp &= (~0UL >> (64 - size));
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bb..40523b14010 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-_GLOBAL(copy_page)
+_GLOBAL(copy_4K_page)
std r31,-8(1)
std r30,-16(1)
std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb..6d69ef39b7d 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
- beq .Lcopy_page
+ beq .Lcopy_page_4K
andi. r6,r6,7
mtcrf 0x01,r5
blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
* above (following the .Ldst_aligned label) but it runs slightly
* slower on POWER3.
*/
-.Lcopy_page:
+.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 2a912f411eb..35bd03c41dd 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
#include <asm/hvcall.h>
#include <asm/iseries/hv_call.h>
+#include <asm/smp.h>
void __spin_yield(raw_spinlock_t *lock)
{
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 841d8b6323a..93d4fbfdb72 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -389,5 +389,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
}
/* kernel has accessed a bad area */
+
+ printk(KERN_ALERT "Unable to handle kernel paging request for ");
+ switch (regs->trap) {
+ case 0x300:
+ case 0x380:
+ printk("data at address 0x%08lx\n", regs->dar);
+ break;
+ case 0x400:
+ case 0x480:
+ printk("instruction fetch\n");
+ break;
+ default:
+ printk("unknown fault\n");
+ }
+ printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
+ regs->nip);
+
die("Kernel access of bad area", regs, sig);
}
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eee..e0d02c4a261 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
/*
* ppc64 MMU hashtable management routines
*
- * (c) Copyright IBM Corp. 2003
+ * (c) Copyright IBM Corp. 2003, 2005
*
* Maintained by: Benjamin Herrenschmidt
* <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
* described in the kernel's COPYING file.
*/
+#include <linux/config.h>
#include <asm/reg.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -42,14 +43,24 @@
/* Save non-volatile offsets */
#define STK_REG(i) (112 + ((i)-14)*8)
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+/*****************************************************************************
+ * *
+ * 4K SW & 4K HW pages implementation *
+ * *
+ *****************************************************************************/
+
+
/*
- * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local)
+ * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
*
- * Adds a page to the hash table. This is the non-LPAR version for now
+ * Adds a 4K page to the hash table in a segment of 4K pages only
*/
-_GLOBAL(__hash_page)
+_GLOBAL(__hash_page_4K)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
/* If so, just bail out and refault if needed. Someone else
* is changing this PTE anyway and might hash it.
*/
- bne- bail_ok
+ bne- htab_bail_ok
+
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
/* Convert linux PTE bits into HW equivalents */
andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
@@ -158,19 +170,21 @@ htab_insert_pte:
andc r30,r30,r0
ori r30,r30,_PAGE_HASHPTE
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate primary group hash */
and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* no vflags */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert1)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
/* Now try secondary slot */
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate secondary group hash */
andc r0,r27,r28
rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,HPTE_V_SECONDARY@l /* secondary slot */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert2)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
_GLOBAL(htab_call_hpte_remove)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* Try all again */
b htab_insert_pte
-bail_ok:
+htab_bail_ok:
li r3,0
- b bail
+ b htab_bail
htab_pte_insert_ok:
/* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
ld r6,STK_PARM(r6)(r1)
std r30,0(r6)
li r3, 0
-bail:
+htab_bail:
ld r27,STK_REG(r27)(r1)
ld r28,STK_REG(r28)(r1)
ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
/* Call ppc_md.hpte_updatepp */
mr r5,r29 /* va */
- li r6,0 /* large is 0 */
+ li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARM(r8)(r1) /* get "local" param */
_GLOBAL(htab_call_hpte_updatepp)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
/* Bail out clearing reservation */
stdcx. r31,0,r6
li r3,1
- b bail
+ b htab_bail
+
+htab_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b htab_bail
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
+ */
+
+/*
+ * For now, we do NOT implement Admixed pages
+ */
+_GLOBAL(__hash_page_4K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ * r26 is the hidx mask
+ * r25 is the index in combo page
+ */
+ std r25,STK_REG(r25)(r1)
+ std r26,STK_REG(r26)(r1)
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- htab_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- htab_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Load the hidx index */
+ rldicl r25,r3,64-12,60
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
+ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
+ or r29,r3,r29 /* r29 = va
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE. We look for
+ * the bit at (1 >> (index + 32))
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ li r26,0 /* Default hidx */
+ beq htab_insert_pte
+ ld r6,STK_PARM(r6)(r1)
+ ori r26,r6,0x8000 /* Load the hidx mask */
+ ld r26,0(r26)
+ addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
+ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
+ bne htab_modify_pte
+
+htab_insert_pte:
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b htab_insert_pte
+
+htab_bail_ok:
+ li r3,0
+ b htab_bail
+
+htab_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE second half,
+ * clear _PAGE_BUSY and set approriate HPTE slot bit
+ */
+ ld r6,STK_PARM(r6)(r1)
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ /* HPTE SUB bit */
+ li r0,1
+ subfic r5,r25,27 /* Must match bit position in */
+ sld r0,r0,r5 /* pgtable.h */
+ or r30,r30,r0
+ /* hindx */
+ sldi r5,r25,2
+ sld r3,r3,r5
+ li r4,0xf
+ sld r4,r4,r5
+ andc r26,r26,r4
+ or r26,r26,r3
+ ori r5,r6,0x8000
+ std r26,0(r5)
+ lwsync
+ std r30,0(r6)
+ li r3, 0
+htab_bail:
+ ld r25,STK_REG(r25)(r1)
+ ld r26,STK_REG(r26)(r1)
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+htab_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ sldi r5,r25,2
+ srd r3,r26,r5
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r3,0x8 /* page secondary ? */
+ beq 1f
+ not r5,r5
+1: andi. r3,r3,0x7 /* extract idx alone */
+
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_4K /* page size */
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- htab_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3,0
+ b htab_bail
+
+htab_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b htab_bail
htab_pte_insert_failure:
/* Bail out restoring old PTE */
ld r6,STK_PARM(r6)(r1)
std r31,0(r6)
li r3,-1
- b bail
+ b htab_bail
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 64K HW in a 64K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+_GLOBAL(__hash_page_64K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ */
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- ht64_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- ht64_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY,HASHPTE and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28
+ rldicl r3,r3,0,36
+ or r29,r3,r29
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ bne ht64_modify_pte
+
+ht64_insert_pte:
+ /* Clear hpte bits in new pte (we also clear BUSY btw) and
+ * add _PAGE_HASHPTE
+ */
+ lis r0,_PAGE_HPTEFLAGS@h
+ ori r0,r0,_PAGE_HPTEFLAGS@l
+ andc r30,r30,r0
+ ori r30,r30,_PAGE_HASHPTE
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(ht64_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b ht64_insert_pte
+
+ht64_bail_ok:
+ li r3,0
+ b ht64_bail
+
+ht64_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE */
+ rldimi r30,r3,12,63-15
+
+ /* Write out the PTE with a normal write
+ * (maybe add eieio may be good still ?)
+ */
+ht64_write_out_pte:
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3, 0
+ht64_bail:
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ht64_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ rlwinm r3,r31,32-12,29,31
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r31,_PAGE_F_SECOND
+ beq 1f
+ not r5,r5
+1:
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_64K
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(ht64_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- ht64_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ b ht64_write_out_pte
+
+ht64_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b ht64_bail
+
+ht64_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b ht64_bail
+
+
+#endif /* CONFIG_PPC_64K_PAGES */
+/*****************************************************************************
+ * *
+ * Huge pages implementation is in hugetlbpage.c *
+ * *
+ *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c2..d96bcfe4c6f 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG_LOW
+
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
@@ -22,11 +25,84 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
+static inline void __tlbie(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile("tlbie %0,0" : : "r" (va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile("tlbie %0,1" : : "r" (va) : "memory");
+ break;
+ }
+}
+
+static inline void __tlbiel(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ }
+
+}
+
+static inline void tlbie(unsigned long va, int psize, int local)
+{
+ unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
+ int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+
+ if (use_local)
+ use_local = mmu_psize_defs[psize].tlbiel;
+ if (lock_tlbie && !use_local)
+ spin_lock(&native_tlbie_lock);
+ asm volatile("ptesync": : :"memory");
+ if (use_local) {
+ __tlbiel(va, psize);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(va, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+ spin_unlock(&native_tlbie_lock);
+}
+
static inline void native_lock_hpte(hpte_t *hptep)
{
unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
hpte_t *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
int i;
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ " rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+ }
+
for (i = 0; i < HPTES_PER_GROUP; i++) {
if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- va &= ~(1UL << HPTE_V_AVPN_SHIFT);
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ i, hpte_v, hpte_r);
+ }
hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
int slot_offset;
unsigned long hpte_v;
+ DBG_LOW(" remove(group=%lx)\n", hpte_group);
+
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
-static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
{
- unsigned long old;
- unsigned long *p = &addr->r;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,61\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
+
+ want_v = hpte_encode_v(va, psize);
+
+ DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ va, want_v & HPTE_V_AVPN, slot, newpp);
+
+ native_lock_hpte(hptep);
+
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+ native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
+ native_unlock_hpte(hptep);
+ }
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+
+ return ret;
}
-/*
- * Only works on small pages. Yes its ugly to have to check each slot in
- * the group but we only use this during bootup.
- */
-static long native_hpte_find(unsigned long vpn)
+static long native_hpte_find(unsigned long va, int psize)
{
hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
hptep = htab_address + slot;
hpte_v = hptep->v;
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
return -1;
}
-static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
-{
- hpte_t *hptep = htab_address + slot;
- unsigned long hpte_v;
- unsigned long avpn = va >> 23;
- int ret = 0;
-
- if (large)
- avpn &= ~1;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- ret = -1;
- } else {
- set_pp_bit(newpp, hptep);
- native_unlock_hpte(hptep);
- }
-
- /* Ensure it is out of the tlb too */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
-
- return ret;
-}
-
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
- * Does not work on large pages.
*
* No need to lock here because we should be the only user.
*/
-static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
- unsigned long vsid, va, vpn, flags = 0;
+ unsigned long vsid, va;
long slot;
hpte_t *hptep;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = native_hpte_find(vpn);
+ slot = native_hpte_find(va, psize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
- set_pp_bit(newpp, hptep);
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
- /* Ensure it is out of the tlb too */
- if (lock_tlbie)
- spin_lock_irqsave(&native_tlbie_lock, flags);
- tlbie(va, 0);
- if (lock_tlbie)
- spin_unlock_irqrestore(&native_tlbie_lock, flags);
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
hpte_t *hptep = htab_address + slot;
unsigned long hpte_v;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long flags;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (large)
- avpn &= ~1;
local_irq_save(flags);
- native_lock_hpte(hptep);
+ DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- } else {
+ else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
- }
- /* Invalidate the tlb */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
+ /* Invalidate the TLB */
+ tlbie(va, psize, local);
+
local_irq_restore(flags);
}
/*
+ * XXX This need fixing based on page size. It's only used by
+ * native_hpte_clear() for now which needs fixing too so they
+ * make a good pair...
+ */
+static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+{
+ unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
+ unsigned long va;
+
+ va = avpn << 23;
+
+ if (! (hpte_v & HPTE_V_LARGE)) {
+ unsigned long vpi, pteg;
+
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+ va |= vpi << PAGE_SHIFT;
+ }
+
+ return va;
+}
+
+/*
* clear all mappings on kexec. All cpus are in real mode (or they will
* be when they isi), and we are the only one left. We rely on our kernel
* mapping being 0xC0's and the hardware ignoring those two real bits.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* athough there is the control page available...
+ *
+ * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hptep->v = 0;
- tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
}
}
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
local_irq_restore(flags);
}
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ */
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, vpn, hash, secondary, slot, flags, avpn;
- int i, j;
+ unsigned long va, hash, index, hidx, shift, slot;
hpte_t *hptep;
unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+ real_pte_t pte;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long large = batch->large;
+ unsigned long psize = batch->psize;
+ int i;
local_irq_save(flags);
- j = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[j];
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, large);
- secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
-
- hptep = htab_address + slot;
-
- avpn = va >> 23;
- if (large)
- avpn &= ~0x1UL;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- } else {
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
- }
-
- j++;
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ hptep = htab_address + slot;
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+ if (!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ hptep->v = 0;
+ } pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
+ if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbiel(batch->vaddr[i]);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbiel(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("ptesync":::"memory");
} else {
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
spin_lock(&native_tlbie_lock);
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbie(batch->vaddr[i], large);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbie(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02..706e8a63ced 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
*/
#undef DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/spinlock.h>
@@ -32,7 +33,6 @@
#include <linux/init.h>
#include <linux/signal.h>
-#include <asm/ppcdebug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -59,6 +59,15 @@
#define DBG(fmt...)
#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -75,99 +84,302 @@
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */
+static unsigned long _SDR1;
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
hpte_t *htab_address;
unsigned long htab_hash_mask;
+int mmu_linear_psize = MMU_PAGE_4K;
+int mmu_virtual_psize = MMU_PAGE_4K;
+#ifdef CONFIG_HUGETLB_PAGE
+int mmu_huge_psize = MMU_PAGE_16M;
+unsigned int HPAGE_SHIFT;
+#endif
-unsigned long _SDR1;
-
-#define KB (1024)
-#define MB (1024*KB)
-
-static inline void loop_forever(void)
-{
- volatile unsigned long x = 1;
- for(;x;x|=1)
- ;
-}
+/* There are definitions of page sizes arrays to be used when none
+ * is provided by the firmware.
+ */
-static inline void create_pte_mapping(unsigned long start, unsigned long end,
- unsigned long mode, int large)
+/* Pre-POWER4 CPUs (4k pages only)
+ */
+struct mmu_psize_def mmu_psize_defaults_old[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/* POWER4, GPUL, POWER5
+ *
+ * Support for 16Mb large pages
+ */
+struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = 0,
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode, int psize)
{
- unsigned long addr;
- unsigned int step;
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
unsigned long tmp_mode;
- unsigned long vflags;
+ int ret = 0;
- if (large) {
- step = 16*MB;
- vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
- } else {
- step = 4*KB;
- vflags = HPTE_V_BOLTED;
- }
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
- for (addr = start; addr < end; addr += step) {
+ for (vaddr = vstart, paddr = pstart; vaddr < vend;
+ vaddr += step, paddr += step) {
unsigned long vpn, hash, hpteg;
- unsigned long vsid = get_kernel_vsid(addr);
- unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- int ret = -1;
-
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
-
+ unsigned long vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
- if (!in_kernel_text(addr))
- tmp_mode = mode | HW_NO_EXEC;
-
- hash = hpt_hash(vpn, large);
+ if (!in_kernel_text(vaddr))
+ tmp_mode = mode | HPTE_R_N;
+ hash = hpt_hash(va, shift);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ /* The crap below can be cleaned once ppd_md.probe() can
+ * set up the hash callbacks, thus we can just used the
+ * normal insert callback here.
+ */
#ifdef CONFIG_PPC_ISERIES
- if (systemcfg->platform & PLATFORM_ISERIES_LPAR)
- ret = iSeries_hpte_bolt_or_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ if (_machine == PLATFORM_ISERIES_LPAR)
+ ret = iSeries_hpte_insert(hpteg, va,
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_PSERIES
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (_machine & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_MULTIPLATFORM
ret = native_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode, HPTE_V_BOLTED,
+ psize);
#endif
+ if (ret < 0)
+ break;
+ }
+ return ret < 0 ? ret : 0;
+}
- if (ret == -1) {
- ppc64_terminate_msg(0x20, "create_pte_mapping");
- loop_forever();
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+ unsigned long size = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node,
+ "ibm,segment-page-sizes", &size);
+ if (prop != NULL) {
+ DBG("Page sizes from device-tree:\n");
+ size /= 4;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+ while(size > 0) {
+ unsigned int shift = prop[0];
+ unsigned int slbenc = prop[1];
+ unsigned int lpnum = prop[2];
+ unsigned int lpenc = 0;
+ struct mmu_psize_def *def;
+ int idx = -1;
+
+ size -= 3; prop += 3;
+ while(size > 0 && lpnum) {
+ if (prop[0] == shift)
+ lpenc = prop[1];
+ prop += 2; size -= 2;
+ lpnum--;
+ }
+ switch(shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x14:
+ idx = MMU_PAGE_1M;
+ break;
+ case 0x18:
+ idx = MMU_PAGE_16M;
+ cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+ break;
+ case 0x22:
+ idx = MMU_PAGE_16G;
+ break;
+ }
+ if (idx < 0)
+ continue;
+ def = &mmu_psize_defs[idx];
+ def->shift = shift;
+ if (shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (shift - 23)) - 1;
+ def->sllp = slbenc;
+ def->penc = lpenc;
+ /* We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+ "tlbiel=%d, penc=%d\n",
+ idx, shift, def->sllp, def->avpnm, def->tlbiel,
+ def->penc);
}
+ return 1;
+ }
+ return 0;
+}
+
+
+static void __init htab_init_page_sizes(void)
+{
+ int rc;
+
+ /* Default to 4K pages only */
+ memcpy(mmu_psize_defs, mmu_psize_defaults_old,
+ sizeof(mmu_psize_defaults_old));
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found */
+ goto found;
+
+ /*
+ * Not in the device-tree, let's fallback on known size
+ * list for 16M capable GP & GR
+ */
+ if ((_machine != PLATFORM_ISERIES_LPAR) &&
+ cpu_has_feature(CPU_FTR_16M_PAGE))
+ memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ sizeof(mmu_psize_defaults_gp));
+ found:
+ /*
+ * Pick a size for the linear mapping. Currently, we only support
+ * 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K if cache inhibited large pages are supported by the
+ * processor
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
+ mmu_virtual_psize = MMU_PAGE_64K;
+#endif
+
+ printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Init large page size. Currently, we pick 16M or 1M depending
+ * on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_huge_psize = MMU_PAGE_16M;
+ /* With 4k/4level pagetables, we can't (for now) cope with a
+ * huge page size < PMD_SIZE */
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_huge_psize = MMU_PAGE_1M;
+
+ /* Calculate HPAGE_SHIFT and sanity check it */
+ if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
+ mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
+ HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
+ else
+ HPAGE_SHIFT = 0; /* No huge pages dude ! */
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static int __init htab_dt_scan_pftsize(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ if (prop != NULL) {
+ /* pft_size[0] is the NUMA CEC cookie */
+ ppc64_pft_size = prop[1];
+ return 1;
}
+ return 0;
}
-static unsigned long get_hashtable_size(void)
+static unsigned long __init htab_get_table_size(void)
{
- unsigned long rnd_mem_size, pteg_count;
+ unsigned long mem_size, rnd_mem_size, pteg_count;
- /* If hash size wasn't obtained in prom.c, we calculate it now based on
- * the total RAM size
+ /* If hash size isn't already provided by the platform, we try to
+ * retreive it from the device-tree. If it's not there neither, we
+ * calculate it now based on the total RAM size
*/
+ if (ppc64_pft_size == 0)
+ of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
/* round mem_size up to next power of 2 */
- rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
- if (rnd_mem_size < systemcfg->physicalMemorySize)
+ mem_size = lmb_phys_mem_size();
+ rnd_mem_size = 1UL << __ilog2(mem_size);
+ if (rnd_mem_size < mem_size)
rnd_mem_size <<= 1;
/* # pages / 2 */
@@ -176,33 +388,40 @@ static unsigned long get_hashtable_size(void)
return pteg_count << 7;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+void create_section_mapping(unsigned long start, unsigned long end)
+{
+ BUG_ON(htab_bolt_mapping(start, end, start,
+ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
+ mmu_linear_psize));
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
void __init htab_initialize(void)
{
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_rw;
- int i, use_largepages = 0;
unsigned long base = 0, size = 0;
+ int i;
+
extern unsigned long tce_alloc_start, tce_alloc_end;
DBG(" -> htab_initialize()\n");
+ /* Initialize page sizes */
+ htab_init_page_sizes();
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
*/
- htab_size_bytes = get_hashtable_size();
+ htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7;
- /* For debug, make the HTAB 1/8 as big as it normally would be. */
- ifppcdebug(PPCDBG_HTABSIZE) {
- pteg_count >>= 3;
- htab_size_bytes = pteg_count << 7;
- }
-
htab_hash_mask = pteg_count - 1;
- if (systemcfg->platform & PLATFORM_LPAR) {
+ if (platform_is_lpar()) {
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
@@ -211,14 +430,11 @@ void __init htab_initialize(void)
* the absolute address space.
*/
table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ BUG_ON(table == 0);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
- if ( !table ) {
- ppc64_terminate_msg(0x20, "hpt space");
- loop_forever();
- }
htab_address = abs_to_virt(table);
/* htab absolute addr + encoded htabsize */
@@ -226,6 +442,9 @@ void __init htab_initialize(void)
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
+
+ /* Set SDR1 */
+ mtspr(SPRN_SDR1, _SDR1);
}
mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
@@ -234,8 +453,6 @@ void __init htab_initialize(void)
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- use_largepages = 1;
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +463,32 @@ void __init htab_initialize(void)
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and will
- * fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if we
- * only use 2Mb of that space. We will use more of it later for
- * AGP GART. We have to use a full 16Mb large page.
+ * in such a way that it will not cross two lmb regions and
+ * will fit within a single 16Mb page.
+ * The DART space is assumed to be a full 16Mb region even if
+ * we only use 2Mb of that space. We will use more of it later
+ * for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
if (base != dart_tablebase)
- create_pte_mapping(base, dart_tablebase, mode_rw,
- use_largepages);
+ BUG_ON(htab_bolt_mapping(base, dart_tablebase,
+ base, mode_rw,
+ mmu_linear_psize));
if ((base + size) > (dart_tablebase + 16*MB))
- create_pte_mapping(dart_tablebase + 16*MB, base + size,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
+ base + size,
+ dart_tablebase+16*MB,
+ mode_rw,
+ mmu_linear_psize));
continue;
}
#endif /* CONFIG_U3_DART */
- create_pte_mapping(base, base + size, mode_rw, use_largepages);
- }
+ BUG_ON(htab_bolt_mapping(base, base + size, base,
+ mode_rw, mmu_linear_psize));
+ }
/*
* If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +504,9 @@ void __init htab_initialize(void)
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
- create_pte_mapping(tce_alloc_start, tce_alloc_end,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ tce_alloc_start, mode_rw,
+ mmu_linear_psize));
}
DBG(" <- htab_initialize()\n");
@@ -291,6 +514,12 @@ void __init htab_initialize(void)
#undef KB
#undef MB
+void __init htab_initialize_secondary(void)
+{
+ if (!platform_is_lpar())
+ mtspr(SPRN_SDR1, _SDR1);
+}
+
/*
* Called by asm hashtable.S for doing lazy icache flush
*/
@@ -309,7 +538,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= HW_NO_EXEC;
+ pp |= HPTE_R_N;
}
return pp;
}
@@ -325,94 +554,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- int ret;
- int user_region = 0;
- int local = 0;
cpumask_t tmp;
+ int rc, user_region = 0, local = 0;
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 1;
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
+ DBG_LOW(" out of pgtable range !\n");
+ return 1;
+ }
+
+ /* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (! mm)
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
return 1;
-
+ }
vsid = get_vsid(mm->context.id, ea);
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
-#if 0
- case KERNEL_REGION_ID:
- /*
- * Should never get here - entire 0xC0... region is bolted.
- * Send the problem up to do_page_fault
- */
-#endif
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
- break;
}
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Get pgdir */
pgdir = mm->pgd;
-
if (pgdir == NULL)
return 1;
+ /* Check CPU locality */
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
- /* Is this a huge page ? */
- if (unlikely(in_hugepage_area(mm->context, ea)))
- ret = hash_huge_page(mm, access, ea, vsid, local);
- else {
- ptep = find_linux_pte(pgdir, ea);
- if (ptep == NULL)
- return 1;
- ret = __hash_page(ea, access, vsid, ptep, trap, local);
+ /* Handle hugepage regions */
+ if (unlikely(in_hugepage_area(mm->context, ea))) {
+ DBG_LOW(" -> huge page !\n");
+ return hash_huge_page(mm, access, ea, vsid, local);
+ }
+
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ return 1;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
}
- return ret;
+ /* Do actual hashing */
+#ifndef CONFIG_PPC_64K_PAGES
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+ return rc;
}
-void flush_hash_page(unsigned long va, pte_t pte, int local)
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
{
- unsigned long vpn, hash, secondary, slot;
- unsigned long huge = pte_huge(pte);
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+ cpumask_t mask;
+ unsigned long flags;
+ int local = 0;
+
+ /* We don't want huge pages prefaulted for now
+ */
+ if (unlikely(in_hugepage_area(mm->context, ea)))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
+
+ /* Get PTE, VSID, access mask */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+ ptep = find_linux_pte(pgdir, ea);
+ if (!ptep)
+ return;
+ vsid = get_vsid(mm->context.id, ea);
- if (huge)
- vpn = va >> HPAGE_SHIFT;
+ /* Hash it in */
+ local_irq_save(flags);
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (cpus_equal(mm->cpu_vm_mask, mask))
+ local = 1;
+#ifndef CONFIG_PPC_64K_PAGES
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, huge);
- secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-
- ppc_md.hpte_invalidate(slot, va, huge, local);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+ local_irq_restore(flags);
+}
+
+void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
+{
+ unsigned long hash, index, shift, hidx, slot;
+
+ DBG_LOW("flush_hash_page(va=%016x)\n", va);
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
+ ppc_md.hpte_invalidate(slot, va, psize, local);
+ } pte_iterate_hashed_end();
}
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range) {
+ if (ppc_md.flush_hash_range)
ppc_md.flush_hash_range(number, local);
- } else {
+ else {
int i;
struct ppc64_tlb_batch *batch =
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
}
}
@@ -452,6 +756,18 @@ void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
+#ifdef CONFIG_PPC_64K_PAGES
+ extern unsigned int *ht64_call_hpte_insert1;
+ extern unsigned int *ht64_call_hpte_insert2;
+ extern unsigned int *ht64_call_hpte_remove;
+ extern unsigned int *ht64_call_hpte_updatepp;
+
+ make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
+ make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+#endif /* CONFIG_PPC_64K_PAGES */
+
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed97..426c269e552 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mecanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
-#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- int i;
-
if (pte_present(*ptep)) {
- pte_clear(mm, addr, ptep);
+ /* We open-code pte_clear because we need to pass the right
+ * argument to hpte_update (huge / !huge)
+ */
+ unsigned long old = pte_update(ptep, ~0UL);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
-
- for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- ptep++;
- }
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
- int i;
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
-
- for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
- ptep[i] = __pte(0);
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+ *ptep = __pte(0);
return __pte(old);
}
@@ -196,6 +212,12 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
BUG_ON(area >= NUM_HIGH_AREAS);
+ /* Hack, so that each addresses is controlled by exactly one
+ * of the high or low area bitmaps, the first high area starts
+ * at 4GB, not 0 */
+ if (start == 0)
+ start = 0x100000000UL;
+
/* Check no VMAs are in the region */
vma = find_vma(mm, start);
if (vma && (vma->vm_start < end))
@@ -563,6 +585,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
+ if (HPAGE_SHIFT == 0)
+ return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -619,19 +643,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
- unsigned long va, vpn;
- pte_t old_pte, new_pte;
- unsigned long rflags, prpn;
+ unsigned long old_pte, new_pte;
+ unsigned long va, rflags, pa;
long slot;
int err = 1;
- spin_lock(&mm->page_table_lock);
-
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
@@ -640,8 +660,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
-/* BUG_ON(pte_bad(*ptep)); */
-
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
@@ -661,58 +679,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
- old_pte = *ptep;
- new_pte = old_pte;
-
- rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ do {
+ old_pte = pte_val(*ptep);
+ if (old_pte & _PAGE_BUSY)
+ goto out;
+ new_pte = old_pte | _PAGE_BUSY |
+ _PAGE_ACCESSED | _PAGE_HASHPTE;
+ } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
+
+ rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(vpn, 1);
- if (pte_val(old_pte) & _PAGE_SECONDARY)
+ hash = hpt_hash(va, HPAGE_SHIFT);
+ if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, 1);
+ if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
- prpn = pte_pfn(old_pte);
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- /* Update the linux pte with the HPTE slot */
- pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= _PAGE_HASHPTE;
+ /* clear HPTE slot informations in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
+ /* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE, rflags);
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- pte_val(new_pte) |= _PAGE_SECONDARY;
+ new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE |
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
- rflags);
+ mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +750,18 @@ repeat:
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
- /*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
- */
- *ptep = new_pte;
+ new_pte |= (slot << 12) & _PAGE_F_GIX;
}
+ /*
+ * No need to use ldarx/stdcx here because all who
+ * might be updating the pte will hold the
+ * page_table_lock
+ */
+ *ptep = __pte(new_pte & ~_PAGE_BUSY);
+
err = 0;
out:
- spin_unlock(&mm->page_table_lock);
-
return err;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 4612a79dfb6..7d4b8b5f060 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -84,9 +84,6 @@ void MMU_init(void);
/* XXX should be in current.h -- paulus */
extern struct task_struct *current_set[NR_CPUS];
-char *klimit = _end;
-struct device_node *memory_node;
-
extern int init_bootmem_done;
/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29..1134f70f231 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -20,6 +20,8 @@
*
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -57,7 +59,6 @@
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
-#include <asm/ppcdebug.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
@@ -65,6 +66,12 @@
#include <asm/vdso.h>
#include <asm/imalloc.h>
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
#if PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
@@ -73,8 +80,6 @@
#warning TASK_SIZE is smaller than it needs to be.
#endif
-unsigned long klimit = (unsigned long)_end;
-
/* max amount of RAM to use */
unsigned long __max_memory;
@@ -188,12 +193,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
-static const int pgtable_cache_size[2] = {
+#ifdef CONFIG_PPC_64K_PAGES
+static const unsigned int pgtable_cache_size[3] = {
+ PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pte_pmd_cache", "pmd_cache", "pgd_cache",
+};
+#else
+static const unsigned int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
+#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
@@ -201,19 +215,16 @@ void pgtable_cache_init(void)
{
int i;
- BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
+ DBG("Allocating page table cache %s (#%d) "
+ "for size: %08x...\n", name, i, size);
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e1..1dd3cc69a49 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
@@ -107,6 +110,7 @@ EXPORT_SYMBOL(phys_mem_access_prot);
void online_page(struct page *page)
{
ClearPageReserved(page);
+ set_page_count(page, 0);
free_cold_page(page);
totalram_pages++;
num_physpages++;
@@ -124,6 +128,9 @@ int __devinit add_memory(u64 start, u64 size)
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ start += KERNELBASE;
+ create_section_mapping(start, start + size);
+
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones;
@@ -355,7 +362,7 @@ void __init mem_init(void)
}
codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
- datasize = (unsigned long)&__init_begin - (unsigned long)&_sdata;
+ datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
@@ -493,18 +500,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
- /* handle i-cache coherency */
- unsigned long pfn = pte_pfn(pte);
-#ifdef CONFIG_PPC32
- pmd_t *pmd;
-#else
- unsigned long vsid;
- void *pgdir;
- pte_t *ptep;
- int local = 0;
- cpumask_t tmp;
- unsigned long flags;
+#ifdef CONFIG_PPC_STD_MMU
+ unsigned long access = 0, trap;
#endif
+ unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +534,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
-#ifdef CONFIG_PPC32
- if (Hash == 0)
- return;
- pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
- if (!pmd_none(*pmd))
- add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
-#else
- pgdir = vma->vm_mm->pgd;
- if (pgdir == NULL)
- return;
- ptep = find_linux_pte(pgdir, address);
- if (!ptep)
+ /* We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot
+ *
+ * We also avoid filling the hash if not coming from a fault
+ */
+ if (current->thread.regs == NULL)
return;
-
- vsid = get_vsid(vma->vm_mm->context.id, address);
-
- local_irq_save(flags);
- tmp = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
- local = 1;
-
- __hash_page(address, 0, vsid, ptep, 0x300, local);
- local_irq_restore(flags);
-#endif
-#endif
+ trap = TRAP(current->thread.regs);
+ if (trap == 0x400)
+ access |= _PAGE_EXEC;
+ else if (trap != 0x300)
+ return;
+ hash_preload(vma->vm_mm, address, access, trap);
+#endif /* CONFIG_PPC_STD_MMU */
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 4035cad8d7f..da09ba03c42 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -21,6 +21,7 @@
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/system.h>
+#include <asm/smp.h>
static int numa_enabled = 1;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a7820613..c7f7bb6f30b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -59,7 +59,6 @@
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
-#include <asm/ppcdebug.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
@@ -101,7 +100,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
@@ -117,27 +115,17 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
- unsigned long va, vpn, hash, hpteg;
-
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
+ *
*/
- vsid = get_kernel_vsid(ea);
- va = (vsid << 28) | (ea & 0xFFFFFFF);
- vpn = va >> PAGE_SHIFT;
-
- hash = hpt_hash(vpn, 0);
-
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- /* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
- HPTE_V_BOLTED,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
- == -1) {
- panic("map_io_page: could not insert mapping");
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_virtual_psize)) {
+ printk(KERN_ERR "Failed to do bolted mapping IO "
+ "memory at %016lx !\n", pa);
+ return -ENOMEM;
}
}
return 0;
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e..ed7fcfe5fd3 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
}
/*
+ * Preload a translation in the hash table
+ */
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
+{
+ pmd_t *pmd;
+
+ if (Hash == 0)
+ return;
+ pmd = pmd_offset(pgd_offset(mm, ea), ea);
+ if (!pmd_none(*pmd))
+ add_hash_page(mm->context, ea, pmd_val(*pmd));
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a3..60e852f2f8e 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
-extern void slb_allocate(unsigned long ea);
+extern void slb_allocate_realmode(unsigned long ea);
+extern void slb_allocate_user(unsigned long ea);
+
+static void slb_allocate(unsigned long ea)
+{
+ /* Currently, we do real mode for all SLBs including user, but
+ * that will change if we bring back dynamic VSIDs
+ */
+ slb_allocate_realmode(ea);
+}
static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
- unsigned long ksp_flags = SLB_VSID_KERNEL;
+ unsigned long linear_llp, virtual_llp, lflags, vflags;
unsigned long ksp_esid_data;
WARN_ON(!irqs_disabled());
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- ksp_flags |= SLB_VSID_L;
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
- :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
+ :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
"r"(mk_esid_data(VMALLOCBASE, 1)),
- "r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
+ "r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
get_paca()->slb_cache_ptr = 0;
get_paca()->context = mm->context;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
/*
* preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
slb_allocate(unmapped_base);
}
+static inline void patch_slb_encoding(unsigned int *insn_addr,
+ unsigned int immed)
+{
+ /* Assume the instruction had a "0" immediate value, just
+ * "or" in the new value
+ */
+ *insn_addr |= immed;
+ flush_icache_range((unsigned long)insn_addr, 4+
+ (unsigned long)insn_addr);
+}
+
void slb_initialize(void)
{
+ unsigned long linear_llp, virtual_llp;
+ static int slb_encoding_inited;
+ extern unsigned int *slb_miss_kernel_load_linear;
+ extern unsigned int *slb_miss_kernel_load_virtual;
+ extern unsigned int *slb_miss_user_load_normal;
+#ifdef CONFIG_HUGETLB_PAGE
+ extern unsigned int *slb_miss_user_load_huge;
+ unsigned long huge_llp;
+
+ huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
+#endif
+
+ /* Prepare our SLB miss handler based on our page size */
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ if (!slb_encoding_inited) {
+ slb_encoding_inited = 1;
+ patch_slb_encoding(slb_miss_kernel_load_linear,
+ SLB_VSID_KERNEL | linear_llp);
+ patch_slb_encoding(slb_miss_kernel_load_virtual,
+ SLB_VSID_KERNEL | virtual_llp);
+ patch_slb_encoding(slb_miss_user_load_normal,
+ SLB_VSID_USER | virtual_llp);
+
+ DBG("SLB: linear LLP = %04x\n", linear_llp);
+ DBG("SLB: virtual LLP = %04x\n", virtual_llp);
+#ifdef CONFIG_HUGETLB_PAGE
+ patch_slb_encoding(slb_miss_user_load_huge,
+ SLB_VSID_USER | huge_llp);
+ DBG("SLB: huge LLP = %04x\n", huge_llp);
+#endif
+ }
+
/* On iSeries the bolted entries have already been set up by
* the hypervisor from the lparMap data in head.S */
#ifndef CONFIG_PPC_ISERIES
- unsigned long flags = SLB_VSID_KERNEL;
+ {
+ unsigned long lflags, vflags;
- /* Invalidate the entire SLB (even slot 0) & all the ERATS */
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- flags |= SLB_VSID_L;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
- asm volatile("isync":::"memory");
- asm volatile("slbmte %0,%0"::"r" (0) : "memory");
+ /* Invalidate the entire SLB (even slot 0) & all the ERATS */
+ asm volatile("isync":::"memory");
+ asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
- create_slbe(KERNELBASE, flags, 0);
- create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
+ create_slbe(KERNELBASE, lflags, 0);
+
+ /* VMALLOC space has 4K pages always for now */
+ create_slbe(VMALLOCBASE, vflags, 1);
+
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
* elsewhere, we'll call _switch() which will bolt in the new
* one. */
asm volatile("isync":::"memory");
-#endif
+ }
+#endif /* CONFIG_PPC_ISERIES */
get_paca()->stab_rr = SLB_NUM_BOLTED;
}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503b..950ffc5848c 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
#include <linux/config.h>
#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
-/* void slb_allocate(unsigned long ea);
+/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate)
- /*
- * First find a slot, round robin. Previously we tried to find
- * a free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-#ifdef CONFIG_PPC_ISERIES
- /*
- * On iSeries, the "bolted" stack segment can be cast out on
- * shared processor switch so we need to check for a miss on
- * it and restore it to the right slot.
- */
- ld r9,PACAKSAVE(r13)
- clrrdi r9,r9,28
- clrrdi r11,r3,28
- li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
- cmpld r9,r11
- beq 3f
-#endif /* CONFIG_PPC_ISERIES */
-
- ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-3:
- /* r3 = faulting address, r10 = entry */
+_GLOBAL(slb_allocate_realmode)
+ /* r3 = faulting address */
srdi r9,r3,60 /* get region */
- srdi r3,r3,28 /* get esid */
+ srdi r10,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
- rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
- oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
-
- /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
-
+ /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
@@ -81,43 +48,166 @@ _GLOBAL(slb_allocate)
* top segment. That's ok, the scramble below will translate
* it to VSID 0, which is reserved as a bad VSID - one which
* will never have any pages in it. */
- li r11,SLB_VSID_KERNEL
-BEGIN_FTR_SECTION
- bne cr7,9f
- li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
- b 9f
-0: /* user address: proto-VSID = context<<15 | ESID */
- srdi. r9,r3,USER_ESID_BITS
+ /* Check if hitting the linear mapping of the vmalloc/ioremap
+ * kernel space
+ */
+ bne cr7,1f
+
+ /* Linear mapping encoding bits, the "li" instruction below will
+ * be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_linear)
+ li r11,0
+ b slb_finish_load
+
+1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
+ * will be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_virtual)
+ li r11,0
+ b slb_finish_load
+
+
+0: /* user address: proto-VSID = context << 15 | ESID. First check
+ * if the address is within the boundaries of the user region
+ */
+ srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
+ /* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
+ b 1f
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+ cmpldi r10,16
+
+ lhz r9,PACALOWHTLBAREAS(r13)
+ mr r11,r10
+ blt 5f
+
lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
- srd r9,r9,r11
- lhz r11,PACALOWHTLBAREAS(r13)
- srd r11,r11,r3
- or r9,r9,r11
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+ srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
+
+5: srd r9,r9,r11
+ andi. r9,r9,1
+ beq 1f
+_GLOBAL(slb_miss_user_load_huge)
+ li r11,0
+ b 2f
+1:
#endif /* CONFIG_HUGETLB_PAGE */
- li r11,SLB_VSID_USER
+_GLOBAL(slb_miss_user_load_normal)
+ li r11,0
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-#endif /* CONFIG_HUGETLB_PAGE */
+2:
+ ld r9,PACACONTEXTID(r13)
+ rldimi r10,r9,USER_ESID_BITS,0
+ b slb_finish_load
+8: /* invalid EA */
+ li r10,0 /* BAD_VSID */
+ li r11,SLB_VSID_USER /* flags don't much matter */
+ b slb_finish_load
+
+#ifdef __DISABLED__
+
+/* void slb_allocate_user(unsigned long ea);
+ *
+ * Create an SLB entry for the given EA (user or kernel).
+ * r3 = faulting address, r13 = PACA
+ * r9, r10, r11 are clobbered by this function
+ * No other registers are examined or changed.
+ *
+ * It is called with translation enabled in order to be able to walk the
+ * page tables. This is not currently used.
+ */
+_GLOBAL(slb_allocate_user)
+ /* r3 = faulting address */
+ srdi r10,r3,28 /* get esid */
+
+ crset 4*cr7+lt /* set "user" flag for later */
+
+ /* check if we fit in the range covered by the pagetables*/
+ srdi. r9,r3,PGTABLE_EADDR_SIZE
+ crnot 4*cr0+eq,4*cr0+eq
+ beqlr
+
+ /* now we need to get to the page tables in order to get the page
+ * size encoding from the PMD. In the future, we'll be able to deal
+ * with 1T segments too by getting the encoding from the PGD instead
+ */
+ ld r9,PACAPGDIR(r13)
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,8,25,28
+ ldx r9,r9,r11 /* get pgd_t */
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,3,17,28
+ ldx r9,r9,r11 /* get pmd_t */
+ cmpldi cr0,r9,0
+ beqlr
+
+ /* build vsid flags */
+ andi. r11,r9,SLB_VSID_LLP
+ ori r11,r11,SLB_VSID_USER
+
+ /* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
- rldimi r3,r9,USER_ESID_BITS,0
+ rldimi r10,r9,USER_ESID_BITS,0
+
+ /* fall through slb_finish_load */
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * Finish loading of an SLB entry and return
+ *
+ * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
+ */
+slb_finish_load:
+ ASM_VSID_SCRAMBLE(r10,r9)
+ rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+
+ /* r3 = EA, r11 = VSID data */
+ /*
+ * Find a slot, round robin. Previously we tried to find a
+ * free slot first but that took too long. Unfortunately we
+ * dont have any LRU information to help us choose a slot.
+ */
+#ifdef CONFIG_PPC_ISERIES
+ /*
+ * On iSeries, the "bolted" stack segment can be cast out on
+ * shared processor switch so we need to check for a miss on
+ * it and restore it to the right slot.
+ */
+ ld r9,PACAKSAVE(r13)
+ clrrdi r9,r9,28
+ clrrdi r3,r3,28
+ li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
+ cmpld r9,r3
+ beq 3f
+#endif /* CONFIG_PPC_ISERIES */
+
+ ld r10,PACASTABRR(r13)
+ addi r10,r10,1
+ /* use a cpu feature mask if we ever change our slb size */
+ cmpldi r10,SLB_NUM_ENTRIES
+
+ blt+ 4f
+ li r10,SLB_NUM_BOLTED
-9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
- ASM_VSID_SCRAMBLE(r3,r9)
+4:
+ std r10,PACASTABRR(r13)
+
+3:
+ rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
+ oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
- rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+ /* r3 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -125,7 +215,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
*/
slbmte r11,r10
- bgelr cr7 /* we're done for kernel addresses */
+ /* we're done for kernel addresses */
+ crclr 4*cr0+eq /* set result to "success" */
+ bgelr cr7
/* Update the slb cache */
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +235,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
li r3,SLB_CACHE_ENTRIES+1
2:
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ crclr 4*cr0+eq /* set result to "success" */
blr
-8: /* invalid EA */
- li r3,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf2..cfbb4e1f966 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -20,13 +20,13 @@
#include <asm/cputable.h>
#include <asm/lmb.h>
#include <asm/abs_addr.h>
+#include <asm/firmware.h>
struct stab_entry {
unsigned long esid_data;
unsigned long vsid_data;
};
-/* Both the segment table and SLB code uses the following cache */
#define NR_STAB_CACHE_ENTRIES 8
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +186,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Never flush the first entry. */
ste += 1;
for (entry = 1;
- entry < (PAGE_SIZE / sizeof(struct stab_entry));
+ entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
@@ -200,6 +200,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
__get_cpu_var(stab_cache_ptr) = 0;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
+
/* Now preload some entries for the new task */
if (test_tsk_thread_flag(tsk, TIF_32BIT))
unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +227,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("sync" : : : "memory");
}
-extern void slb_initialize(void);
-
/*
* Allocate segment tables for secondary CPUs. These must all go in
* the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +245,21 @@ void stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
- newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
+ newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+ 1<<SID_SHIFT);
if (! newstab)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
newstab += KERNELBASE;
- memset((void *)newstab, 0, PAGE_SIZE);
+ memset((void *)newstab, 0, HW_PAGE_SIZE);
paca[cpu].stab_addr = newstab;
paca[cpu].stab_real = virt_to_abs(newstab);
- printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
+ printk(KERN_INFO "Segment table for CPU %d at 0x%lx "
+ "virtual, 0x%lx absolute\n",
+ cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
}
}
@@ -266,14 +271,28 @@ void stabs_alloc(void)
void stab_initialize(unsigned long stab)
{
unsigned long vsid = get_kernel_vsid(KERNELBASE);
+ unsigned long stabreal;
- if (cpu_has_feature(CPU_FTR_SLB)) {
- slb_initialize();
- } else {
- asm volatile("isync; slbia; isync":::"memory");
- make_ste(stab, GET_ESID(KERNELBASE), vsid);
+ asm volatile("isync; slbia; isync":::"memory");
+ make_ste(stab, GET_ESID(KERNELBASE), vsid);
- /* Order update */
- asm volatile("sync":::"memory");
+ /* Order update */
+ asm volatile("sync":::"memory");
+
+ /* Set ASR */
+ stabreal = get_paca()->stab_real | 0x1ul;
+
+#ifdef CONFIG_PPC_ISERIES
+ if (firmware_has_feature(FW_FEATURE_ISERIES)) {
+ HvCall1(HvCallBaseSetASR, stabreal);
+ return;
+ }
+#endif /* CONFIG_PPC_ISERIES */
+#ifdef CONFIG_PPC_PSERIES
+ if (platform_is_lpar()) {
+ plpar_hcall_norets(H_SET_ASR, stabreal);
+ return;
}
+#endif
+ mtspr(SPRN_ASR, stabreal);
}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4..53e31b834ac 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -30,7 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include <linux/highmem.h>
+#include <asm/bug.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*/
void hpte_update(struct mm_struct *mm, unsigned long addr,
- unsigned long pte, int wrprot)
+ pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
unsigned long vsid;
+ unsigned int psize = mmu_virtual_psize;
int i;
i = batch->index;
+ /* We mask the address for the base page size. Huge pages will
+ * have applied their own masking already
+ */
+ addr &= PAGE_MASK;
+
+ /* Get page size (maybe move back to caller) */
+ if (huge) {
+#ifdef CONFIG_HUGETLB_PAGE
+ psize = mmu_huge_psize;
+#else
+ BUG();
+#endif
+ }
+
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
+ *
+ * We also need to ensure only one page size is present in a given
+ * batch
*/
- if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) {
+ if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
batch->mm = mm;
- batch->large = pte_huge(pte);
+ batch->psize = psize;
}
if (addr < KERNELBASE) {
vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
vsid = get_kernel_vsid(addr);
batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
- batch->pte[i] = __pte(pte);
+ batch->pte[i] = __real_pte(__pte(pte), ptep);
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
local = 1;
if (i == 1)
- flush_hash_page(batch->vaddr[0], batch->pte[0], local);
+ flush_hash_page(batch->vaddr[0], batch->pte[0],
+ batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
index 19d37730b66..eb2dece76a5 100644
--- a/arch/powerpc/oprofile/Kconfig
+++ b/arch/powerpc/oprofile/Kconfig
@@ -1,7 +1,3 @@
-
-menu "Profiling support"
- depends on EXPERIMENTAL
-
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
@@ -19,5 +15,3 @@ config OPROFILE
If unsure, say N.
-endmenu
-
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index 88644931584..e3a024e324b 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -17,6 +17,7 @@
#include <asm/systemcfg.h>
#include <asm/rtas.h>
#include <asm/oprofile_impl.h>
+#include <asm/reg.h>
#define dbg(args...)
@@ -81,6 +82,26 @@ static void power4_reg_setup(struct op_counter_config *ctr,
extern void ppc64_enable_pmcs(void);
+/*
+ * Older CPUs require the MMCRA sample bit to be always set, but newer
+ * CPUs only want it set for some groups. Eventually we will remove all
+ * knowledge of this bit in the kernel, oprofile userspace should be
+ * setting it when required.
+ *
+ * In order to keep current installations working we force the bit for
+ * those older CPUs. Once everyone has updated their oprofile userspace we
+ * can remove this hack.
+ */
+static inline int mmcra_must_set_sample(void)
+{
+ if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
+ __is_processor(PV_970) || __is_processor(PV_970FX) ||
+ __is_processor(PV_970MP))
+ return 1;
+
+ return 0;
+}
+
static void power4_cpu_setup(void *unused)
{
unsigned int mmcr0 = mmcr0_val;
@@ -98,7 +119,8 @@ static void power4_cpu_setup(void *unused)
mtspr(SPRN_MMCR1, mmcr1_val);
- mmcra |= MMCRA_SAMPLE_ENABLE;
+ if (mmcra_must_set_sample())
+ mmcra |= MMCRA_SAMPLE_ENABLE;
mtspr(SPRN_MMCRA, mmcra);
dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
@@ -211,8 +233,7 @@ static unsigned long get_pc(struct pt_regs *regs)
mmcra = mfspr(SPRN_MMCRA);
/* Were we in the hypervisor? */
- if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) &&
- (mmcra & MMCRA_SIHV))
+ if (platform_is_lpar() && (mmcra & MMCRA_SIHV))
/* function descriptor madness */
return *((unsigned long *)hypervisor_bucket);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index ecd32d5d85f..4099ddab920 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -361,7 +361,9 @@ static void __init chrp_find_openpic(void)
printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
- prom_get_irq_senses(init_senses, NUM_8259_INTERRUPTS, NR_IRQS - 4);
+ prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4);
+ /* i8259 cascade is always positive level */
+ init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE;
iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
if (iranges == NULL)
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c3374ca..30bdcf3925d 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
}
-static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
- unsigned long arpn;
long slot;
hpte_t lhpte;
int secondary = 0;
+ BUG_ON(psize != MMU_PAGE_4K);
+
/*
* The hypervisor tries both primary and secondary.
* If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
iSeries_hlock(hpte_group);
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
- BUG_ON(lhpte.v & HPTE_V_VALID);
+ slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
+ if (unlikely(lhpte.v & HPTE_V_VALID)) {
+ if (vflags & HPTE_V_BOLTED) {
+ HvCallHpt_setSwBits(slot, 0x10, 0);
+ HvCallHpt_setPp(slot, PP_RWXX);
+ iSeries_hunlock(hpte_group);
+ if (slot < 0)
+ return 0x8 | (slot & 7);
+ else
+ return slot & 7;
+ }
+ BUG();
+ }
if (slot == -1) { /* No available entry found in either group */
iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
slot &= 0x7fffffffffffffff;
}
- arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
- lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+ lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
+ lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
/* Now fill in the actual HPTE */
HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
return (secondary << 3) | (slot & 7);
}
-long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
-{
- long slot;
- hpte_t lhpte;
-
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-
- if (lhpte.v & HPTE_V_VALID) {
- /* Bolt the existing HPTE */
- HvCallHpt_setSwBits(slot, 0x10, 0);
- HvCallHpt_setPp(slot, PP_RWXX);
- return 0;
- }
-
- return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
-}
-
static unsigned long iSeries_hpte_getword0(unsigned long slot)
{
hpte_t hpte;
@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
* bits 61..63 : PP2,PP1,PP0
*/
static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+ unsigned long va, int psize, int local)
{
hpte_t hpte;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
iSeries_hlock(slot);
HvCallHpt_get(&hpte, slot);
- if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
+ want_v = hpte_encode_v(va, MMU_PAGE_4K);
+
+ if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
/*
* Hypervisor expects bits as NPPP, which is
* different from how they are mapped in our PP.
@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
*
* No need to lock here because we should be the only user.
*/
-static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
unsigned long vsid,va,vpn;
long slot;
+ BUG_ON(psize != MMU_PAGE_4K);
+
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
+ vpn = va >> HW_PAGE_SHIFT;
slot = iSeries_hpte_find(vpn);
if (slot == -1)
panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
}
static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
unsigned long hpte_v;
unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec7347968..f476d71194f 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
while (len) {
hv_buf.addr = cur;
- left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
+ left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
if (left_this_page > len)
left_this_page = len;
hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
HvCall2(HvCallBaseWriteLogBuffer,
virt_to_abs(&hv_buf),
left_this_page);
- cur = (cur & PAGE_MASK) + PAGE_SIZE;
+ cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
}
}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b5c5a..bf081b34582 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
u64 rc;
union tce_entry tce;
+ index <<= TCE_PAGE_FACTOR;
+ npages <<= TCE_PAGE_FACTOR;
+
while (npages--) {
tce.te_word = 0;
- tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
+ tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
rc);
index++;
- uaddr += PAGE_SIZE;
+ uaddr += TCE_PAGE_SIZE;
}
}
@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
{
u64 rc;
+ npages <<= TCE_PAGE_FACTOR;
+ index <<= TCE_PAGE_FACTOR;
+
while (npages--) {
rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
if (rc)
@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
}
}
-#ifdef CONFIG_PCI
-/*
- * This function compares the known tables to find an iommu_table
- * that has already been built for hardware TCEs.
- */
-static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
-{
- struct pci_dn *pdn;
-
- list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
- struct iommu_table *it = pdn->iommu_table;
- if ((it != NULL) &&
- (it->it_type == TCE_PCI) &&
- (it->it_offset == tbl->it_offset) &&
- (it->it_index == tbl->it_index) &&
- (it->it_size == tbl->it_size))
- return it;
- }
- return NULL;
-}
-
/*
* Call Hv with the architected data structure to get TCE table info.
* info. Put the returned data into the Linux representation of the
@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
* 2. TCE table per Bus.
* 3. TCE Table per IOA.
*/
-static void iommu_table_getparms(struct pci_dn *pdn,
- struct iommu_table* tbl)
+void iommu_table_getparms_iSeries(unsigned long busno,
+ unsigned char slotno,
+ unsigned char virtbus,
+ struct iommu_table* tbl)
{
struct iommu_table_cb *parms;
@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
memset(parms, 0, sizeof(*parms));
- parms->itc_busno = pdn->busno;
- parms->itc_slotno = pdn->LogicalSlot;
- parms->itc_virtbus = 0;
+ parms->itc_busno = busno;
+ parms->itc_slotno = slotno;
+ parms->itc_virtbus = virtbus;
HvCallXm_getTceTableParms(iseries_hv_addr(parms));
@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
/* itc_size is in pages worth of table, it_size is in # of entries */
- tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
+ tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
+ sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
tbl->it_busno = parms->itc_busno;
- tbl->it_offset = parms->itc_offset;
+ tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
tbl->it_index = parms->itc_index;
tbl->it_blocksize = 1;
- tbl->it_type = TCE_PCI;
+ tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
kfree(parms);
}
+#ifdef CONFIG_PCI
+/*
+ * This function compares the known tables to find an iommu_table
+ * that has already been built for hardware TCEs.
+ */
+static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
+{
+ struct pci_dn *pdn;
+
+ list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
+ struct iommu_table *it = pdn->iommu_table;
+ if ((it != NULL) &&
+ (it->it_type == TCE_PCI) &&
+ (it->it_offset == tbl->it_offset) &&
+ (it->it_index == tbl->it_index) &&
+ (it->it_size == tbl->it_size))
+ return it;
+ }
+ return NULL;
+}
+
+
void iommu_devnode_init_iSeries(struct device_node *dn)
{
struct iommu_table *tbl;
@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
- iommu_table_getparms(pdn, tbl);
+ iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
/* Look for existing tce table */
pdn->iommu_table = iommu_table_find(tbl);
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index c1135912cc0..01090e9ce0c 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -35,7 +35,6 @@
#include <linux/irq.h>
#include <linux/spinlock.h>
-#include <asm/ppcdebug.h>
#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/iseries/hv_call_xm.h>
@@ -104,6 +103,9 @@ static void intReceived(struct XmPciLpEvent *eventParm,
struct pt_regs *regsParm)
{
int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
++Pci_Interrupt_Count;
@@ -111,7 +113,20 @@ static void intReceived(struct XmPciLpEvent *eventParm,
case XmPciLpEvent_SlotInterrupt:
irq = eventParm->hvLpEvent.xCorrelationToken;
/* Dispatch the interrupt handlers for this irq */
- ppc_irq_dispatch_handler(regsParm, irq);
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regsParm, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regsParm);
HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber,
eventParm->eventData.slotInterrupt.subBusNumber,
eventParm->eventData.slotInterrupt.deviceId);
@@ -227,8 +242,6 @@ static void iSeries_enable_IRQ(unsigned int irq)
/* Unmask secondary INTA */
mask = 0x80000000;
HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask);
- PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
- bus, subBus, deviceId, irq);
}
/* This is called by iSeries_activate_IRQs */
@@ -310,15 +323,11 @@ static void iSeries_disable_IRQ(unsigned int irq)
/* Mask secondary INTA */
mask = 0x80000000;
HvCallPci_maskInterrupts(bus, subBus, deviceId, mask);
- PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
- bus, subBus, deviceId, irq);
}
/*
- * Need to define this so ppc_irq_dispatch_handler will NOT call
- * enable_IRQ at the end of interrupt handling. However, this does
- * nothing because there is not enough information provided to do
- * the EOI HvCall. This is done by XmPciLpEvent.c
+ * This does nothing because there is not enough information
+ * provided to do the EOI HvCall. This is done by XmPciLpEvent.c
*/
static void iSeries_end_IRQ(unsigned int irq)
{
diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S
index 09f14522e17..dfe7aa1ba09 100644
--- a/arch/powerpc/platforms/iseries/misc.S
+++ b/arch/powerpc/platforms/iseries/misc.S
@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
.text
diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c
index 7d7d5884343..4b75131773a 100644
--- a/arch/powerpc/platforms/iseries/pci.c
+++ b/arch/powerpc/platforms/iseries/pci.c
@@ -32,7 +32,6 @@
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/abs_addr.h>
@@ -207,10 +206,6 @@ static struct device_node *build_device_node(HvBusNumber Bus,
struct device_node *node;
struct pci_dn *pdn;
- PPCDBG(PPCDBG_BUSWALK,
- "-build_device_node 0x%02X.%02X.%02X Function: %02X\n",
- Bus, SubBus, AgentId, Function);
-
node = kmalloc(sizeof(struct device_node), GFP_KERNEL);
if (node == NULL)
return NULL;
@@ -243,8 +238,6 @@ unsigned long __init find_and_init_phbs(void)
struct pci_controller *phb;
HvBusNumber bus;
- PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n");
-
/* Check all possible buses. */
for (bus = 0; bus < 256; bus++) {
int ret = HvCallXm_testBus(bus);
@@ -261,9 +254,6 @@ unsigned long __init find_and_init_phbs(void)
phb->last_busno = bus;
phb->ops = &iSeries_pci_ops;
- PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n",
- phb, bus);
-
/* Find and connect the devices. */
scan_PHB_slots(phb);
}
@@ -285,11 +275,9 @@ unsigned long __init find_and_init_phbs(void)
*/
void iSeries_pcibios_init(void)
{
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n");
iomm_table_initialize();
find_and_init_phbs();
io_page_mask = -1;
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n");
}
/*
@@ -301,8 +289,6 @@ void __init iSeries_pci_final_fixup(void)
struct device_node *node;
int DeviceCount = 0;
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n");
-
/* Fix up at the device node and pci_dev relationship */
mf_display_src(0xC9000100);
@@ -316,9 +302,6 @@ void __init iSeries_pci_final_fixup(void)
++DeviceCount;
pdev->sysdata = (void *)node;
PCI_DN(node)->pcidev = pdev;
- PPCDBG(PPCDBG_BUSWALK,
- "pdev 0x%p <==> DevNode 0x%p\n",
- pdev, node);
allocate_device_bars(pdev);
iSeries_Device_Information(pdev, DeviceCount);
iommu_devnode_init_iSeries(node);
@@ -333,13 +316,10 @@ void __init iSeries_pci_final_fixup(void)
void pcibios_fixup_bus(struct pci_bus *PciBus)
{
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",
- PciBus->number);
}
void pcibios_fixup_resources(struct pci_dev *pdev)
{
- PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev);
}
/*
@@ -401,9 +381,6 @@ static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
printk("found device at bus %d idsel %d func %d (AgentId %x)\n",
bus, IdSel, Function, AgentId);
/* Connect EADs: 0x18.00.12 = 0x00 */
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:Connect EADs: 0x%02X.%02X.%02X\n",
- bus, SubBus, AgentId);
HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId,
iseries_hv_addr(BridgeInfo),
sizeof(struct HvCallPci_BridgeInfo));
@@ -414,14 +391,6 @@ static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
BridgeInfo->maxAgents,
BridgeInfo->maxSubBusNumber,
BridgeInfo->logicalSlotNumber);
- PPCDBG(PPCDBG_BUSWALK,
- "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n",
- BridgeInfo->busUnitInfo.deviceType,
- BridgeInfo->subBusNumber,
- BridgeInfo->maxAgents,
- BridgeInfo->maxSubBusNumber,
- BridgeInfo->logicalSlotNumber);
-
if (BridgeInfo->busUnitInfo.deviceType ==
HvCallPci_BridgeDevice) {
/* Scan_Bridge_Slot...: 0x18.00.12 */
@@ -454,9 +423,6 @@ static int scan_bridge_slot(HvBusNumber Bus,
/* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */
Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel);
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n",
- Bus, 0, EADsIdSel, Irq);
/*
* Connect all functions of any device found.
@@ -482,9 +448,6 @@ static int scan_bridge_slot(HvBusNumber Bus,
printk("read vendor ID: %x\n", VendorId);
/* FoundDevice: 0x18.28.10 = 0x12AE */
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n",
- Bus, SubBus, AgentId, VendorId, Irq);
HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId,
PCI_INTERRUPT_LINE, Irq);
if (HvRc != 0)
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fda712b4216..6a29f301436 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -39,7 +39,8 @@
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
-
+#include <asm/systemcfg.h>
+#include <asm/system.h>
#include <asm/time.h>
#include <asm/paca.h>
#include <asm/cache.h>
@@ -71,9 +72,7 @@ extern void hvlog(char *fmt, ...);
#endif
/* Function Prototypes */
-extern void ppcdbg_initialize(void);
-
-static void build_iSeries_Memory_Map(void);
+static unsigned long build_iSeries_Memory_Map(void);
static void iseries_shared_idle(void);
static void iseries_dedicated_idle(void);
#ifdef CONFIG_PCI
@@ -86,7 +85,6 @@ static void iSeries_pci_final_fixup(void) { }
int piranha_simulator;
extern int rd_size; /* Defined in drivers/block/rd.c */
-extern unsigned long klimit;
extern unsigned long embedded_sysmap_start;
extern unsigned long embedded_sysmap_end;
@@ -309,8 +307,6 @@ static void __init iSeries_init_early(void)
ppc64_firmware_features = FW_FEATURE_ISERIES;
- ppcdbg_initialize();
-
ppc64_interrupt_controller = IC_ISERIES;
#if defined(CONFIG_BLK_DEV_INITRD)
@@ -320,11 +316,11 @@ static void __init iSeries_init_early(void)
*/
if (naca.xRamDisk) {
initrd_start = (unsigned long)__va(naca.xRamDisk);
- initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
+ initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
initrd_below_start_ok = 1; // ramdisk in kernel space
ROOT_DEV = Root_RAM0;
- if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
- rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
+ if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
+ rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
} else
#endif /* CONFIG_BLK_DEV_INITRD */
{
@@ -407,9 +403,11 @@ void mschunks_alloc(unsigned long num_chunks)
* a table used to translate Linux's physical addresses to these
* absolute addresses. Absolute addresses are needed when
* communicating with the hypervisor (e.g. to build HPT entries)
+ *
+ * Returns the physical memory size
*/
-static void __init build_iSeries_Memory_Map(void)
+static unsigned long __init build_iSeries_Memory_Map(void)
{
u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize;
u32 nextPhysChunk;
@@ -470,13 +468,14 @@ static void __init build_iSeries_Memory_Map(void)
*/
hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
hptSizePages = (u32)HvCallHpt_getHptPages();
- hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
+ hptSizeChunks = hptSizePages >>
+ (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
printk("HPT absolute addr = %016lx, size = %dK\n",
chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
- ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE);
+ ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
/*
* The actual hashed page table is in the hypervisor,
@@ -541,7 +540,7 @@ static void __init build_iSeries_Memory_Map(void)
* which should be equal to
* nextPhysChunk
*/
- systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk);
+ return chunk_to_addr(nextPhysChunk);
}
/*
@@ -567,8 +566,8 @@ static void __init iSeries_setup_arch(void)
printk("Max physical processors = %d\n",
itVpdAreas.xSlicMaxPhysicalProcs);
- systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR;
- printk("Processor version = %x\n", systemcfg->processor);
+ _systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR;
+ printk("Processor version = %x\n", _systemcfg->processor);
}
static void iSeries_show_cpuinfo(struct seq_file *m)
@@ -629,7 +628,7 @@ static void __init iSeries_fixup_klimit(void)
*/
if (naca.xRamDisk)
klimit = KERNELBASE + (u64)naca.xRamDisk +
- (naca.xRamDiskSize * PAGE_SIZE);
+ (naca.xRamDiskSize * HW_PAGE_SIZE);
else {
/*
* No ram disk was included - check and see if there
@@ -697,20 +696,18 @@ static void iseries_shared_idle(void)
if (hvlpevent_is_pending())
process_iSeries_events();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
static void iseries_dedicated_idle(void)
{
- long oldval;
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
while (!need_resched()) {
ppc64_runlatch_off();
HMT_low();
@@ -723,13 +720,12 @@ static void iseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
@@ -934,7 +930,7 @@ void dt_cpus(struct iseries_flat_dt *dt)
dt_end_node(dt);
}
-void build_flat_dt(struct iseries_flat_dt *dt)
+void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size)
{
u64 tmp[2];
@@ -950,7 +946,7 @@ void build_flat_dt(struct iseries_flat_dt *dt)
dt_prop_str(dt, "name", "memory");
dt_prop_str(dt, "device_type", "memory");
tmp[0] = 0;
- tmp[1] = systemcfg->physicalMemorySize;
+ tmp[1] = phys_mem_size;
dt_prop_u64_list(dt, "reg", tmp, 2);
dt_end_node(dt);
@@ -970,13 +966,15 @@ void build_flat_dt(struct iseries_flat_dt *dt)
void * __init iSeries_early_setup(void)
{
+ unsigned long phys_mem_size;
+
iSeries_fixup_klimit();
/*
* Initialize the table which translate Linux physical addresses to
* AS/400 absolute addresses
*/
- build_iSeries_Memory_Map();
+ phys_mem_size = build_iSeries_Memory_Map();
iSeries_get_cmdline();
@@ -986,7 +984,7 @@ void * __init iSeries_early_setup(void)
/* Parse early parameters, in particular mem=x */
parse_early_param();
- build_flat_dt(&iseries_dt);
+ build_flat_dt(&iseries_dt, phys_mem_size);
return (void *) __pa(&iseries_dt);
}
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 3336bad6772..fcb094ec6ae 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -40,7 +40,6 @@
#include <asm/paca.h>
#include <asm/iseries/hv_call.h>
#include <asm/time.h>
-#include <asm/ppcdebug.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/system.h>
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a66876c2..384360ee06e 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
static void __init iommu_vio_init(void)
{
- struct iommu_table *t;
- struct iommu_table_cb cb;
- unsigned long cbp;
- unsigned long itc_entries;
+ iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
+ veth_iommu_table.it_size /= 2;
+ vio_iommu_table = veth_iommu_table;
+ vio_iommu_table.it_offset += veth_iommu_table.it_size;
- cb.itc_busno = 255; /* Bus 255 is the virtual bus */
- cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
- cbp = virt_to_abs(&cb);
- HvCallXm_getTceTableParms(cbp);
-
- itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
- veth_iommu_table.it_size = itc_entries / 2;
- veth_iommu_table.it_busno = cb.itc_busno;
- veth_iommu_table.it_offset = cb.itc_offset;
- veth_iommu_table.it_index = cb.itc_index;
- veth_iommu_table.it_type = TCE_VB;
- veth_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&veth_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&veth_iommu_table))
printk("Virtual Bus VETH TCE table failed.\n");
-
- vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
- vio_iommu_table.it_busno = cb.itc_busno;
- vio_iommu_table.it_offset = cb.itc_offset +
- veth_iommu_table.it_size;
- vio_iommu_table.it_index = cb.itc_index;
- vio_iommu_table.it_type = TCE_VB;
- vio_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&vio_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&vio_iommu_table))
printk("Virtual Bus VIO TCE table failed.\n");
}
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfbf746..84267269559 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
* For each kind of event we allocate a buffer that is
* guaranteed not to cross a page boundary
*/
-static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
+static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
+ __attribute__((__aligned__(4096)));
static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
static int event_buffer_initialised;
@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
HvLpEvent_Rc hvrc;
DECLARE_MUTEX_LOCKED(Semaphore);
- buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
if (!buf)
return 0;
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, HW_PAGE_SIZE);
- handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
+ handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
viopath_sourceinst(viopath_hostLp),
viopath_targetinst(viopath_hostLp),
(u64)(unsigned long)&Semaphore, VIOVERSION << 16,
- ((u64)handle) << 32, PAGE_SIZE, 0, 0);
+ ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
if (hvrc != HvLpEvent_Rc_Good)
printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
vlanMap = HvLpConfig_getVirtualLanIndexMap();
- buf[PAGE_SIZE-1] = '\0';
+ buf[HW_PAGE_SIZE-1] = '\0';
seq_printf(m, "%s", buf);
seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
e2a(xItExtVpdPanel.systemSerial[4]),
e2a(xItExtVpdPanel.systemSerial[5]));
- dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
+ DMA_FROM_DEVICE);
kfree(buf);
return 0;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 340c21caeae..895aeb3f75d 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -380,9 +380,6 @@ void __init maple_pcibios_fixup(void)
for_each_pci_dev(dev)
pci_read_irq_line(dev);
- /* Do the mapping of the IO space */
- phbs_remap_io();
-
DBG(" <- maple_pcibios_fixup\n");
}
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4369676f1d5..c9df44fcf57 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,7 +1,8 @@
obj-y += pic.o setup.o time.o feature.o pci.o \
sleep.o low_i2c.o cache.o
obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
-obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o
+obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o
obj-$(CONFIG_NVRAM) += nvram.o
# ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff
obj-$(CONFIG_PPC64) += nvram.o
diff --git a/arch/powerpc/platforms/powermac/cpufreq.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index c47f8b69725..56fd4e05fed 100644
--- a/arch/powerpc/platforms/powermac/cpufreq.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -397,18 +397,16 @@ static int pmac_cpufreq_target( struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int newstate = 0;
+ int rc;
if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
target_freq, relation, &newstate))
return -EINVAL;
- return do_set_cpu_speed(newstate, 1);
-}
+ rc = do_set_cpu_speed(newstate, 1);
-unsigned int pmac_get_one_cpufreq(int i)
-{
- /* Supports only one CPU for now */
- return (i == 0) ? cur_freq : 0;
+ ppc_proc_freq = cur_freq * 1000ul;
+ return rc;
}
static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -474,6 +472,8 @@ static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
do_set_cpu_speed(sleep_freq == low_freq ?
CPUFREQ_LOW : CPUFREQ_HIGH, 0);
+ ppc_proc_freq = cur_freq * 1000ul;
+
no_schedule = 0;
return 0;
}
@@ -547,7 +547,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
*/
if (low_freq < 98000000)
low_freq = 101000000;
-
+
/* Convert those to CPU core clocks */
low_freq = (low_freq * (*ratio)) / 2000;
hi_freq = (hi_freq * (*ratio)) / 2000;
@@ -714,6 +714,7 @@ out:
pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
+ ppc_proc_freq = cur_freq * 1000ul;
printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
new file mode 100644
index 00000000000..39150342c6f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs,
+ * that is iMac G5 and latest single CPU desktop.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/smu.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* see 970FX user manual */
+
+#define SCOM_PCR 0x0aa001 /* PCR scom addr */
+
+#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */
+#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */
+#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */
+#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */
+#define PCR_SPEED_MASK 0x000e0000U /* speed mask */
+#define PCR_SPEED_SHIFT 17
+#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */
+#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */
+#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */
+#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */
+#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */
+#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */
+
+#define SCOM_PSR 0x408001 /* PSR scom addr */
+/* warning: PSR is a 64 bits register */
+#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */
+#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */
+#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */
+#define PSR_CUR_SPEED_SHIFT (56)
+
+/*
+ * The G5 only supports two frequencies (Quarter speed is not supported)
+ */
+#define CPUFREQ_HIGH 0
+#define CPUFREQ_LOW 1
+
+static struct cpufreq_frequency_table g5_cpu_freqs[] = {
+ {CPUFREQ_HIGH, 0},
+ {CPUFREQ_LOW, 0},
+ {0, CPUFREQ_TABLE_END},
+};
+
+static struct freq_attr* g5_cpu_freqs_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+/* Power mode data is an array of the 32 bits PCR values to use for
+ * the various frequencies, retreived from the device-tree
+ */
+static u32 *g5_pmode_data;
+static int g5_pmode_max;
+static int g5_pmode_cur;
+
+static DECLARE_MUTEX(g5_switch_mutex);
+
+
+static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */
+static int g5_fvt_count; /* number of op. points */
+static int g5_fvt_cur; /* current op. point */
+
+/* ----------------- real hardware interface */
+
+static void g5_switch_volt(int speed_mode)
+{
+ struct smu_simple_cmd cmd;
+
+ DECLARE_COMPLETION(comp);
+ smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete,
+ &comp, 'V', 'S', 'L', 'E', 'W',
+ 0xff, g5_fvt_cur+1, speed_mode);
+ wait_for_completion(&comp);
+}
+
+static int g5_switch_freq(int speed_mode)
+{
+ struct cpufreq_freqs freqs;
+ int to;
+
+ if (g5_pmode_cur == speed_mode)
+ return 0;
+
+ down(&g5_switch_mutex);
+
+ freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
+ freqs.new = g5_cpu_freqs[speed_mode].frequency;
+ freqs.cpu = 0;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* If frequency is going up, first ramp up the voltage */
+ if (speed_mode < g5_pmode_cur)
+ g5_switch_volt(speed_mode);
+
+ /* Clear PCR high */
+ scom970_write(SCOM_PCR, 0);
+ /* Clear PCR low */
+ scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
+ /* Set PCR low */
+ scom970_write(SCOM_PCR, PCR_HILO_SELECT |
+ g5_pmode_data[speed_mode]);
+
+ /* Wait for completion */
+ for (to = 0; to < 10; to++) {
+ unsigned long psr = scom970_read(SCOM_PSR);
+
+ if ((psr & PSR_CMD_RECEIVED) == 0 &&
+ (((psr >> PSR_CUR_SPEED_SHIFT) ^
+ (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
+ == 0)
+ break;
+ if (psr & PSR_CMD_COMPLETED)
+ break;
+ udelay(100);
+ }
+
+ /* If frequency is going down, last ramp the voltage */
+ if (speed_mode > g5_pmode_cur)
+ g5_switch_volt(speed_mode);
+
+ g5_pmode_cur = speed_mode;
+ ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ up(&g5_switch_mutex);
+
+ return 0;
+}
+
+static int g5_query_freq(void)
+{
+ unsigned long psr = scom970_read(SCOM_PSR);
+ int i;
+
+ for (i = 0; i <= g5_pmode_max; i++)
+ if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
+ (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
+ break;
+ return i;
+}
+
+/* ----------------- cpufreq bookkeeping */
+
+static int g5_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, g5_cpu_freqs);
+}
+
+static int g5_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+ unsigned int newstate = 0;
+
+ if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ return g5_switch_freq(newstate);
+}
+
+static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
+{
+ return g5_cpu_freqs[g5_pmode_cur].frequency;
+}
+
+static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cur = g5_cpu_freqs[g5_query_freq()].frequency;
+ cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
+
+ return cpufreq_frequency_table_cpuinfo(policy,
+ g5_cpu_freqs);
+}
+
+
+static struct cpufreq_driver g5_cpufreq_driver = {
+ .name = "powermac",
+ .owner = THIS_MODULE,
+ .flags = CPUFREQ_CONST_LOOPS,
+ .init = g5_cpufreq_cpu_init,
+ .verify = g5_cpufreq_verify,
+ .target = g5_cpufreq_target,
+ .get = g5_cpufreq_get_speed,
+ .attr = g5_cpu_freqs_attr,
+};
+
+
+static int __init g5_cpufreq_init(void)
+{
+ struct device_node *cpunode;
+ unsigned int psize, ssize;
+ struct smu_sdbp_header *shdr;
+ unsigned long max_freq;
+ u32 *valp;
+ int rc = -ENODEV;
+
+ /* Look for CPU and SMU nodes */
+ cpunode = of_find_node_by_type(NULL, "cpu");
+ if (!cpunode) {
+ DBG("No CPU node !\n");
+ return -ENODEV;
+ }
+
+ /* Check 970FX for now */
+ valp = (u32 *)get_property(cpunode, "cpu-version", NULL);
+ if (!valp) {
+ DBG("No cpu-version property !\n");
+ goto bail_noprops;
+ }
+ if (((*valp) >> 16) != 0x3c) {
+ DBG("Wrong CPU version: %08x\n", *valp);
+ goto bail_noprops;
+ }
+
+ /* Look for the powertune data in the device-tree */
+ g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize);
+ if (!g5_pmode_data) {
+ DBG("No power-mode-data !\n");
+ goto bail_noprops;
+ }
+ g5_pmode_max = psize / sizeof(u32) - 1;
+
+ /* Look for the FVT table */
+ shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
+ if (!shdr)
+ goto bail_noprops;
+ g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1];
+ ssize = (shdr->len * sizeof(u32)) - sizeof(struct smu_sdbp_header);
+ g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt);
+ g5_fvt_cur = 0;
+
+ /* Sanity checking */
+ if (g5_fvt_count < 1 || g5_pmode_max < 1)
+ goto bail_noprops;
+
+ /*
+ * From what I see, clock-frequency is always the maximal frequency.
+ * The current driver can not slew sysclk yet, so we really only deal
+ * with powertune steps for now. We also only implement full freq and
+ * half freq in this version. So far, I haven't yet seen a machine
+ * supporting anything else.
+ */
+ valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+ if (!valp)
+ return -ENODEV;
+ max_freq = (*valp)/1000;
+ g5_cpu_freqs[0].frequency = max_freq;
+ g5_cpu_freqs[1].frequency = max_freq/2;
+
+ /* Check current frequency */
+ g5_pmode_cur = g5_query_freq();
+ if (g5_pmode_cur > 1)
+ /* We don't support anything but 1:1 and 1:2, fixup ... */
+ g5_pmode_cur = 1;
+
+ /* Force apply current frequency to make sure everything is in
+ * sync (voltage is right for example). Firmware may leave us with
+ * a strange setting ...
+ */
+ g5_switch_freq(g5_pmode_cur);
+
+ printk(KERN_INFO "Registering G5 CPU frequency driver\n");
+ printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+ g5_cpu_freqs[1].frequency/1000,
+ g5_cpu_freqs[0].frequency/1000,
+ g5_cpu_freqs[g5_pmode_cur].frequency/1000);
+
+ rc = cpufreq_register_driver(&g5_cpufreq_driver);
+
+ /* We keep the CPU node on hold... hopefully, Apple G5 don't have
+ * hotplug CPU with a dynamic device-tree ...
+ */
+ return rc;
+
+ bail_noprops:
+ of_node_put(cpunode);
+
+ return rc;
+}
+
+module_init(g5_cpufreq_init);
+
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 8f818d092e2..dfd41b9781a 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -918,9 +918,6 @@ void __init pmac_pci_init(void)
PCI_DN(np)->busno = 0xf0;
}
- /* map in PCI I/O space */
- phbs_remap_io();
-
/* pmac_check_ht_link(); */
/* Tell pci.c to not use the common resource allocation mechanism */
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 83a49e80ac2..90040c49494 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -74,6 +74,9 @@ static DEFINE_SPINLOCK(pmac_pic_lock);
#define GATWICK_IRQ_POOL_SIZE 10
static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
+
/*
* Mark an irq as "lost". This is only used on the pmac
* since it can lose interrupts (see pmac_set_irq_mask).
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 80b58c1ec41..7acb0546671 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -193,18 +193,6 @@ static void pmac_show_cpuinfo(struct seq_file *m)
pmac_newworld ? "NewWorld" : "OldWorld");
}
-static void pmac_show_percpuinfo(struct seq_file *m, int i)
-{
-#ifdef CONFIG_CPU_FREQ_PMAC
- extern unsigned int pmac_get_one_cpufreq(int i);
- unsigned int freq = pmac_get_one_cpufreq(i);
- if (freq != 0) {
- seq_printf(m, "clock\t\t: %dMHz\n", freq/1000);
- return;
- }
-#endif /* CONFIG_CPU_FREQ_PMAC */
-}
-
#ifndef CONFIG_ADB_CUDA
int find_via_cuda(void)
{
@@ -767,7 +755,6 @@ struct machdep_calls __initdata pmac_md = {
.setup_arch = pmac_setup_arch,
.init_early = pmac_init_early,
.show_cpuinfo = pmac_show_cpuinfo,
- .show_percpuinfo = pmac_show_percpuinfo,
.init_IRQ = pmac_pic_init,
.get_irq = mpic_get_irq, /* changed later */
.pcibios_fixup = pmac_pcibios_fixup,
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index e1f9443cc87..957b0910342 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -305,9 +305,19 @@ static int __init smp_psurge_probe(void)
psurge_start = ioremap(PSURGE_START, 4);
psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
- /* this is not actually strictly necessary -- paulus. */
- for (i = 1; i < ncpus; ++i)
- smp_hw_index[i] = i;
+ /*
+ * This is necessary because OF doesn't know about the
+ * secondary cpu(s), and thus there aren't nodes in the
+ * device tree for them, and smp_setup_cpu_maps hasn't
+ * set their bits in cpu_possible_map and cpu_present_map.
+ */
+ if (ncpus > NR_CPUS)
+ ncpus = NR_CPUS;
+ for (i = 1; i < ncpus ; ++i) {
+ cpu_set(i, cpu_present_map);
+ cpu_set(i, cpu_possible_map);
+ set_hard_smp_processor_id(i, i);
+ }
if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
@@ -348,6 +358,7 @@ static void __init psurge_dual_sync_tb(int cpu_nr)
int t;
set_dec(tb_ticks_per_jiffy);
+ /* XXX fixme */
set_tb(0, 0);
last_jiffy_stamp(cpu_nr) = 0;
@@ -363,8 +374,6 @@ static void __init psurge_dual_sync_tb(int cpu_nr)
/* now interrupt the secondary, starting both TBs */
psurge_set_ipi(1);
-
- smp_tb_synchronized = 1;
}
static struct irqaction psurge_irqaction = {
@@ -625,9 +634,8 @@ void smp_core99_give_timebase(void)
for (t = 100000; t > 0 && sec_tb_reset; --t)
udelay(10);
if (sec_tb_reset)
+ /* XXX BUG_ON here? */
printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n");
- else
- smp_tb_synchronized = 1;
/* Now, restart the timebase by leaving the GPIO to an open collector */
pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0);
@@ -810,19 +818,9 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
}
-/* Core99 Macs (dual G4s and G5s) */
-struct smp_ops_t core99_smp_ops = {
- .message_pass = smp_mpic_message_pass,
- .probe = smp_core99_probe,
- .kick_cpu = smp_core99_kick_cpu,
- .setup_cpu = smp_core99_setup_cpu,
- .give_timebase = smp_core99_give_timebase,
- .take_timebase = smp_core99_take_timebase,
-};
-
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)
-int __cpu_disable(void)
+int smp_core99_cpu_disable(void)
{
cpu_clear(smp_processor_id(), cpu_online_map);
@@ -846,7 +844,7 @@ void cpu_die(void)
low_cpu_die();
}
-void __cpu_die(unsigned int cpu)
+void smp_core99_cpu_die(unsigned int cpu)
{
int timeout;
@@ -858,8 +856,21 @@ void __cpu_die(unsigned int cpu)
}
msleep(1);
}
- cpu_callin_map[cpu] = 0;
cpu_dead[cpu] = 0;
}
#endif
+
+/* Core99 Macs (dual G4s and G5s) */
+struct smp_ops_t core99_smp_ops = {
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_core99_probe,
+ .kick_cpu = smp_core99_kick_cpu,
+ .setup_cpu = smp_core99_setup_cpu,
+ .give_timebase = smp_core99_give_timebase,
+ .take_timebase = smp_core99_take_timebase,
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)
+ .cpu_disable = smp_core99_cpu_disable,
+ .cpu_die = smp_core99_cpu_die,
+#endif
+};
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index b9938fece78..e7ca5b1f591 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,3 +3,5 @@ obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_XICS) += xics.o
+obj-$(CONFIG_SCANLOG) += scanlog.o
+obj-$(CONFIG_EEH) += eeh.o eeh_event.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
new file mode 100644
index 00000000000..79de2310e70
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -0,0 +1,1212 @@
+/*
+ * eeh.c
+ * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+#undef DEBUG
+
+/** Overview:
+ * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * dealing with PCI bus errors that can't be dealt with within the
+ * usual PCI framework, except by check-stopping the CPU. Systems
+ * that are designed for high-availability/reliability cannot afford
+ * to crash due to a "mere" PCI error, thus the need for EEH.
+ * An EEH-capable bridge operates by converting a detected error
+ * into a "slot freeze", taking the PCI adapter off-line, making
+ * the slot behave, from the OS'es point of view, as if the slot
+ * were "empty": all reads return 0xff's and all writes are silently
+ * ignored. EEH slot isolation events can be triggered by parity
+ * errors on the address or data busses (e.g. during posted writes),
+ * which in turn might be caused by low voltage on the bus, dust,
+ * vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ * Note, however, that one of the leading causes of EEH slot
+ * freeze events are buggy device drivers, buggy device microcode,
+ * or buggy device hardware. This is because any attempt by the
+ * device to bus-master data to a memory address that is not
+ * assigned to the device will trigger a slot freeze. (The idea
+ * is to prevent devices-gone-wild from corrupting system memory).
+ * Buggy hardware/drivers will have a miserable time co-existing
+ * with EEH.
+ *
+ * Ideally, a PCI device driver, when suspecting that an isolation
+ * event has occured (e.g. by reading 0xff's), will then ask EEH
+ * whether this is the case, and then take appropriate steps to
+ * reset the PCI slot, the PCI device, and then resume operations.
+ * However, until that day, the checking is done here, with the
+ * eeh_check_failure() routine embedded in the MMIO macros. If
+ * the slot is found to be isolated, an "EEH Event" is synthesized
+ * and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event has occurred, we assume it
+ * is broken and panic. This sets the threshold for how many read
+ * attempts we allow before panicking.
+ */
+#define EEH_MAX_FAILS 100000
+
+/* Misc forward declaraions */
+static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn);
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+
+static int eeh_subsystem_enabled;
+
+/* Lock to avoid races due to multiple reports of an error */
+static DEFINE_SPINLOCK(confirm_error_lock);
+
+/* Buffer for reporting slot-error-detail rtas calls */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/* System monitoring statistics */
+static DEFINE_PER_CPU(unsigned long, no_device);
+static DEFINE_PER_CPU(unsigned long, no_dn);
+static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
+static DEFINE_PER_CPU(unsigned long, ignored_check);
+static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
+static DEFINE_PER_CPU(unsigned long, false_positives);
+static DEFINE_PER_CPU(unsigned long, ignored_failures);
+static DEFINE_PER_CPU(unsigned long, slot_resets);
+
+/**
+ * The pci address cache subsystem. This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range
+{
+ struct rb_node rb_node;
+ unsigned long addr_lo;
+ unsigned long addr_hi;
+ struct pci_dev *pcidev;
+ unsigned int flags;
+};
+
+static struct pci_io_addr_cache
+{
+ struct rb_root rb_root;
+ spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
+{
+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (addr < piar->addr_lo) {
+ n = n->rb_left;
+ } else {
+ if (addr > piar->addr_hi) {
+ n = n->rb_right;
+ } else {
+ pci_dev_get(piar->pcidev);
+ return piar->pcidev;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * pci_get_device_by_addr - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address. Be sure to pci_dev_put the device
+ * when finished. I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
+{
+ struct pci_dev *dev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ dev = __pci_get_device_by_addr(addr);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+ return dev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+ struct rb_node *n;
+ int cnt = 0;
+
+ n = rb_first(&cache->rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+ printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+ piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ cnt++;
+ n = rb_next(n);
+ }
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+ unsigned long ahi, unsigned int flags)
+{
+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct pci_io_addr_range *piar;
+
+ /* Walk tree, find a place to insert into tree */
+ while (*p) {
+ parent = *p;
+ piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+ if (ahi < piar->addr_lo) {
+ p = &parent->rb_left;
+ } else if (alo > piar->addr_hi) {
+ p = &parent->rb_right;
+ } else {
+ if (dev != piar->pcidev ||
+ alo != piar->addr_lo || ahi != piar->addr_hi) {
+ printk(KERN_WARNING "PIAR: overlapping address range\n");
+ }
+ return piar;
+ }
+ }
+ piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+ if (!piar)
+ return NULL;
+
+ piar->addr_lo = alo;
+ piar->addr_hi = ahi;
+ piar->pcidev = dev;
+ piar->flags = flags;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
+ alo, ahi, pci_name (dev));
+#endif
+
+ rb_link_node(&piar->rb_node, parent, p);
+ rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+ return piar;
+}
+
+static void __pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+ int i;
+ int inserted = 0;
+
+ dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
+ return;
+ }
+
+ /* Skip any devices for which EEH is not enabled. */
+ pdn = PCI_DN(dn);
+ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ pdn->eeh_mode & EEH_MODE_NOCHECK) {
+#ifdef DEBUG
+ printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
+ pci_name(dev), pdn->node->full_name);
+#endif
+ return;
+ }
+
+ /* The cache holds a reference to the device... */
+ pci_dev_get(dev);
+
+ /* Walk resources on this device, poke them into the tree */
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long start = pci_resource_start(dev,i);
+ unsigned long end = pci_resource_end(dev,i);
+ unsigned int flags = pci_resource_flags(dev,i);
+
+ /* We are interested only bus addresses, not dma or other stuff */
+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+ continue;
+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+ continue;
+ pci_addr_cache_insert(dev, start, end, flags);
+ inserted = 1;
+ }
+
+ /* If there was nothing to add, the cache has no reference... */
+ if (!inserted)
+ pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_insert_device - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+static void pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_insert_device(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+ struct rb_node *n;
+ int removed = 0;
+
+restart:
+ n = rb_first(&pci_io_addr_cache_root.rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (piar->pcidev == dev) {
+ rb_erase(n, &pci_io_addr_cache_root.rb_root);
+ removed = 1;
+ kfree(piar);
+ goto restart;
+ }
+ n = rb_next(n);
+ }
+
+ /* The cache no longer holds its reference to this device... */
+ if (removed)
+ pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_remove_device - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+static void pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_remove_device(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * pci_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses. This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scaned for devices (after all device resources are known).
+ */
+void __init pci_addr_cache_build(void)
+{
+ struct device_node *dn;
+ struct pci_dev *dev = NULL;
+
+ if (!eeh_subsystem_enabled)
+ return;
+
+ spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ /* Ignore PCI bridges ( XXX why ??) */
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
+ continue;
+ }
+ pci_addr_cache_insert_device(dev);
+
+ /* Save the BAR's; firmware doesn't restore these after EEH reset */
+ dn = pci_device_to_OF_node(dev);
+ eeh_save_bars(dev, PCI_DN(dn));
+ }
+
+#ifdef DEBUG
+ /* Verify tree built up above, echo back the list of addrs. */
+ pci_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
+
+/* --------------------------------------------------------------- */
+/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
+
+void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
+{
+ unsigned long flags;
+ int rc;
+
+ /* Log the error with the rtas logger */
+ spin_lock_irqsave(&slot_errbuf_lock, flags);
+ memset(slot_errbuf, 0, eeh_error_buf_size);
+
+ rc = rtas_call(ibm_slot_error_detail,
+ 8, 1, NULL, pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), NULL, 0,
+ virt_to_phys(slot_errbuf),
+ eeh_error_buf_size,
+ severity);
+
+ if (rc == 0)
+ log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+ spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+}
+
+/**
+ * read_slot_reset_state - Read the reset state of a device node's slot
+ * @dn: device node to read
+ * @rets: array to return results in
+ */
+static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
+{
+ int token, outputs;
+
+ if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+ token = ibm_read_slot_reset_state2;
+ outputs = 4;
+ } else {
+ token = ibm_read_slot_reset_state;
+ rets[2] = 0; /* fake PE Unavailable info */
+ outputs = 3;
+ }
+
+ return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
+}
+
+/**
+ * eeh_token_to_phys - convert EEH address token to phys address
+ * @token i/o token, should be address in the form 0xA....
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+ pte_t *ptep;
+ unsigned long pa;
+
+ ptep = find_linux_pte(init_mm.pgd, token);
+ if (!ptep)
+ return token;
+ pa = pte_pfn(*ptep) << PAGE_SHIFT;
+
+ return pa | (token & (PAGE_SIZE-1));
+}
+
+/**
+ * Return the "partitionable endpoint" (pe) under which this device lies
+ */
+static struct device_node * find_device_pe(struct device_node *dn)
+{
+ while ((dn->parent) && PCI_DN(dn->parent) &&
+ (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ dn = dn->parent;
+ }
+ return dn;
+}
+
+/** Mark all devices that are peers of this device as failed.
+ * Mark the device driver too, so that it can see the failure
+ * immediately; this is critical, since some drivers poll
+ * status registers in interrupts ... If a driver is polling,
+ * and the slot is frozen, then the driver can deadlock in
+ * an interrupt context, which is bad.
+ */
+
+static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ while (dn) {
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+
+ if (dn->child)
+ __eeh_mark_slot (dn->child, mode_flag);
+ }
+ dn = dn->sibling;
+ }
+}
+
+void eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+ __eeh_mark_slot (dn->child, mode_flag);
+}
+
+static void __eeh_clear_slot (struct device_node *dn, int mode_flag)
+{
+ while (dn) {
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ if (dn->child)
+ __eeh_clear_slot (dn->child, mode_flag);
+ }
+ dn = dn->sibling;
+ }
+}
+
+void eeh_clear_slot (struct device_node *dn, int mode_flag)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&confirm_error_lock, flags);
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ __eeh_clear_slot (dn->child, mode_flag);
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+}
+
+/**
+ * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
+ * @dn device node
+ * @dev pci device, if known
+ *
+ * Check for an EEH failure for the given device node. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze. This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * a non-zero value and queues up a slot isolation event notification.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+{
+ int ret;
+ int rets[3];
+ unsigned long flags;
+ struct pci_dn *pdn;
+ int rc = 0;
+
+ __get_cpu_var(total_mmio_ffs)++;
+
+ if (!eeh_subsystem_enabled)
+ return 0;
+
+ if (!dn) {
+ __get_cpu_var(no_dn)++;
+ return 0;
+ }
+ pdn = PCI_DN(dn);
+
+ /* Access to IO BARs might get this far and still not want checking. */
+ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ pdn->eeh_mode & EEH_MODE_NOCHECK) {
+ __get_cpu_var(ignored_check)++;
+#ifdef DEBUG
+ printk ("EEH:ignored check (%x) for %s %s\n",
+ pdn->eeh_mode, pci_name (dev), dn->full_name);
+#endif
+ return 0;
+ }
+
+ if (!pdn->eeh_config_addr) {
+ __get_cpu_var(no_cfg_addr)++;
+ return 0;
+ }
+
+ /* If we already have a pending isolation event for this
+ * slot, we know it's bad already, we don't need to check.
+ * Do this checking under a lock; as multiple PCI devices
+ * in one slot might report errors simultaneously, and we
+ * only want one error recovery routine running.
+ */
+ spin_lock_irqsave(&confirm_error_lock, flags);
+ rc = 1;
+ if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
+ pdn->eeh_check_count ++;
+ if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
+ printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
+ pdn->eeh_check_count);
+ dump_stack();
+
+ /* re-read the slot reset state */
+ if (read_slot_reset_state(pdn, rets) != 0)
+ rets[0] = -1; /* reset state unknown */
+
+ /* If we are here, then we hit an infinite loop. Stop. */
+ panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
+ }
+ goto dn_unlock;
+ }
+
+ /*
+ * Now test for an EEH failure. This is VERY expensive.
+ * Note that the eeh_config_addr may be a parent device
+ * in the case of a device behind a bridge, or it may be
+ * function zero of a multi-function device.
+ * In any case they must share a common PHB.
+ */
+ ret = read_slot_reset_state(pdn, rets);
+
+ /* If the call to firmware failed, punt */
+ if (ret != 0) {
+ printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
+ ret, dn->full_name);
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* If EEH is not supported on this device, punt. */
+ if (rets[1] != 1) {
+ printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
+ ret, dn->full_name);
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* If not the kind of error we know about, punt. */
+ if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* Note that config-io to empty slots may fail;
+ * we recognize empty because they don't have children. */
+ if ((rets[0] == 5) && (dn->child == NULL)) {
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ __get_cpu_var(slot_resets)++;
+
+ /* Avoid repeated reports of this failure, including problems
+ * with other functions on this device, and functions under
+ * bridges. */
+ eeh_mark_slot (dn, EEH_MODE_ISOLATED);
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+
+ eeh_send_failure_event (dn, dev, rets[0], rets[2]);
+
+ /* Most EEH events are due to device driver bugs. Having
+ * a stack trace will help the device-driver authors figure
+ * out what happened. So print that out. */
+ if (rets[0] != 5) dump_stack();
+ return 1;
+
+dn_unlock:
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
+
+/**
+ * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * @token i/o token, should be address in the form 0xA....
+ * @val value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an EEH failure at the given token address. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+ unsigned long addr;
+ struct pci_dev *dev;
+ struct device_node *dn;
+
+ /* Finding the phys addr + pci device; this is pretty quick. */
+ addr = eeh_token_to_phys((unsigned long __force) token);
+ dev = pci_get_device_by_addr(addr);
+ if (!dev) {
+ __get_cpu_var(no_device)++;
+ return val;
+ }
+
+ dn = pci_device_to_OF_node(dev);
+ eeh_dn_check_failure (dn, dev);
+
+ pci_dev_put(dev);
+ return val;
+}
+
+EXPORT_SYMBOL(eeh_check_failure);
+
+/* ------------------------------------------------------------- */
+/* The code below deals with error recovery */
+
+/** Return negative value if a permanent error, else return
+ * a number of milliseconds to wait until the PCI slot is
+ * ready to be used.
+ */
+static int
+eeh_slot_availability(struct pci_dn *pdn)
+{
+ int rc;
+ int rets[3];
+
+ rc = read_slot_reset_state(pdn, rets);
+
+ if (rc) return rc;
+
+ if (rets[1] == 0) return -1; /* EEH is not supported */
+ if (rets[0] == 0) return 0; /* Oll Korrect */
+ if (rets[0] == 5) {
+ if (rets[2] == 0) return -1; /* permanently unavailable */
+ return rets[2]; /* number of millisecs to wait */
+ }
+ return -1;
+}
+
+/** rtas_pci_slot_reset raises/lowers the pci #RST line
+ * state: 1/0 to raise/lower the #RST
+ *
+ * Clear the EEH-frozen condition on a slot. This routine
+ * asserts the PCI #RST line if the 'state' argument is '1',
+ * and drops the #RST line if 'state is '0'. This routine is
+ * safe to call in an interrupt context.
+ *
+ */
+
+static void
+rtas_pci_slot_reset(struct pci_dn *pdn, int state)
+{
+ int rc;
+
+ BUG_ON (pdn==NULL);
+
+ if (!pdn->phb) {
+ printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
+ pdn->node->full_name);
+ return;
+ }
+
+ rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+ pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid),
+ state);
+ if (rc) {
+ printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n",
+ rc, state, pdn->node->full_name);
+ return;
+ }
+}
+
+/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
+ * dn -- device node to be reset.
+ */
+
+void
+rtas_set_slot_reset(struct pci_dn *pdn)
+{
+ int i, rc;
+
+ rtas_pci_slot_reset (pdn, 1);
+
+ /* The PCI bus requires that the reset be held high for at least
+ * a 100 milliseconds. We wait a bit longer 'just in case'. */
+
+#define PCI_BUS_RST_HOLD_TIME_MSEC 250
+ msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now. */
+ eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+
+ rtas_pci_slot_reset (pdn, 0);
+
+ /* After a PCI slot has been reset, the PCI Express spec requires
+ * a 1.5 second idle time for the bus to stabilize, before starting
+ * up traffic. */
+#define PCI_BUS_SETTLE_TIME_MSEC 1800
+ msleep (PCI_BUS_SETTLE_TIME_MSEC);
+
+ /* Now double check with the firmware to make sure the device is
+ * ready to be used; if not, wait for recovery. */
+ for (i=0; i<10; i++) {
+ rc = eeh_slot_availability (pdn);
+ if (rc <= 0) break;
+
+ msleep (rc+100);
+ }
+}
+
+/* ------------------------------------------------------- */
+/** Save and restore of PCI BARs
+ *
+ * Although firmware will set up BARs during boot, it doesn't
+ * set up device BAR's after a device reset, although it will,
+ * if requested, set up bridge configuration. Thus, we need to
+ * configure the PCI devices ourselves.
+ */
+
+/**
+ * __restore_bars - Restore the Base Address Registers
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static inline void __restore_bars (struct pci_dn *pdn)
+{
+ int i;
+
+ if (NULL==pdn->phb) return;
+ for (i=4; i<10; i++) {
+ rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
+ }
+
+ /* 12 == Expansion ROM Address */
+ rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
+
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
+
+ rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+
+ rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+
+ /* max latency, min grant, interrupt pin and line */
+ rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
+}
+
+/**
+ * eeh_restore_bars - restore the PCI config space info
+ *
+ * This routine performs a recursive walk to the children
+ * of this device as well.
+ */
+void eeh_restore_bars(struct pci_dn *pdn)
+{
+ struct device_node *dn;
+ if (!pdn)
+ return;
+
+ if (! pdn->eeh_is_bridge)
+ __restore_bars (pdn);
+
+ dn = pdn->node->child;
+ while (dn) {
+ eeh_restore_bars (PCI_DN(dn));
+ dn = dn->sibling;
+ }
+}
+
+/**
+ * eeh_save_bars - save device bars
+ *
+ * Save the values of the device bars. Unlike the restore
+ * routine, this routine is *not* recursive. This is because
+ * PCI devices are added individuallly; but, for the restore,
+ * an entire slot is reset at a time.
+ */
+static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn)
+{
+ int i;
+
+ if (!pdev || !pdn )
+ return;
+
+ for (i = 0; i < 16; i++)
+ pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]);
+
+ if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+ pdn->eeh_is_bridge = 1;
+}
+
+void
+rtas_configure_bridge(struct pci_dn *pdn)
+{
+ int token = rtas_token ("ibm,configure-bridge");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return;
+ rc = rtas_call(token,3,1, NULL,
+ pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ if (rc) {
+ printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
+ rc, pdn->node->full_name);
+ }
+}
+
+/* ------------------------------------------------------------- */
+/* The code below deals with enabling EEH for devices during the
+ * early boot sequence. EEH must be enabled before any PCI probing
+ * can be done.
+ */
+
+#define EEH_ENABLE 1
+
+struct eeh_early_enable_info {
+ unsigned int buid_hi;
+ unsigned int buid_lo;
+};
+
+/* Enable eeh for the given device node. */
+static void *early_enable_eeh(struct device_node *dn, void *data)
+{
+ struct eeh_early_enable_info *info = data;
+ int ret;
+ char *status = get_property(dn, "status", NULL);
+ u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
+ u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
+ u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
+ u32 *regs;
+ int enable;
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ pdn->eeh_mode = 0;
+ pdn->eeh_check_count = 0;
+ pdn->eeh_freeze_count = 0;
+
+ if (status && strcmp(status, "ok") != 0)
+ return NULL; /* ignore devices with bad status */
+
+ /* Ignore bad nodes. */
+ if (!class_code || !vendor_id || !device_id)
+ return NULL;
+
+ /* There is nothing to check on PCI to ISA bridges */
+ if (dn->type && !strcmp(dn->type, "isa")) {
+ pdn->eeh_mode |= EEH_MODE_NOCHECK;
+ return NULL;
+ }
+
+ /*
+ * Now decide if we are going to "Disable" EEH checking
+ * for this device. We still run with the EEH hardware active,
+ * but we won't be checking for ff's. This means a driver
+ * could return bad data (very bad!), an interrupt handler could
+ * hang waiting on status bits that won't change, etc.
+ * But there are a few cases like display devices that make sense.
+ */
+ enable = 1; /* i.e. we will do checking */
+ if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
+ enable = 0;
+
+ if (!enable)
+ pdn->eeh_mode |= EEH_MODE_NOCHECK;
+
+ /* Ok... see if this device supports EEH. Some do, some don't,
+ * and the only way to find out is to check each and every one. */
+ regs = (u32 *)get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ /* Try to enable eeh */
+ ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+ regs[0], info->buid_hi, info->buid_lo,
+ EEH_ENABLE);
+
+ if (ret == 0) {
+ eeh_subsystem_enabled = 1;
+ pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+ pdn->eeh_config_addr = regs[0];
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
+#endif
+ } else {
+
+ /* This device doesn't support EEH, but it may have an
+ * EEH parent, in which case we mark it as supported. */
+ if (dn->parent && PCI_DN(dn->parent)
+ && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ /* Parent supports EEH. */
+ pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+ pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
+ return NULL;
+ }
+ }
+ } else {
+ printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
+ dn->full_name);
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
+ * As a side effect we can determine here if eeh is supported at all.
+ * Note that we leave EEH on so failed config cycles won't cause a machine
+ * check. If a user turns off EEH for a particular adapter they are really
+ * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
+ * grant access to a slot if EEH isn't enabled, and so we always enable
+ * EEH for all slots/all devices.
+ *
+ * The eeh-force-off option disables EEH checking globally, for all slots.
+ * Even if force-off is set, the EEH hardware is still enabled, so that
+ * newer systems can boot.
+ */
+void __init eeh_init(void)
+{
+ struct device_node *phb, *np;
+ struct eeh_early_enable_info info;
+
+ spin_lock_init(&confirm_error_lock);
+ spin_lock_init(&slot_errbuf_lock);
+
+ np = of_find_node_by_path("/rtas");
+ if (np == NULL)
+ return;
+
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
+ return;
+
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ eeh_error_buf_size = 1024;
+ }
+ if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
+ "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Enable EEH for all adapters. Note that eeh requires buid's */
+ for (phb = of_find_node_by_name(NULL, "pci"); phb;
+ phb = of_find_node_by_name(phb, "pci")) {
+ unsigned long buid;
+
+ buid = get_phb_buid(phb);
+ if (buid == 0 || PCI_DN(phb) == NULL)
+ continue;
+
+ info.buid_lo = BUID_LO(buid);
+ info.buid_hi = BUID_HI(buid);
+ traverse_pci_devices(phb, early_enable_eeh, &info);
+ }
+
+ if (eeh_subsystem_enabled)
+ printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ printk(KERN_WARNING "EEH: No capable adapters found\n");
+}
+
+/**
+ * eeh_add_device_early - enable EEH for the indicated device_node
+ * @dn: device node for which to set up EEH
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * This routine must be called before any i/o is performed to the
+ * adapter (inluding any config-space i/o).
+ * Whether this actually enables EEH or not for this device depends
+ * on the CEC architecture, type of the device, on earlier boot
+ * command-line arguments & etc.
+ */
+void eeh_add_device_early(struct device_node *dn)
+{
+ struct pci_controller *phb;
+ struct eeh_early_enable_info info;
+
+ if (!dn || !PCI_DN(dn))
+ return;
+ phb = PCI_DN(dn)->phb;
+ if (NULL == phb || 0 == phb->buid) {
+ printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
+ dn->full_name);
+ dump_stack();
+ return;
+ }
+
+ info.buid_hi = BUID_HI(phb->buid);
+ info.buid_lo = BUID_LO(phb->buid);
+ early_enable_eeh(dn, &info);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_early);
+
+/**
+ * eeh_add_device_late - perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ */
+void eeh_add_device_late(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
+#endif
+
+ pci_dev_get (dev);
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ pdn->pcidev = dev;
+
+ pci_addr_cache_insert_device (dev);
+ eeh_save_bars(dev, pdn);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_late);
+
+/**
+ * eeh_remove_device - undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be when a device is removed from a running
+ * system (e.g. by hotplug or dlpar).
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+
+ /* Unregister the device with the EEH/PCI address search system */
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
+#endif
+ pci_addr_cache_remove_device(dev);
+
+ dn = pci_device_to_OF_node(dev);
+ PCI_DN(dn)->pcidev = NULL;
+ pci_dev_put (dev);
+}
+EXPORT_SYMBOL_GPL(eeh_remove_device);
+
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+ unsigned int cpu;
+ unsigned long ffs = 0, positives = 0, failures = 0;
+ unsigned long resets = 0;
+ unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
+
+ for_each_cpu(cpu) {
+ ffs += per_cpu(total_mmio_ffs, cpu);
+ positives += per_cpu(false_positives, cpu);
+ failures += per_cpu(ignored_failures, cpu);
+ resets += per_cpu(slot_resets, cpu);
+ no_dev += per_cpu(no_device, cpu);
+ no_dn += per_cpu(no_dn, cpu);
+ no_cfg += per_cpu(no_cfg_addr, cpu);
+ no_check += per_cpu(ignored_check, cpu);
+ }
+
+ if (0 == eeh_subsystem_enabled) {
+ seq_printf(m, "EEH Subsystem is globally disabled\n");
+ seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
+ } else {
+ seq_printf(m, "EEH Subsystem is enabled\n");
+ seq_printf(m,
+ "no device=%ld\n"
+ "no device node=%ld\n"
+ "no config address=%ld\n"
+ "check not wanted=%ld\n"
+ "eeh_total_mmio_ffs=%ld\n"
+ "eeh_false_positives=%ld\n"
+ "eeh_ignored_failures=%ld\n"
+ "eeh_slot_resets=%ld\n",
+ no_dev, no_dn, no_cfg, no_check,
+ ffs, positives, failures, resets);
+ }
+
+ return 0;
+}
+
+static int proc_eeh_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_eeh_show, NULL);
+}
+
+static struct file_operations proc_eeh_operations = {
+ .open = proc_eeh_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init eeh_init_proc(void)
+{
+ struct proc_dir_entry *e;
+
+ if (platform_is_pseries()) {
+ e = create_proc_entry("ppc64/eeh", 0, NULL);
+ if (e)
+ e->proc_fops = &proc_eeh_operations;
+ }
+
+ return 0;
+}
+__initcall(eeh_init_proc);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
new file mode 100644
index 00000000000..92497333c2b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -0,0 +1,155 @@
+/*
+ * eeh_event.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <asm/eeh_event.h>
+
+/** Overview:
+ * EEH error states may be detected within exception handlers;
+ * however, the recovery processing needs to occur asynchronously
+ * in a normal kernel context and not an interrupt context.
+ * This pair of routines creates an event and queues it onto a
+ * work-queue, where a worker thread can drive recovery.
+ */
+
+/* EEH event workqueue setup. */
+static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(eeh_eventlist);
+static void eeh_thread_launcher(void *);
+DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL);
+
+/**
+ * eeh_panic - call panic() for an eeh event that cannot be handled.
+ * The philosophy of this routine is that it is better to panic and
+ * halt the OS than it is to risk possible data corruption by
+ * oblivious device drivers that don't know better.
+ *
+ * @dev pci device that had an eeh event
+ * @reset_state current reset state of the device slot
+ */
+static void eeh_panic(struct pci_dev *dev, int reset_state)
+{
+ /*
+ * Since the panic_on_oops sysctl is used to halt the system
+ * in light of potential corruption, we can use it here.
+ */
+ if (panic_on_oops) {
+ panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,
+ pci_name(dev));
+ }
+ else {
+ printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",
+ reset_state, pci_name(dev));
+ }
+}
+
+/**
+ * eeh_event_handler - dispatch EEH events. The detection of a frozen
+ * slot can occur inside an interrupt, where it can be hard to do
+ * anything about it. The goal of this routine is to pull these
+ * detection events out of the context of the interrupt handler, and
+ * re-dispatch them for processing at a later time in a normal context.
+ *
+ * @dummy - unused
+ */
+static int eeh_event_handler(void * dummy)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ daemonize ("eehd");
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ event = NULL;
+ if (!list_empty(&eeh_eventlist)) {
+ event = list_entry(eeh_eventlist.next, struct eeh_event, list);
+ list_del(&event->list);
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+ if (event == NULL)
+ break;
+
+ printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
+ pci_name(event->dev));
+
+ eeh_panic (event->dev, event->state);
+
+ kfree(event);
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_thread_launcher
+ *
+ * @dummy - unused
+ */
+static void eeh_thread_launcher(void *dummy)
+{
+ if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0)
+ printk(KERN_ERR "Failed to start EEH daemon\n");
+}
+
+/**
+ * eeh_send_failure_event - generate a PCI error event
+ * @dev pci device
+ *
+ * This routine can be called within an interrupt context;
+ * the actual event will be delivered in a normal context
+ * (from a workqueue).
+ */
+int eeh_send_failure_event (struct device_node *dn,
+ struct pci_dev *dev,
+ int state,
+ int time_unavail)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ event = kmalloc(sizeof(*event), GFP_ATOMIC);
+ if (event == NULL) {
+ printk (KERN_ERR "EEH: out of memory, event not handled\n");
+ return 1;
+ }
+
+ if (dev)
+ pci_dev_get(dev);
+
+ event->dn = dn;
+ event->dev = dev;
+ event->state = state;
+ event->time_unavail = time_unavail;
+
+ /* We may or may not be called in an interrupt context */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_add(&event->list, &eeh_eventlist);
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+
+ schedule_work(&eeh_event_wq);
+
+ return 0;
+}
+
+/********************** END OF FILE ******************************/
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 513e2723149..97ba5214417 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -37,16 +37,15 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/pSeries_reconfig.h>
-#include <asm/systemcfg.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
+#include <asm/udbg.h>
#include "plpar_wrappers.h"
@@ -582,7 +581,7 @@ void iommu_init_early_pSeries(void)
return;
}
- if (systemcfg->platform & PLATFORM_LPAR) {
+ if (platform_is_lpar()) {
if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
ppc_md.tce_build = tce_buildmulti_pSeriesLP;
ppc_md.tce_free = tce_freemulti_pSeriesLP;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e384a5a9179..a50e5f3f396 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#define DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/kernel.h>
@@ -31,20 +31,21 @@
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/mmu_context.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
#include "plpar_wrappers.h"
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
#else
-#define DBG(fmt...)
+#define DBG_LOW(fmt...) do { } while(0)
#endif
/* in pSeries_hvCall.S */
@@ -276,8 +277,9 @@ void vpa_init(int cpu)
}
long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn,
- unsigned long vflags, unsigned long rflags)
+ unsigned long va, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize)
{
unsigned long lpar_rc;
unsigned long flags;
@@ -285,11 +287,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
unsigned long dummy0, dummy1;
- hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
-
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ "rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+#if 1
+ {
+ int i;
+ for (i=0;i<8;i++) {
+ unsigned long w0, w1;
+ plpar_pte_read(0, hpte_group, &w0, &w1);
+ BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
+ && (w0 & HPTE_V_VALID));
+ }
+ }
+#endif
/* Now fill in the actual HPTE */
/* Set CEC cookie to 0 */
@@ -299,23 +318,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
- /* XXX why is this here? - Anton */
+ /* Make pHyp happy */
if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
hpte_r &= ~_PAGE_COHERENT;
lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
hpte_r, &slot, &dummy0, &dummy1);
-
- if (unlikely(lpar_rc == H_PTEG_Full))
+ if (unlikely(lpar_rc == H_PTEG_Full)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" full\n");
return -1;
+ }
/*
* Since we try and ioremap PHBs we don't own, the pte insert
* will fail. However we must catch the failure in hash_page
* or we will loop forever, so return -2 in this case.
*/
- if (unlikely(lpar_rc != H_Success))
+ if (unlikely(lpar_rc != H_Success)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" lpar err %d\n", lpar_rc);
return -2;
+ }
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" -> slot: %d\n", slot & 7);
/* Because of iSeries, we have to pass down the secondary
* bucket bit here as well
@@ -340,10 +366,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
(0x1UL << 4), &dummy1, &dummy2);
-
if (lpar_rc == H_Success)
return i;
-
BUG_ON(lpar_rc != H_Not_Found);
slot_offset++;
@@ -371,20 +395,28 @@ static void pSeries_lpar_hptab_clear(void)
* We can probably optimize here and assume the high bits of newpp are
* already zero. For now I am paranoid.
*/
-static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+static long pSeries_lpar_hpte_updatepp(unsigned long slot,
+ unsigned long newpp,
+ unsigned long va,
+ int psize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
- if (large)
- avpn &= ~0x1UL;
+ want_v = hpte_encode_v(va, psize);
- lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
+ DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
+ want_v & HPTE_V_AVPN, slot, flags, psize);
- if (lpar_rc == H_Not_Found)
+ lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
+
+ if (lpar_rc == H_Not_Found) {
+ DBG_LOW("not found !\n");
return -1;
+ }
+
+ DBG_LOW("ok\n");
BUG_ON(lpar_rc != H_Success);
@@ -410,21 +442,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
-static long pSeries_lpar_hpte_find(unsigned long vpn)
+static long pSeries_lpar_hpte_find(unsigned long va, int psize)
{
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hpte_v = pSeries_lpar_hpte_getword0(slot);
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -441,17 +474,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
- unsigned long ea)
+ unsigned long ea,
+ int psize)
{
- unsigned long lpar_rc;
- unsigned long vsid, va, vpn, flags;
- long slot;
+ unsigned long lpar_rc, slot, vsid, va, flags;
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = pSeries_lpar_hpte_find(vpn);
+ slot = pSeries_lpar_hpte_find(va, psize);
BUG_ON(slot == -1);
flags = newpp & 7;
@@ -461,18 +492,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
- if (large)
- avpn &= ~0x1UL;
-
- lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
- &dummy2);
+ DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
+ slot, va, psize, local);
+ want_v = hpte_encode_v(va, psize);
+ lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
+ &dummy1, &dummy2);
if (lpar_rc == H_Not_Found)
return;
@@ -494,7 +525,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index c198656a3bb..999a9620b5c 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -107,7 +107,6 @@ static void __init pSeries_request_regions(void)
void __init pSeries_final_fixup(void)
{
- phbs_remap_io();
pSeries_request_regions();
pci_addr_cache_build();
@@ -123,7 +122,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
int i;
unsigned int reg;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (!platform_is_pseries())
return;
printk("Using INTC for W82c105 IDE controller.\n");
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 382f8c5b0e7..3bd1b3e0600 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -107,14 +107,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
lbuf[1]);
}
-static inline long plpar_set_xdabr(unsigned long address, unsigned long flags)
-{
- return plpar_hcall_norets(H_SET_XDABR, address, flags);
-}
-
-static inline long plpar_set_dabr(unsigned long val)
-{
- return plpar_hcall_norets(H_SET_DABR, val);
-}
-
#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 6562ff4b0a8..fbd214d68b0 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -48,7 +48,7 @@
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
-#include <asm/ppcdebug.h>
+#include <asm/udbg.h>
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 58c61219d08..d8864164dbe 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -286,10 +286,8 @@ static struct property *new_property(const char *name, const int length,
return new;
cleanup:
- if (new->name)
- kfree(new->name);
- if (new->value)
- kfree(new->value);
+ kfree(new->name);
+ kfree(new->value);
kfree(new);
return NULL;
}
@@ -410,7 +408,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (!platform_is_pseries())
return 0;
ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index e26b0420b6d..00cf331a1dc 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -482,10 +482,12 @@ static int __init rtas_init(void)
{
struct proc_dir_entry *entry;
- /* No RTAS, only warn if we are on a pSeries box */
+ if (!platform_is_pseries())
+ return 0;
+
+ /* No RTAS */
if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) {
- if (systemcfg->platform & PLATFORM_PSERIES)
- printk(KERN_INFO "rtasd: no event-scan on system\n");
+ printk(KERN_INFO "rtasd: no event-scan on system\n");
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
new file mode 100644
index 00000000000..2edc947f7c4
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -0,0 +1,235 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
+ *
+ * When ppc64 hardware fails the service processor dumps internal state
+ * of the system. After a reboot the operating system can access a dump
+ * of this data using this driver. A dump exists if the device-tree
+ * /chosen/ibm,scan-log-data property exists.
+ *
+ * This driver exports /proc/ppc64/scan-log-dump which can be read.
+ * The driver supports only sequential reads.
+ *
+ * The driver looks at a write to the driver for the single word "reset".
+ * If given, the driver will reset the scanlog so the platform can free it.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "scanlog"
+
+/* Status returns from ibm,scan-log-dump */
+#define SCANLOG_COMPLETE 0
+#define SCANLOG_HWERROR -1
+#define SCANLOG_CONTINUE 1
+
+#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0)
+
+static int scanlog_debug;
+static unsigned int ibm_scan_log_dump; /* RTAS token */
+static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */
+
+static ssize_t scanlog_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode * inode = file->f_dentry->d_inode;
+ struct proc_dir_entry *dp;
+ unsigned int *data;
+ int status;
+ unsigned long len, off;
+ unsigned int wait_time;
+
+ dp = PDE(inode);
+ data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: read failed no data\n");
+ return -EIO;
+ }
+
+ if (count > RTAS_DATA_BUF_SIZE)
+ count = RTAS_DATA_BUF_SIZE;
+
+ if (count < 1024) {
+ /* This is the min supported by this RTAS call. Rather
+ * than do all the buffering we insist the user code handle
+ * larger reads. As long as cp works... :)
+ */
+ printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
+ return -EINVAL;
+ }
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ for (;;) {
+ wait_time = 500; /* default wait if no data */
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
+ status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
+ (u32) __pa(rtas_data_buf), (u32) count);
+ memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+ spin_unlock(&rtas_data_buf_lock);
+
+ DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n",
+ status, data[0], data[1], data[2]);
+ switch (status) {
+ case SCANLOG_COMPLETE:
+ DEBUG("hit eof\n");
+ return 0;
+ case SCANLOG_HWERROR:
+ DEBUG("hardware error reading scan log data\n");
+ return -EIO;
+ case SCANLOG_CONTINUE:
+ /* We may or may not have data yet */
+ len = data[1];
+ off = data[2];
+ if (len > 0) {
+ if (copy_to_user(buf, ((char *)data)+off, len))
+ return -EFAULT;
+ return len;
+ }
+ /* Break to sleep default time */
+ break;
+ default:
+ if (status > 9900 && status <= 9905) {
+ wait_time = rtas_extended_busy_delay_time(status);
+ } else {
+ printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status);
+ return -EIO;
+ }
+ }
+ /* Apparently no data yet. Wait and try again. */
+ msleep_interruptible(wait_time);
+ }
+ /*NOTREACHED*/
+}
+
+static ssize_t scanlog_write(struct file * file, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ char stkbuf[20];
+ int status;
+
+ if (count > 19) count = 19;
+ if (copy_from_user (stkbuf, buf, count)) {
+ return -EFAULT;
+ }
+ stkbuf[count] = 0;
+
+ if (buf) {
+ if (strncmp(stkbuf, "reset", 5) == 0) {
+ DEBUG("reset scanlog\n");
+ status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
+ DEBUG("rtas returns %d\n", status);
+ } else if (strncmp(stkbuf, "debugon", 7) == 0) {
+ printk(KERN_ERR "scanlog: debug on\n");
+ scanlog_debug = 1;
+ } else if (strncmp(stkbuf, "debugoff", 8) == 0) {
+ printk(KERN_ERR "scanlog: debug off\n");
+ scanlog_debug = 0;
+ }
+ }
+ return count;
+}
+
+static int scanlog_open(struct inode * inode, struct file * file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+ unsigned int *data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: open failed no data\n");
+ return -EIO;
+ }
+
+ if (data[0] != 0) {
+ /* This imperfect test stops a second copy of the
+ * data (or a reset while data is being copied)
+ */
+ return -EBUSY;
+ }
+
+ data[0] = 0; /* re-init so we restart the scan */
+
+ return 0;
+}
+
+static int scanlog_release(struct inode * inode, struct file * file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+ unsigned int *data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: release failed no data\n");
+ return -EIO;
+ }
+ data[0] = 0;
+
+ return 0;
+}
+
+struct file_operations scanlog_fops = {
+ .owner = THIS_MODULE,
+ .read = scanlog_read,
+ .write = scanlog_write,
+ .open = scanlog_open,
+ .release = scanlog_release,
+};
+
+int __init scanlog_init(void)
+{
+ struct proc_dir_entry *ent;
+
+ ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
+ if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_ERR "scan-log-dump not implemented on this system\n");
+ return -EIO;
+ }
+
+ ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL);
+ if (ent) {
+ ent->proc_fops = &scanlog_fops;
+ /* Ideally we could allocate a buffer < 4G */
+ ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR "Failed to allocate a buffer\n");
+ remove_proc_entry("scan-log-dump", ent->parent);
+ return -ENOMEM;
+ }
+ ((unsigned int *)ent->data)[0] = 0;
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n");
+ return -EIO;
+ }
+ proc_ppc64_scan_log_dump = ent;
+
+ return 0;
+}
+
+void __exit scanlog_cleanup(void)
+{
+ if (proc_ppc64_scan_log_dump) {
+ kfree(proc_ppc64_scan_log_dump->data);
+ remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent);
+ }
+}
+
+module_init(scanlog_init);
+module_exit(scanlog_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 65bee939eec..e94247c28d4 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -65,6 +65,7 @@
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
+#include <asm/smp.h>
#include "plpar_wrappers.h"
@@ -248,7 +249,7 @@ static void __init pSeries_setup_arch(void)
ppc_md.idle_loop = default_idle;
}
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
else
ppc_md.enable_pmcs = power4_enable_pmcs;
@@ -353,14 +354,15 @@ static void pSeries_mach_cpu_die(void)
static int pseries_set_dabr(unsigned long dabr)
{
- if (firmware_has_feature(FW_FEATURE_XDABR)) {
- /* We want to catch accesses from kernel and userspace */
- return plpar_set_xdabr(dabr, H_DABRX_KERNEL | H_DABRX_USER);
- }
-
- return plpar_set_dabr(dabr);
+ return plpar_hcall_norets(H_SET_DABR, dabr);
}
+static int pseries_set_xdabr(unsigned long dabr)
+{
+ /* We want to catch accesses from kernel and userspace */
+ return plpar_hcall_norets(H_SET_XDABR, dabr,
+ H_DABRX_KERNEL | H_DABRX_USER);
+}
/*
* Early initialization. Relocation is on but do not reference unbolted pages
@@ -376,7 +378,7 @@ static void __init pSeries_init_early(void)
fw_feature_init();
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
hpte_init_lpar();
else {
hpte_init_native();
@@ -386,7 +388,7 @@ static void __init pSeries_init_early(void)
generic_find_legacy_serial_ports(&physport, &default_speed);
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
find_udbg_vterm();
else if (physport) {
/* Map the uart for udbg. */
@@ -396,8 +398,10 @@ static void __init pSeries_init_early(void)
DBG("Hello World !\n");
}
- if (firmware_has_feature(FW_FEATURE_XDABR | FW_FEATURE_DABR))
+ if (firmware_has_feature(FW_FEATURE_DABR))
ppc_md.set_dabr = pseries_set_dabr;
+ else if (firmware_has_feature(FW_FEATURE_XDABR))
+ ppc_md.set_dabr = pseries_set_xdabr;
iommu_init_early_pSeries();
@@ -465,6 +469,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
* more.
*/
clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
/*
* SMT dynamic mode. Cede will result in this thread going
@@ -477,6 +482,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
cede_processor();
else
local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
} else {
/*
* Give the HV an opportunity at the processor, since we are
@@ -488,11 +494,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
static void pseries_dedicated_idle(void)
{
- long oldval;
struct paca_struct *lpaca = get_paca();
unsigned int cpu = smp_processor_id();
unsigned long start_snooze;
unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
/*
@@ -501,10 +507,7 @@ static void pseries_dedicated_idle(void)
*/
lpaca->lppaca.idle = 1;
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
start_snooze = __get_tb() +
*smt_snooze_delay * tb_ticks_per_usec;
@@ -527,15 +530,14 @@ static void pseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
lpaca->lppaca.idle = 0;
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
cpu_die();
@@ -579,7 +581,9 @@ static void pseries_shared_idle(void)
lpaca->lppaca.idle = 0;
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
cpu_die();
@@ -588,7 +592,7 @@ static void pseries_shared_idle(void)
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
return PCI_PROBE_DEVTREE;
return PCI_PROBE_NORMAL;
}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 7a243e8ccd7..3ba794ca328 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -46,6 +46,7 @@
#include <asm/rtas.h>
#include <asm/pSeries_reconfig.h>
#include <asm/mpic.h>
+#include <asm/systemcfg.h>
#include "plpar_wrappers.h"
@@ -96,7 +97,7 @@ int pSeries_cpu_disable(void)
int cpu = smp_processor_id();
cpu_clear(cpu, cpu_online_map);
- systemcfg->processorCount--;
+ _systemcfg->processorCount--;
/*fix boot_cpuid here*/
if (cpu == boot_cpuid)
@@ -441,7 +442,7 @@ void __init smp_init_pSeries(void)
smp_ops->cpu_die = pSeries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (platform_is_lpar())
pSeries_reconfig_notifier_register(&pSeries_smp_nb);
#endif
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index c72c86f05cb..72ac18067ec 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -545,7 +545,9 @@ nextnode:
of_node_put(np);
}
- if (systemcfg->platform == PLATFORM_PSERIES) {
+ if (platform_is_lpar())
+ ops = &pSeriesLP_ops;
+ else {
#ifdef CONFIG_SMP
for_each_cpu(i) {
int hard_id;
@@ -561,12 +563,11 @@ nextnode:
#else
xics_per_cpu[0] = ioremap(intr_base, intr_size);
#endif /* CONFIG_SMP */
- } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
- ops = &pSeriesLP_ops;
}
xics_8259_pic.enable = i8259_pic.enable;
xics_8259_pic.disable = i8259_pic.disable;
+ xics_8259_pic.end = i8259_pic.end;
for (i = 0; i < 16; ++i)
get_irq_desc(i)->handler = &xics_8259_pic;
for (; i < NR_IRQS; ++i)
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 90bce6e0c19..b7ac32fdd77 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -207,6 +207,9 @@ void __init i8259_init(unsigned long intack_addr, int offset)
spin_unlock_irqrestore(&i8259_lock, flags);
+ for (i = 0; i < NUM_ISA_INTERRUPTS; ++i)
+ irq_desc[offset + i].handler = &i8259_pic;
+
/* reserve our resources */
setup_irq(offset + 2, &i8259_irqaction);
request_resource(&ioport_resource, &pic1_iores);
@@ -216,6 +219,4 @@ void __init i8259_init(unsigned long intack_addr, int offset)
if (intack_addr != 0)
pci_intack = ioremap(intack_addr, 1);
- for (i = 0; i < NUM_ISA_INTERRUPTS; ++i)
- irq_desc[offset + i].handler = &i8259_pic;
}
diff --git a/arch/powerpc/sysdev/u3_iommu.c b/arch/powerpc/sysdev/u3_iommu.c
index 607722178c1..f32baf7f469 100644
--- a/arch/powerpc/sysdev/u3_iommu.c
+++ b/arch/powerpc/sysdev/u3_iommu.c
@@ -37,7 +37,6 @@
#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/prom.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
@@ -227,7 +226,7 @@ static void iommu_table_u3_setup(void)
iommu_table_u3.it_busno = 0;
iommu_table_u3.it_offset = 0;
/* it_size is in number of entries */
- iommu_table_u3.it_size = dart_tablesize / sizeof(u32);
+ iommu_table_u3.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR;
/* Initialize the common IOMMU code */
iommu_table_u3.it_base = (unsigned long)dart_vbase;
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 79a784f0e7a..b20312e5ed2 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_8xx) += start_8xx.o
obj-$(CONFIG_6xx) += start_32.o
obj-$(CONFIG_4xx) += start_32.o
obj-$(CONFIG_PPC64) += start_64.o
-obj-y += xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o
+obj-y += xmon.o ppc-dis.o ppc-opc.o setjmp.o nonstdio.o
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
new file mode 100644
index 00000000000..78765833f4c
--- /dev/null
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <asm/time.h>
+#include "nonstdio.h"
+
+int xmon_putchar(int c)
+{
+ char ch = c;
+
+ if (c == '\n')
+ xmon_putchar('\r');
+ return xmon_write(&ch, 1) == 1? c: -1;
+}
+
+static char line[256];
+static char *lineptr;
+static int lineleft;
+
+int xmon_expect(const char *str, unsigned long timeout)
+{
+ int c;
+ unsigned long t0;
+
+ /* assume 25MHz default timebase if tb_ticks_per_sec not set yet */
+ timeout *= tb_ticks_per_sec? tb_ticks_per_sec: 25000000;
+ t0 = get_tbl();
+ do {
+ lineptr = line;
+ for (;;) {
+ c = xmon_read_poll();
+ if (c == -1) {
+ if (get_tbl() - t0 > timeout)
+ return 0;
+ continue;
+ }
+ if (c == '\n')
+ break;
+ if (c != '\r' && lineptr < &line[sizeof(line) - 1])
+ *lineptr++ = c;
+ }
+ *lineptr = 0;
+ } while (strstr(line, str) == NULL);
+ return 1;
+}
+
+int xmon_getchar(void)
+{
+ int c;
+
+ if (lineleft == 0) {
+ lineptr = line;
+ for (;;) {
+ c = xmon_readchar();
+ if (c == -1 || c == 4)
+ break;
+ if (c == '\r' || c == '\n') {
+ *lineptr++ = '\n';
+ xmon_putchar('\n');
+ break;
+ }
+ switch (c) {
+ case 0177:
+ case '\b':
+ if (lineptr > line) {
+ xmon_putchar('\b');
+ xmon_putchar(' ');
+ xmon_putchar('\b');
+ --lineptr;
+ }
+ break;
+ case 'U' & 0x1F:
+ while (lineptr > line) {
+ xmon_putchar('\b');
+ xmon_putchar(' ');
+ xmon_putchar('\b');
+ --lineptr;
+ }
+ break;
+ default:
+ if (lineptr >= &line[sizeof(line) - 1])
+ xmon_putchar('\a');
+ else {
+ xmon_putchar(c);
+ *lineptr++ = c;
+ }
+ }
+ }
+ lineleft = lineptr - line;
+ lineptr = line;
+ }
+ if (lineleft == 0)
+ return -1;
+ --lineleft;
+ return *lineptr++;
+}
+
+char *xmon_gets(char *str, int nb)
+{
+ char *p;
+ int c;
+
+ for (p = str; p < str + nb - 1; ) {
+ c = xmon_getchar();
+ if (c == -1) {
+ if (p == str)
+ return NULL;
+ break;
+ }
+ *p++ = c;
+ if (c == '\n')
+ break;
+ }
+ *p = 0;
+ return str;
+}
+
+void xmon_printf(const char *format, ...)
+{
+ va_list args;
+ int n;
+ static char xmon_outbuf[1024];
+
+ va_start(args, format);
+ n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args);
+ va_end(args);
+ xmon_write(xmon_outbuf, n);
+}
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 84211a21c6f..47cebbd2b1b 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -1,22 +1,14 @@
-typedef int FILE;
-extern FILE *xmon_stdin, *xmon_stdout;
#define EOF (-1)
-#define stdin xmon_stdin
-#define stdout xmon_stdout
+
#define printf xmon_printf
-#define fprintf xmon_fprintf
-#define fputs xmon_fputs
-#define fgets xmon_fgets
#define putchar xmon_putchar
-#define getchar xmon_getchar
-#define putc xmon_putc
-#define getc xmon_getc
-#define fopen(n, m) NULL
-#define fflush(f) do {} while (0)
-#define fclose(f) do {} while (0)
-extern char *fgets(char *, int, void *);
-extern void xmon_printf(const char *, ...);
-extern void xmon_fprintf(void *, const char *, ...);
-extern void xmon_sprintf(char *, const char *, ...);
-#define perror(s) printf("%s: no files!\n", (s))
+extern int xmon_putchar(int c);
+extern int xmon_getchar(void);
+extern char *xmon_gets(char *, int);
+extern void xmon_printf(const char *, ...);
+extern void xmon_map_scc(void);
+extern int xmon_expect(const char *str, unsigned long timeout);
+extern int xmon_write(void *ptr, int nb);
+extern int xmon_readchar(void);
+extern int xmon_read_poll(void);
diff --git a/arch/powerpc/xmon/setjmp.S b/arch/powerpc/xmon/setjmp.S
index f8e40dfd2bf..96a91f10e2e 100644
--- a/arch/powerpc/xmon/setjmp.S
+++ b/arch/powerpc/xmon/setjmp.S
@@ -14,61 +14,61 @@
_GLOBAL(xmon_setjmp)
mflr r0
- STL r0,0(r3)
- STL r1,SZL(r3)
- STL r2,2*SZL(r3)
+ PPC_STL r0,0(r3)
+ PPC_STL r1,SZL(r3)
+ PPC_STL r2,2*SZL(r3)
mfcr r0
- STL r0,3*SZL(r3)
- STL r13,4*SZL(r3)
- STL r14,5*SZL(r3)
- STL r15,6*SZL(r3)
- STL r16,7*SZL(r3)
- STL r17,8*SZL(r3)
- STL r18,9*SZL(r3)
- STL r19,10*SZL(r3)
- STL r20,11*SZL(r3)
- STL r21,12*SZL(r3)
- STL r22,13*SZL(r3)
- STL r23,14*SZL(r3)
- STL r24,15*SZL(r3)
- STL r25,16*SZL(r3)
- STL r26,17*SZL(r3)
- STL r27,18*SZL(r3)
- STL r28,19*SZL(r3)
- STL r29,20*SZL(r3)
- STL r30,21*SZL(r3)
- STL r31,22*SZL(r3)
+ PPC_STL r0,3*SZL(r3)
+ PPC_STL r13,4*SZL(r3)
+ PPC_STL r14,5*SZL(r3)
+ PPC_STL r15,6*SZL(r3)
+ PPC_STL r16,7*SZL(r3)
+ PPC_STL r17,8*SZL(r3)
+ PPC_STL r18,9*SZL(r3)
+ PPC_STL r19,10*SZL(r3)
+ PPC_STL r20,11*SZL(r3)
+ PPC_STL r21,12*SZL(r3)
+ PPC_STL r22,13*SZL(r3)
+ PPC_STL r23,14*SZL(r3)
+ PPC_STL r24,15*SZL(r3)
+ PPC_STL r25,16*SZL(r3)
+ PPC_STL r26,17*SZL(r3)
+ PPC_STL r27,18*SZL(r3)
+ PPC_STL r28,19*SZL(r3)
+ PPC_STL r29,20*SZL(r3)
+ PPC_STL r30,21*SZL(r3)
+ PPC_STL r31,22*SZL(r3)
li r3,0
blr
_GLOBAL(xmon_longjmp)
- CMPI r4,0
+ PPC_LCMPI r4,0
bne 1f
li r4,1
-1: LDL r13,4*SZL(r3)
- LDL r14,5*SZL(r3)
- LDL r15,6*SZL(r3)
- LDL r16,7*SZL(r3)
- LDL r17,8*SZL(r3)
- LDL r18,9*SZL(r3)
- LDL r19,10*SZL(r3)
- LDL r20,11*SZL(r3)
- LDL r21,12*SZL(r3)
- LDL r22,13*SZL(r3)
- LDL r23,14*SZL(r3)
- LDL r24,15*SZL(r3)
- LDL r25,16*SZL(r3)
- LDL r26,17*SZL(r3)
- LDL r27,18*SZL(r3)
- LDL r28,19*SZL(r3)
- LDL r29,20*SZL(r3)
- LDL r30,21*SZL(r3)
- LDL r31,22*SZL(r3)
- LDL r0,3*SZL(r3)
+1: PPC_LL r13,4*SZL(r3)
+ PPC_LL r14,5*SZL(r3)
+ PPC_LL r15,6*SZL(r3)
+ PPC_LL r16,7*SZL(r3)
+ PPC_LL r17,8*SZL(r3)
+ PPC_LL r18,9*SZL(r3)
+ PPC_LL r19,10*SZL(r3)
+ PPC_LL r20,11*SZL(r3)
+ PPC_LL r21,12*SZL(r3)
+ PPC_LL r22,13*SZL(r3)
+ PPC_LL r23,14*SZL(r3)
+ PPC_LL r24,15*SZL(r3)
+ PPC_LL r25,16*SZL(r3)
+ PPC_LL r26,17*SZL(r3)
+ PPC_LL r27,18*SZL(r3)
+ PPC_LL r28,19*SZL(r3)
+ PPC_LL r29,20*SZL(r3)
+ PPC_LL r30,21*SZL(r3)
+ PPC_LL r31,22*SZL(r3)
+ PPC_LL r0,3*SZL(r3)
mtcrf 0x38,r0
- LDL r0,0(r3)
- LDL r1,SZL(r3)
- LDL r2,2*SZL(r3)
+ PPC_LL r0,0(r3)
+ PPC_LL r1,SZL(r3)
+ PPC_LL r2,2*SZL(r3)
mtlr r0
mr r3,r4
blr
@@ -84,52 +84,52 @@ _GLOBAL(xmon_longjmp)
* different ABIs, though).
*/
_GLOBAL(xmon_save_regs)
- STL r0,0*SZL(r3)
- STL r2,2*SZL(r3)
- STL r3,3*SZL(r3)
- STL r4,4*SZL(r3)
- STL r5,5*SZL(r3)
- STL r6,6*SZL(r3)
- STL r7,7*SZL(r3)
- STL r8,8*SZL(r3)
- STL r9,9*SZL(r3)
- STL r10,10*SZL(r3)
- STL r11,11*SZL(r3)
- STL r12,12*SZL(r3)
- STL r13,13*SZL(r3)
- STL r14,14*SZL(r3)
- STL r15,15*SZL(r3)
- STL r16,16*SZL(r3)
- STL r17,17*SZL(r3)
- STL r18,18*SZL(r3)
- STL r19,19*SZL(r3)
- STL r20,20*SZL(r3)
- STL r21,21*SZL(r3)
- STL r22,22*SZL(r3)
- STL r23,23*SZL(r3)
- STL r24,24*SZL(r3)
- STL r25,25*SZL(r3)
- STL r26,26*SZL(r3)
- STL r27,27*SZL(r3)
- STL r28,28*SZL(r3)
- STL r29,29*SZL(r3)
- STL r30,30*SZL(r3)
- STL r31,31*SZL(r3)
+ PPC_STL r0,0*SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ PPC_STL r3,3*SZL(r3)
+ PPC_STL r4,4*SZL(r3)
+ PPC_STL r5,5*SZL(r3)
+ PPC_STL r6,6*SZL(r3)
+ PPC_STL r7,7*SZL(r3)
+ PPC_STL r8,8*SZL(r3)
+ PPC_STL r9,9*SZL(r3)
+ PPC_STL r10,10*SZL(r3)
+ PPC_STL r11,11*SZL(r3)
+ PPC_STL r12,12*SZL(r3)
+ PPC_STL r13,13*SZL(r3)
+ PPC_STL r14,14*SZL(r3)
+ PPC_STL r15,15*SZL(r3)
+ PPC_STL r16,16*SZL(r3)
+ PPC_STL r17,17*SZL(r3)
+ PPC_STL r18,18*SZL(r3)
+ PPC_STL r19,19*SZL(r3)
+ PPC_STL r20,20*SZL(r3)
+ PPC_STL r21,21*SZL(r3)
+ PPC_STL r22,22*SZL(r3)
+ PPC_STL r23,23*SZL(r3)
+ PPC_STL r24,24*SZL(r3)
+ PPC_STL r25,25*SZL(r3)
+ PPC_STL r26,26*SZL(r3)
+ PPC_STL r27,27*SZL(r3)
+ PPC_STL r28,28*SZL(r3)
+ PPC_STL r29,29*SZL(r3)
+ PPC_STL r30,30*SZL(r3)
+ PPC_STL r31,31*SZL(r3)
/* go up one stack frame for SP */
- LDL r4,0(r1)
- STL r4,1*SZL(r3)
+ PPC_LL r4,0(r1)
+ PPC_STL r4,1*SZL(r3)
/* get caller's LR */
- LDL r0,LRSAVE(r4)
- STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
- STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ PPC_LL r0,LRSAVE(r4)
+ PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
mfmsr r0
- STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
mfctr r0
- STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
mfxer r0
- STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
mfcr r0
- STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
li r0,0
- STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
blr
diff --git a/arch/powerpc/xmon/start_32.c b/arch/powerpc/xmon/start_32.c
index 69b658c0f76..c2464df4217 100644
--- a/arch/powerpc/xmon/start_32.c
+++ b/arch/powerpc/xmon/start_32.c
@@ -11,7 +11,6 @@
#include <linux/cuda.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/sysrq.h>
#include <linux/bitops.h>
#include <asm/xmon.h>
#include <asm/prom.h>
@@ -22,10 +21,11 @@
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/btext.h>
+#include <asm/time.h>
+#include "nonstdio.h"
static volatile unsigned char __iomem *sccc, *sccd;
unsigned int TXRDY, RXRDY, DLAB;
-static int xmon_expect(const char *str, unsigned int timeout);
static int use_serial;
static int use_screen;
@@ -33,16 +33,6 @@ static int via_modem;
static int xmon_use_sccb;
static struct device_node *channel_node;
-#define TB_SPEED 25000000
-
-static inline unsigned int readtb(void)
-{
- unsigned int ret;
-
- asm volatile("mftb %0" : "=r" (ret) :);
- return ret;
-}
-
void buf_access(void)
{
if (DLAB)
@@ -91,23 +81,7 @@ static unsigned long chrp_find_phys_io_base(void)
}
#endif /* CONFIG_PPC_CHRP */
-#ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_xmon(int key, struct pt_regs *regs,
- struct tty_struct *tty)
-{
- xmon(regs);
-}
-
-static struct sysrq_key_op sysrq_xmon_op =
-{
- .handler = sysrq_handle_xmon,
- .help_msg = "Xmon",
- .action_msg = "Entering xmon",
-};
-#endif
-
-void
-xmon_map_scc(void)
+void xmon_map_scc(void)
{
#ifdef CONFIG_PPC_MULTIPLATFORM
volatile unsigned char __iomem *base;
@@ -217,8 +191,6 @@ xmon_map_scc(void)
RXRDY = 1;
DLAB = 0x80;
#endif /* platform */
-
- register_sysrq_key('x', &sysrq_xmon_op);
}
static int scc_initialized = 0;
@@ -238,8 +210,7 @@ static inline void do_poll_adb(void)
#endif /* CONFIG_ADB_CUDA */
}
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
char *p = ptr;
int i, c, ct;
@@ -311,8 +282,7 @@ static unsigned char xmon_shift_keytab[128] =
"\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */
"\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */
-static int
-xmon_get_adb_key(void)
+static int xmon_get_adb_key(void)
{
int k, t, on;
@@ -350,32 +320,21 @@ xmon_get_adb_key(void)
}
#endif /* CONFIG_BOOTX_TEXT */
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- char *p = ptr;
- int i;
-
#ifdef CONFIG_BOOTX_TEXT
- if (use_screen) {
- for (i = 0; i < nb; ++i)
- *p++ = xmon_get_adb_key();
- return i;
- }
+ if (use_screen)
+ return xmon_get_adb_key();
#endif
- if (!scc_initialized)
- xmon_init_scc();
- for (i = 0; i < nb; ++i) {
+ if (!scc_initialized)
+ xmon_init_scc();
while ((*sccc & RXRDY) == 0)
- do_poll_adb();
+ do_poll_adb();
buf_access();
- *p++ = *sccd;
- }
- return i;
+ return *sccd;
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
if ((*sccc & RXRDY) == 0) {
do_poll_adb();
@@ -395,8 +354,7 @@ static unsigned char scc_inittab[] = {
3, 0xc1, /* rx enable, 8 bits */
};
-void
-xmon_init_scc(void)
+void xmon_init_scc(void)
{
if ( _machine == _MACH_chrp )
{
@@ -410,6 +368,7 @@ xmon_init_scc(void)
else if ( _machine == _MACH_Pmac )
{
int i, x;
+ unsigned long timeout;
if (channel_node != 0)
pmac_call_feature(
@@ -424,8 +383,12 @@ xmon_init_scc(void)
PMAC_FTR_MODEM_ENABLE,
channel_node, 0, 1);
printk(KERN_INFO "Modem powered up by debugger !\n");
- t0 = readtb();
- while (readtb() - t0 < 3*TB_SPEED)
+ t0 = get_tbl();
+ timeout = 3 * tb_ticks_per_sec;
+ if (timeout == 0)
+ /* assume 25MHz if tb_ticks_per_sec not set */
+ timeout = 75000000;
+ while (get_tbl() - t0 < timeout)
eieio();
}
/* use the B channel if requested */
@@ -447,164 +410,19 @@ xmon_init_scc(void)
scc_initialized = 1;
if (via_modem) {
for (;;) {
- xmon_write(NULL, "ATE1V1\r", 7);
+ xmon_write("ATE1V1\r", 7);
if (xmon_expect("OK", 5)) {
- xmon_write(NULL, "ATA\r", 4);
+ xmon_write("ATA\r", 4);
if (xmon_expect("CONNECT", 40))
break;
}
- xmon_write(NULL, "+++", 3);
+ xmon_write("+++", 3);
xmon_expect("OK", 3);
}
}
}
-void *xmon_stdin;
-void *xmon_stdout;
-void *xmon_stderr;
-
-int xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int xmon_expect(const char *str, unsigned int timeout)
-{
- int c;
- unsigned int t0;
-
- timeout *= TB_SPEED;
- t0 = readtb();
- do {
- lineptr = line;
- for (;;) {
- c = xmon_read_poll();
- if (c == -1) {
- if (readtb() - t0 > timeout)
- return 0;
- continue;
- }
- if (c == '\n')
- break;
- if (c != '\r' && lineptr < &line[sizeof(line) - 1])
- *lineptr++ = c;
- }
- *lineptr = 0;
- } while (strstr(line, str) == NULL);
- return 1;
-}
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return NULL;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
-
-void
-xmon_enter(void)
+void xmon_enter(void)
{
#ifdef CONFIG_ADB_PMU
if (_machine == _MACH_Pmac) {
@@ -613,8 +431,7 @@ xmon_enter(void)
#endif
}
-void
-xmon_leave(void)
+void xmon_leave(void)
{
#ifdef CONFIG_ADB_PMU
if (_machine == _MACH_Pmac) {
diff --git a/arch/powerpc/xmon/start_64.c b/arch/powerpc/xmon/start_64.c
index e50c158191e..712552c4f24 100644
--- a/arch/powerpc/xmon/start_64.c
+++ b/arch/powerpc/xmon/start_64.c
@@ -6,182 +6,29 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sysrq.h>
-#include <linux/init.h>
#include <asm/machdep.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/prom.h>
-#include <asm/processor.h>
#include <asm/udbg.h>
-#include <asm/system.h>
#include "nonstdio.h"
-#ifdef CONFIG_MAGIC_SYSRQ
-
-static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
- struct tty_struct *tty)
-{
- /* ensure xmon is enabled */
- xmon_init(1);
- debugger(pt_regs);
-}
-
-static struct sysrq_key_op sysrq_xmon_op =
+void xmon_map_scc(void)
{
- .handler = sysrq_handle_xmon,
- .help_msg = "Xmon",
- .action_msg = "Entering xmon",
-};
-
-static int __init setup_xmon_sysrq(void)
-{
- register_sysrq_key('x', &sysrq_xmon_op);
- return 0;
}
-__initcall(setup_xmon_sysrq);
-#endif /* CONFIG_MAGIC_SYSRQ */
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
return udbg_write(ptr, nb);
}
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- return udbg_read(ptr, nb);
+ if (udbg_getc)
+ return udbg_getc();
+ return -1;
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
if (udbg_getc_poll)
return udbg_getc_poll();
return -1;
}
-
-FILE *xmon_stdin;
-FILE *xmon_stdout;
-
-int
-xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int
-xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int
-xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return NULL;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
diff --git a/arch/powerpc/xmon/start_8xx.c b/arch/powerpc/xmon/start_8xx.c
index a48bd594cf6..4c17b0486ad 100644
--- a/arch/powerpc/xmon/start_8xx.c
+++ b/arch/powerpc/xmon/start_8xx.c
@@ -15,273 +15,30 @@
#include <asm/8xx_immap.h>
#include <asm/mpc8xx.h>
#include <asm/commproc.h>
+#include "nonstdio.h"
-extern void xmon_printf(const char *fmt, ...);
extern int xmon_8xx_write(char *str, int nb);
extern int xmon_8xx_read_poll(void);
extern int xmon_8xx_read_char(void);
-void prom_drawhex(uint);
-void prom_drawstring(const char *str);
-static int use_screen = 1; /* default */
-
-#define TB_SPEED 25000000
-
-static inline unsigned int readtb(void)
-{
- unsigned int ret;
-
- asm volatile("mftb %0" : "=r" (ret) :);
- return ret;
-}
-
-void buf_access(void)
-{
-}
-
-void
-xmon_map_scc(void)
+void xmon_map_scc(void)
{
-
cpmp = (cpm8xx_t *)&(((immap_t *)IMAP_ADDR)->im_cpm);
- use_screen = 0;
-
- prom_drawstring("xmon uses serial port\n");
}
-static int scc_initialized = 0;
-
void xmon_init_scc(void);
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
- char *p = ptr;
- int i, c, ct;
-
- if (!scc_initialized)
- xmon_init_scc();
-
return(xmon_8xx_write(ptr, nb));
}
-int xmon_wants_key;
-
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- char *p = ptr;
- int i;
-
- if (!scc_initialized)
- xmon_init_scc();
-
- for (i = 0; i < nb; ++i) {
- *p++ = xmon_8xx_read_char();
- }
- return i;
+ return xmon_8xx_read_char();
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
return(xmon_8xx_read_poll());
}
-
-void
-xmon_init_scc()
-{
- scc_initialized = 1;
-}
-
-#if 0
-extern int (*prom_entry)(void *);
-
-int
-xmon_exit(void)
-{
- struct prom_args {
- char *service;
- } args;
-
- for (;;) {
- args.service = "exit";
- (*prom_entry)(&args);
- }
-}
-#endif
-
-void *xmon_stdin;
-void *xmon_stdout;
-void *xmon_stderr;
-
-void
-xmon_init(void)
-{
-}
-
-int
-xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int
-xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int
-xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-#if 0
-int xmon_expect(const char *str, unsigned int timeout)
-{
- int c;
- unsigned int t0;
-
- timeout *= TB_SPEED;
- t0 = readtb();
- do {
- lineptr = line;
- for (;;) {
- c = xmon_read_poll();
- if (c == -1) {
- if (readtb() - t0 > timeout)
- return 0;
- continue;
- }
- if (c == '\n')
- break;
- if (c != '\r' && lineptr < &line[sizeof(line) - 1])
- *lineptr++ = c;
- }
- *lineptr = 0;
- } while (strstr(line, str) == NULL);
- return 1;
-}
-#endif
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return 0;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
-
-void
-prom_drawhex(uint val)
-{
- unsigned char buf[10];
-
- int i;
- for (i = 7; i >= 0; i--)
- {
- buf[i] = "0123456789abcdef"[val & 0x0f];
- val >>= 4;
- }
- buf[8] = '\0';
- xmon_fputs(buf, xmon_stdout);
-}
-
-void
-prom_drawstring(const char *str)
-{
- xmon_fputs(str, xmon_stdout);
-}
diff --git a/arch/powerpc/xmon/subr_prf.c b/arch/powerpc/xmon/subr_prf.c
deleted file mode 100644
index b48738c6dd3..00000000000
--- a/arch/powerpc/xmon/subr_prf.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Written by Cort Dougan to replace the version originally used
- * by Paul Mackerras, which came from NetBSD and thus had copyright
- * conflicts with Linux.
- *
- * This file makes liberal use of the standard linux utility
- * routines to reduce the size of the binary. We assume we can
- * trust some parts of Linux inside the debugger.
- * -- Cort (cort@cs.nmt.edu)
- *
- * Copyright (C) 1999 Cort Dougan.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <stdarg.h>
-#include "nonstdio.h"
-
-extern int xmon_write(void *, void *, int);
-
-void xmon_vfprintf(void *f, const char *fmt, va_list ap)
-{
- static char xmon_buf[2048];
- int n;
-
- n = vsprintf(xmon_buf, fmt, ap);
- xmon_write(f, xmon_buf, n);
-}
-
-void xmon_printf(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- xmon_vfprintf(stdout, fmt, ap);
- va_end(ap);
-}
-EXPORT_SYMBOL(xmon_printf);
-
-void xmon_fprintf(void *f, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- xmon_vfprintf(f, fmt, ap);
- va_end(ap);
-}
-
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1124f114620..cfcb2a56d66 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1,7 +1,7 @@
/*
* Routines providing a simple monitor for use on the PowerMac.
*
- * Copyright (C) 1996 Paul Mackerras.
+ * Copyright (C) 1996-2005 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -18,6 +18,7 @@
#include <linux/kallsyms.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/sysrq.h>
#include <asm/ptrace.h>
#include <asm/string.h>
@@ -144,15 +145,10 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
static const char *getvecname(unsigned long vec);
extern int print_insn_powerpc(unsigned long, unsigned long, int);
-extern void printf(const char *fmt, ...);
-extern void xmon_vfprintf(void *f, const char *fmt, va_list ap);
-extern int xmon_putc(int c, void *f);
-extern int putchar(int ch);
extern void xmon_enter(void);
extern void xmon_leave(void);
-extern int xmon_read_poll(void);
extern long setjmp(long *);
extern void longjmp(long *, long);
extern void xmon_save_regs(struct pt_regs *);
@@ -748,7 +744,6 @@ cmds(struct pt_regs *excp)
printf("%x:", smp_processor_id());
#endif /* CONFIG_SMP */
printf("mon> ");
- fflush(stdout);
flush_input();
termch = 0;
cmd = skipbl();
@@ -1797,7 +1792,7 @@ memex(void)
for(;;){
if (!mnoread)
n = mread(adrs, val, size);
- printf("%.16x%c", adrs, brev? 'r': ' ');
+ printf(REG"%c", adrs, brev? 'r': ' ');
if (!mnoread) {
if (brev)
byterev(val, size);
@@ -1976,17 +1971,18 @@ prdump(unsigned long adrs, long ndump)
nr = mread(adrs, temp, r);
adrs += nr;
for (m = 0; m < r; ++m) {
- if ((m & 7) == 0 && m > 0)
- putchar(' ');
+ if ((m & (sizeof(long) - 1)) == 0 && m > 0)
+ putchar(' ');
if (m < nr)
printf("%.2x", temp[m]);
else
printf("%s", fault_chars[fault_type]);
}
- if (m <= 8)
- printf(" ");
- for (; m < 16; ++m)
+ for (; m < 16; ++m) {
+ if ((m & (sizeof(long) - 1)) == 0)
+ putchar(' ');
printf(" ");
+ }
printf(" |");
for (m = 0; m < r; ++m) {
if (m < nr) {
@@ -2151,7 +2147,6 @@ memzcan(void)
ok = mread(a, &v, 1);
if (ok && !ook) {
printf("%.8x .. ", a);
- fflush(stdout);
} else if (!ok && ook)
printf("%.8x\n", a - mskip);
ook = ok;
@@ -2372,7 +2367,7 @@ int
inchar(void)
{
if (lineptr == NULL || *lineptr == 0) {
- if (fgets(line, sizeof(line), stdin) == NULL) {
+ if (xmon_gets(line, sizeof(line)) == NULL) {
lineptr = NULL;
return EOF;
}
@@ -2526,4 +2521,29 @@ void xmon_init(int enable)
__debugger_dabr_match = NULL;
__debugger_fault_handler = NULL;
}
+ xmon_map_scc();
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
+ struct tty_struct *tty)
+{
+ /* ensure xmon is enabled */
+ xmon_init(1);
+ debugger(pt_regs);
+}
+
+static struct sysrq_key_op sysrq_xmon_op =
+{
+ .handler = sysrq_handle_xmon,
+ .help_msg = "Xmon",
+ .action_msg = "Entering xmon",
+};
+
+static int __init setup_xmon_sysrq(void)
+{
+ register_sysrq_key('x', &sysrq_xmon_op);
+ return 0;
}
+__initcall(setup_xmon_sysrq);
+#endif /* CONFIG_MAGIC_SYSRQ */