author	Dave Kleikamp <shaggy@austin.ibm.com>	2005-07-13 08:57:38 -0500
committer	Dave Kleikamp <shaggy@austin.ibm.com>	2005-07-13 08:57:38 -0500
commit	f7f24758ac98a506770bc5910d33567610fa3403 (patch)
tree	ff7fad3d01bf9dc2e2e54b908f9fca4891e1ee72 /arch/ppc64
parent	b38a3ab3d1bb0dc3288f73903d4dc4672b5cd2d0 (diff)
parent	c32511e2718618f0b53479eb36e07439aa363a74 (diff)
Merge with /home/shaggy/git/linus-clean/
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Diffstat (limited to 'arch/ppc64')
-rw-r--r--	arch/ppc64/Kconfig	40
-rw-r--r--	arch/ppc64/boot/Makefile	5
-rw-r--r--	arch/ppc64/boot/main.c	8
-rw-r--r--	arch/ppc64/boot/mknote.c	43
-rw-r--r--	arch/ppc64/boot/piggyback.c	83
-rw-r--r--	arch/ppc64/boot/prom.c	16
-rw-r--r--	arch/ppc64/kernel/ItLpQueue.c	294
-rw-r--r--	arch/ppc64/kernel/LparData.c	11
-rw-r--r--	arch/ppc64/kernel/Makefile	1
-rw-r--r--	arch/ppc64/kernel/cputable.c	365
-rw-r--r--	arch/ppc64/kernel/head.S	16
-rw-r--r--	arch/ppc64/kernel/hvconsole.c	51
-rw-r--r--	arch/ppc64/kernel/iSeries_proc.c	48
-rw-r--r--	arch/ppc64/kernel/iSeries_setup.c	119
-rw-r--r--	arch/ppc64/kernel/idle.c	283
-rw-r--r--	arch/ppc64/kernel/irq.c	9
-rw-r--r--	arch/ppc64/kernel/kprobes.c	125
-rw-r--r--	arch/ppc64/kernel/lparcfg.c	1
-rw-r--r--	arch/ppc64/kernel/machine_kexec.c	302
-rw-r--r--	arch/ppc64/kernel/maple_setup.c	3
-rw-r--r--	arch/ppc64/kernel/mf.c	6
-rw-r--r--	arch/ppc64/kernel/misc.S	181
-rw-r--r--	arch/ppc64/kernel/mpic.c	29
-rw-r--r--	arch/ppc64/kernel/mpic.h	3
-rw-r--r--	arch/ppc64/kernel/nvram.c	8
-rw-r--r--	arch/ppc64/kernel/of_device.c	15
-rw-r--r--	arch/ppc64/kernel/pSeries_setup.c	160
-rw-r--r--	arch/ppc64/kernel/pSeries_smp.c	5
-rw-r--r--	arch/ppc64/kernel/pacaData.c	212
-rw-r--r--	arch/ppc64/kernel/pci.c	22
-rw-r--r--	arch/ppc64/kernel/pmac_setup.c	5
-rw-r--r--	arch/ppc64/kernel/ppc_ksyms.c	1
-rw-r--r--	arch/ppc64/kernel/process.c	4
-rw-r--r--	arch/ppc64/kernel/ptrace.c	4
-rw-r--r--	arch/ppc64/kernel/setup.c	30
-rw-r--r--	arch/ppc64/kernel/sys_ppc32.c	54
-rw-r--r--	arch/ppc64/kernel/sysfs.c	21
-rw-r--r--	arch/ppc64/kernel/time.c	9
-rw-r--r--	arch/ppc64/kernel/vdso32/vdso32.lds.S	4
-rw-r--r--	arch/ppc64/kernel/xics.c	25
-rw-r--r--	arch/ppc64/mm/hash_native.c	47
41 files changed, 1578 insertions, 1090 deletions
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index cb27068bfcd..fdd8afba715 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -142,6 +142,23 @@ config PPC_SPLPAR
processors, that is, which share physical processors between
two or more partitions.
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on PPC_MULTIPLATFORM && EXPERIMENTAL
+ help
+ kexec is a system call that implements the ability to shut down your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot,
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shut down, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
config IBMVIO
depends on PPC_PSERIES || PPC_ISERIES
bool
@@ -270,26 +287,7 @@ config SCHED_SMT
when dealing with POWER5 cpus at a cost of slightly increased
overhead in some places. If unsure say N here.
-config PREEMPT
- bool "Preemptible Kernel"
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
-
-config PREEMPT_BKL
- bool "Preempt The Big Kernel Lock"
- depends on PREEMPT
- default y
- help
- This option reduces the latency of the kernel by making the
- big kernel lock preemptible.
-
- Say Y here if you are building a kernel for a desktop system.
- Say N if you are unsure.
+source "kernel/Kconfig.preempt"
config EEH
bool "PCI Extended Error Handling (EEH)" if EMBEDDED
@@ -431,6 +429,8 @@ config CMDLINE
endmenu
+source "net/Kconfig"
+
source "drivers/Kconfig"
source "fs/Kconfig"
diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile
index d3e1d6af920..683b2d43c15 100644
--- a/arch/ppc64/boot/Makefile
+++ b/arch/ppc64/boot/Makefile
@@ -52,7 +52,7 @@ obj-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.o, $(section)))
src-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.c, $(section)))
gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section)))
-hostprogs-y := piggy addnote addRamDisk
+hostprogs-y := addnote addRamDisk
targets += zImage zImage.initrd imagesize.c \
$(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \
$(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \
@@ -78,9 +78,6 @@ addsection = $(CROSS32OBJCOPY) $(1) \
quiet_cmd_addnote = ADDNOTE $@
cmd_addnote = $(CROSS32LD) $(BOOTLFLAGS) -o $@ $(obj-boot) && $(obj)/addnote $@
-quiet_cmd_piggy = PIGGY $@
- cmd_piggy = $(obj)/piggyback $(@:.o=) < $< | $(CROSS32AS) -o $@
-
$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE
$(call if_changed,gzip)
diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c
index da12ea2ca46..199d9804f61 100644
--- a/arch/ppc64/boot/main.c
+++ b/arch/ppc64/boot/main.c
@@ -17,7 +17,6 @@
extern void *finddevice(const char *);
extern int getprop(void *, const char *, void *, int);
-extern void printk(char *fmt, ...);
extern void printf(const char *fmt, ...);
extern int sprintf(char *buf, const char *fmt, ...);
void gunzip(void *, int, unsigned char *, int *);
@@ -147,10 +146,10 @@ void start(unsigned long a1, unsigned long a2, void *promptr)
}
a1 = initrd.addr;
a2 = initrd.size;
- printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r",
+ printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r",
initrd.addr, (unsigned long)_initrd_start, initrd.size);
memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size);
- printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr));
+ printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr));
}
/* Eventually gunzip the kernel */
@@ -201,9 +200,6 @@ void start(unsigned long a1, unsigned long a2, void *promptr)
flush_cache((void *)vmlinux.addr, vmlinux.size);
- if (a1)
- printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr));
-
kernel_entry = (kernel_entry_t)vmlinux.addr;
#ifdef DEBUG
printf( "kernel:\n\r"
diff --git a/arch/ppc64/boot/mknote.c b/arch/ppc64/boot/mknote.c
deleted file mode 100644
index 120cc1d8973..00000000000
--- a/arch/ppc64/boot/mknote.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) Cort Dougan 1999.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Generate a note section as per the CHRP specification.
- *
- */
-
-#include <stdio.h>
-
-#define PL(x) printf("%c%c%c%c", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff );
-
-int main(void)
-{
-/* header */
- /* namesz */
- PL(strlen("PowerPC")+1);
- /* descrsz */
- PL(6*4);
- /* type */
- PL(0x1275);
- /* name */
- printf("PowerPC"); printf("%c", 0);
-
-/* descriptor */
- /* real-mode */
- PL(0xffffffff);
- /* real-base */
- PL(0x00c00000);
- /* real-size */
- PL(0xffffffff);
- /* virt-base */
- PL(0xffffffff);
- /* virt-size */
- PL(0xffffffff);
- /* load-base */
- PL(0x4000);
- return 0;
-}
diff --git a/arch/ppc64/boot/piggyback.c b/arch/ppc64/boot/piggyback.c
deleted file mode 100644
index 235c7a87269..00000000000
--- a/arch/ppc64/boot/piggyback.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 2001 IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-
-extern long ce_exec_config[];
-
-int main(int argc, char *argv[])
-{
- int i, cnt, pos, len;
- unsigned int cksum, val;
- unsigned char *lp;
- unsigned char buf[8192];
- char *varname;
- if (argc != 2)
- {
- fprintf(stderr, "usage: %s name <in-file >out-file\n",
- argv[0]);
- exit(1);
- }
-
- varname = strrchr(argv[1], '/');
- if (varname)
- varname++;
- else
- varname = argv[1];
-
- fprintf(stdout, "#\n");
- fprintf(stdout, "# Miscellaneous data structures:\n");
- fprintf(stdout, "# WARNING - this file is automatically generated!\n");
- fprintf(stdout, "#\n");
- fprintf(stdout, "\n");
- fprintf(stdout, "\t.data\n");
- fprintf(stdout, "\t.globl %s_data\n", varname);
- fprintf(stdout, "%s_data:\n", varname);
- pos = 0;
- cksum = 0;
- while ((len = read(0, buf, sizeof(buf))) > 0)
- {
- cnt = 0;
- lp = (unsigned char *)buf;
- len = (len + 3) & ~3; /* Round up to longwords */
- for (i = 0; i < len; i += 4)
- {
- if (cnt == 0)
- {
- fprintf(stdout, "\t.long\t");
- }
- fprintf(stdout, "0x%02X%02X%02X%02X", lp[0], lp[1], lp[2], lp[3]);
- val = *(unsigned long *)lp;
- cksum ^= val;
- lp += 4;
- if (++cnt == 4)
- {
- cnt = 0;
- fprintf(stdout, " # %x \n", pos+i-12);
- fflush(stdout);
- } else
- {
- fprintf(stdout, ",");
- }
- }
- if (cnt)
- {
- fprintf(stdout, "0\n");
- }
- pos += len;
- }
- fprintf(stdout, "\t.globl %s_len\n", varname);
- fprintf(stdout, "%s_len:\t.long\t0x%x\n", varname, pos);
- fflush(stdout);
- fclose(stdout);
- fprintf(stderr, "cksum = %x\n", cksum);
- exit(0);
-}
-
diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c
index d5218b15824..5e48b80ff5a 100644
--- a/arch/ppc64/boot/prom.c
+++ b/arch/ppc64/boot/prom.c
@@ -40,7 +40,7 @@ void *finddevice(const char *name);
int getprop(void *phandle, const char *name, void *buf, int buflen);
void chrpboot(int a1, int a2, void *prom); /* in main.c */
-void printk(char *fmt, ...);
+int printf(char *fmt, ...);
/* there is no convenient header to get this from... -- paulus */
extern unsigned long strlen(const char *);
@@ -220,7 +220,7 @@ readchar(void)
case 1:
return ch;
case -1:
- printk("read(stdin) returned -1\r\n");
+ printf("read(stdin) returned -1\r\n");
return -1;
}
}
@@ -627,18 +627,6 @@ int sprintf(char * buf, const char *fmt, ...)
static char sprint_buf[1024];
-void
-printk(char *fmt, ...)
-{
- va_list args;
- int n;
-
- va_start(args, fmt);
- n = vsprintf(sprint_buf, fmt, args);
- va_end(args);
- write(stdout, sprint_buf, n);
-}
-
int
printf(char *fmt, ...)
{
diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c
index cdea00d7707..4231861288a 100644
--- a/arch/ppc64/kernel/ItLpQueue.c
+++ b/arch/ppc64/kernel/ItLpQueue.c
@@ -1,7 +1,7 @@
/*
* ItLpQueue.c
* Copyright (C) 2001 Mike Corrigan IBM Corporation
- *
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -11,156 +11,252 @@
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/system.h>
#include <asm/paca.h>
#include <asm/iSeries/ItLpQueue.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvCallEvent.h>
-static __inline__ int set_inUse( struct ItLpQueue * lpQueue )
-{
- int t;
- u32 * inUseP = &(lpQueue->xInUseWord);
-
- __asm__ __volatile__("\n\
-1: lwarx %0,0,%2 \n\
- cmpwi 0,%0,0 \n\
- li %0,0 \n\
- bne- 2f \n\
- addi %0,%0,1 \n\
- stwcx. %0,0,%2 \n\
- bne- 1b \n\
-2: eieio"
- : "=&r" (t), "=m" (lpQueue->xInUseWord)
- : "r" (inUseP), "m" (lpQueue->xInUseWord)
- : "cc");
-
- return t;
-}
+/*
+ * The LpQueue is used to pass event data from the hypervisor to
+ * the partition. This is where I/O interrupt events are communicated.
+ *
+ * It is written to by the hypervisor so cannot end up in the BSS.
+ */
+struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data")));
-static __inline__ void clear_inUse( struct ItLpQueue * lpQueue )
-{
- lpQueue->xInUseWord = 0;
-}
+DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts);
+
+static char *event_types[HvLpEvent_Type_NumTypes] = {
+ "Hypervisor",
+ "Machine Facilities",
+ "Session Manager",
+ "SPD I/O",
+ "Virtual Bus",
+ "PCI I/O",
+ "RIO I/O",
+ "Virtual Lan",
+ "Virtual I/O"
+};
/* Array of LpEvent handler functions */
extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
-unsigned long ItLpQueueInProcess = 0;
-struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue )
+static struct HvLpEvent * get_next_hvlpevent(void)
{
- struct HvLpEvent * nextLpEvent =
- (struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
- if ( nextLpEvent->xFlags.xValid ) {
+ struct HvLpEvent * event;
+ event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
+
+ if (event->xFlags.xValid) {
/* rmb() needed only for weakly consistent machines (regatta) */
rmb();
/* Set pointer to next potential event */
- lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 +
- LpEventAlign ) /
- LpEventAlign ) *
- LpEventAlign;
+ hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 +
+ LpEventAlign) / LpEventAlign) * LpEventAlign;
+
/* Wrap to beginning if no room at end */
- if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr)
- lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr;
+ if (hvlpevent_queue.xSlicCurEventPtr >
+ hvlpevent_queue.xSlicLastValidEventPtr) {
+ hvlpevent_queue.xSlicCurEventPtr =
+ hvlpevent_queue.xSlicEventStackPtr;
+ }
+ } else {
+ event = NULL;
}
- else
- nextLpEvent = NULL;
- return nextLpEvent;
+ return event;
}
-int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue )
+static unsigned long spread_lpevents = NR_CPUS;
+
+int hvlpevent_is_pending(void)
{
- int retval = 0;
- struct HvLpEvent * nextLpEvent;
- if ( lpQueue ) {
- nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
- retval = nextLpEvent->xFlags.xValid | lpQueue->xPlicOverflowIntPending;
- }
- return retval;
+ struct HvLpEvent *next_event;
+
+ if (smp_processor_id() >= spread_lpevents)
+ return 0;
+
+ next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
+
+ return next_event->xFlags.xValid |
+ hvlpevent_queue.xPlicOverflowIntPending;
}
-void ItLpQueue_clearValid( struct HvLpEvent * event )
+static void hvlpevent_clear_valid(struct HvLpEvent * event)
{
- /* Clear the valid bit of the event
- * Also clear bits within this event that might
- * look like valid bits (on 64-byte boundaries)
- */
- unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) /
- LpEventAlign ) - 1;
- switch ( extra ) {
- case 3:
- ((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid=0;
- case 2:
- ((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid=0;
- case 1:
- ((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid=0;
- case 0:
- ;
+ /* Tell the Hypervisor that we're done with this event.
+ * Also clear bits within this event that might look like valid bits.
+ * ie. on 64-byte boundaries.
+ */
+ struct HvLpEvent *tmp;
+ unsigned extra = ((event->xSizeMinus1 + LpEventAlign) /
+ LpEventAlign) - 1;
+
+ switch (extra) {
+ case 3:
+ tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign);
+ tmp->xFlags.xValid = 0;
+ case 2:
+ tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign);
+ tmp->xFlags.xValid = 0;
+ case 1:
+ tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign);
+ tmp->xFlags.xValid = 0;
}
+
mb();
+
event->xFlags.xValid = 0;
}
-unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs )
+void process_hvlpevents(struct pt_regs *regs)
{
- unsigned numIntsProcessed = 0;
- struct HvLpEvent * nextLpEvent;
+ struct HvLpEvent * event;
/* If we have recursed, just return */
- if ( !set_inUse( lpQueue ) )
- return 0;
-
- if (ItLpQueueInProcess == 0)
- ItLpQueueInProcess = 1;
- else
- BUG();
+ if (!spin_trylock(&hvlpevent_queue.lock))
+ return;
for (;;) {
- nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue );
- if ( nextLpEvent ) {
- /* Count events to return to caller
- * and count processed events in lpQueue
- */
- ++numIntsProcessed;
- lpQueue->xLpIntCount++;
- /* Call appropriate handler here, passing
+ event = get_next_hvlpevent();
+ if (event) {
+ /* Call appropriate handler here, passing
* a pointer to the LpEvent. The handler
* must make a copy of the LpEvent if it
* needs it in a bottom half. (perhaps for
* an ACK)
- *
- * Handlers are responsible for ACK processing
+ *
+ * Handlers are responsible for ACK processing
*
* The Hypervisor guarantees that LpEvents will
* only be delivered with types that we have
* registered for, so no type check is necessary
* here!
- */
- if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes )
- lpQueue->xLpIntCountByType[nextLpEvent->xType]++;
- if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes &&
- lpEventHandler[nextLpEvent->xType] )
- lpEventHandler[nextLpEvent->xType](nextLpEvent, regs);
+ */
+ if (event->xType < HvLpEvent_Type_NumTypes)
+ __get_cpu_var(hvlpevent_counts)[event->xType]++;
+ if (event->xType < HvLpEvent_Type_NumTypes &&
+ lpEventHandler[event->xType])
+ lpEventHandler[event->xType](event, regs);
else
- printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType );
-
- ItLpQueue_clearValid( nextLpEvent );
- } else if ( lpQueue->xPlicOverflowIntPending )
+ printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType );
+
+ hvlpevent_clear_valid(event);
+ } else if (hvlpevent_queue.xPlicOverflowIntPending)
/*
* No more valid events. If overflow events are
* pending process them
*/
- HvCallEvent_getOverflowLpEvents( lpQueue->xIndex);
+ HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex);
else
break;
}
- ItLpQueueInProcess = 0;
- mb();
- clear_inUse( lpQueue );
+ spin_unlock(&hvlpevent_queue.lock);
+}
+
+static int set_spread_lpevents(char *str)
+{
+ unsigned long val = simple_strtoul(str, NULL, 0);
+
+ /*
+ * The parameter is the number of processors to share in processing
+ * lp events.
+ */
+ if (( val > 0) && (val <= NR_CPUS)) {
+ spread_lpevents = val;
+ printk("lpevent processing spread over %ld processors\n", val);
+ } else {
+ printk("invalid spread_lpevents %ld\n", val);
+ }
- get_paca()->lpevent_count += numIntsProcessed;
+ return 1;
+}
+__setup("spread_lpevents=", set_spread_lpevents);
+
+void setup_hvlpevent_queue(void)
+{
+ void *eventStack;
+
+ /*
+ * Allocate a page for the Event Stack. The Hypervisor needs the
+ * absolute real address, so we subtract out the KERNELBASE and add
+ * in the absolute real address of the kernel load area.
+ */
+ eventStack = alloc_bootmem_pages(LpEventStackSize);
+ memset(eventStack, 0, LpEventStackSize);
+
+ /* Invoke the hypervisor to initialize the event stack */
+ HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize);
+
+ hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack;
+ hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack;
+ hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack +
+ (LpEventStackSize - LpEventMaxSize);
+ hvlpevent_queue.xIndex = 0;
+}
+
+static int proc_lpevents_show(struct seq_file *m, void *v)
+{
+ int cpu, i;
+ unsigned long sum;
+ static unsigned long cpu_totals[NR_CPUS];
+
+ /* FIXME: do we care that there's no locking here? */
+ sum = 0;
+ for_each_online_cpu(cpu) {
+ cpu_totals[cpu] = 0;
+ for (i = 0; i < HvLpEvent_Type_NumTypes; i++) {
+ cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i];
+ }
+ sum += cpu_totals[cpu];
+ }
+
+ seq_printf(m, "LpEventQueue 0\n");
+ seq_printf(m, " events processed:\t%lu\n", sum);
+
+ for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) {
+ sum = 0;
+ for_each_online_cpu(cpu) {
+ sum += per_cpu(hvlpevent_counts, cpu)[i];
+ }
+
+ seq_printf(m, " %-20s %10lu\n", event_types[i], sum);
+ }
+
+ seq_printf(m, "\n events processed by processor:\n");
+
+ for_each_online_cpu(cpu) {
+ seq_printf(m, " CPU%02d %10lu\n", cpu, cpu_totals[cpu]);
+ }
+
+ return 0;
+}
+
+static int proc_lpevents_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_lpevents_show, NULL);
+}
- return numIntsProcessed;
+static struct file_operations proc_lpevents_operations = {
+ .open = proc_lpevents_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init proc_lpevents_init(void)
+{
+ struct proc_dir_entry *e;
+
+ e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
+ if (e)
+ e->proc_fops = &proc_lpevents_operations;
+
+ return 0;
}
+__initcall(proc_lpevents_init);
+
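The hand-rolled lwarx/stwcx. set_inUse()/clear_inUse() pair and the ItLpQueueInProcess flag are replaced by a standard spinlock taken with spin_trylock(), which keeps the bail-out-if-busy semantics with far less code. A minimal sketch of that guard pattern (names hypothetical):

    static DEFINE_SPINLOCK(guard_lock);

    static void process_queue_once(void)
    {
    	if (!spin_trylock(&guard_lock))
    		return;		/* queue is already being drained */

    	/* ... pull events off the queue and dispatch them ... */

    	spin_unlock(&guard_lock);
    }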
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c
index badc5a44361..6ffcf67dd50 100644
--- a/arch/ppc64/kernel/LparData.c
+++ b/arch/ppc64/kernel/LparData.c
@@ -28,13 +28,6 @@
#include <asm/iSeries/IoHriProcessorVpd.h>
#include <asm/iSeries/ItSpCommArea.h>
-/* The LpQueue is used to pass event data from the hypervisor to
- * the partition. This is where I/O interrupt events are communicated.
- */
-
-/* May be filled in by the hypervisor so cannot end up in the BSS */
-struct ItLpQueue xItLpQueue __attribute__((__section__(".data")));
-
/* The HvReleaseData is the root of the information shared between
* the hypervisor and Linux.
@@ -200,7 +193,7 @@ struct ItVpdAreas itVpdAreas = {
0,0,0, /* 13 - 15 */
sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */
0,0,0,0,0,0, /* 17 - 22 */
- sizeof(struct ItLpQueue),/* 23 length of Lp Queue */
+ sizeof(struct hvlpevent_queue), /* 23 length of Lp Queue */
0,0 /* 24 - 25 */
},
.xSlicVpdAdrs = { /* VPD addresses */
@@ -218,7 +211,7 @@ struct ItVpdAreas itVpdAreas = {
0,0,0, /* 13 - 15 */
&xIoHriProcessorVpd, /* 16 Proc Vpd */
0,0,0,0,0,0, /* 17 - 22 */
- &xItLpQueue, /* 23 Lp Queue */
+ &hvlpevent_queue, /* 23 Lp Queue */
0,0
}
};
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index dffbfb7ac8d..d9b2660ef22 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
bpa_iic.o spider-pic.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o
obj-$(CONFIG_EEH) += eeh.o
obj-$(CONFIG_PROC_FS) += proc_ppc64.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c
index 1d162c7c59d..8d4c46f6f0b 100644
--- a/arch/ppc64/kernel/cputable.c
+++ b/arch/ppc64/kernel/cputable.c
@@ -49,160 +49,219 @@ extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec);
#endif
struct cpu_spec cpu_specs[] = {
- { /* Power3 */
- 0xffff0000, 0x00400000, "POWER3 (630)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* Power3+ */
- 0xffff0000, 0x00410000, "POWER3 (630+)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* Northstar */
- 0xffff0000, 0x00330000, "RS64-II (northstar)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* Pulsar */
- 0xffff0000, 0x00340000, "RS64-III (pulsar)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* I-star */
- 0xffff0000, 0x00360000, "RS64-III (icestar)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* S-star */
- 0xffff0000, 0x00370000, "RS64-IV (sstar)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power3,
- COMMON_PPC64_FW
- },
- { /* Power4 */
- 0xffff0000, 0x00350000, "POWER4 (gp)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power4,
- COMMON_PPC64_FW
- },
- { /* Power4+ */
- 0xffff0000, 0x00380000, "POWER4+ (gq)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power4,
- COMMON_PPC64_FW
- },
- { /* PPC970 */
- 0xffff0000, 0x00390000, "PPC970",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
- CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
- 128, 128,
- __setup_cpu_ppc970,
- COMMON_PPC64_FW
- },
- { /* PPC970FX */
- 0xffff0000, 0x003c0000, "PPC970FX",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
- CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
- COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
- 128, 128,
- __setup_cpu_ppc970,
- COMMON_PPC64_FW
- },
- { /* Power5 */
- 0xffff0000, 0x003a0000, "POWER5 (gr)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
- CPU_FTR_MMCRA_SIHV,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power4,
- COMMON_PPC64_FW
- },
- { /* Power5 */
- 0xffff0000, 0x003b0000, "POWER5 (gs)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
- CPU_FTR_MMCRA_SIHV,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power4,
- COMMON_PPC64_FW
- },
- { /* BE DD1.x */
- 0xffff0000, 0x00700000, "Broadband Engine",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
- CPU_FTR_SMT,
- COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
- 128, 128,
- __setup_cpu_be,
- COMMON_PPC64_FW
- },
- { /* default match */
- 0x00000000, 0x00000000, "POWER4 (compatible)",
- CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
- CPU_FTR_PPCAS_ARCH_V2,
- COMMON_USER_PPC64,
- 128, 128,
- __setup_cpu_power4,
- COMMON_PPC64_FW
- }
+ { /* Power3 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00400000,
+ .cpu_name = "POWER3 (630)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Power3+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00410000,
+ .cpu_name = "POWER3 (630+)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Northstar */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00330000,
+ .cpu_name = "RS64-II (northstar)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Pulsar */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00340000,
+ .cpu_name = "RS64-III (pulsar)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* I-star */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00360000,
+ .cpu_name = "RS64-III (icestar)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* S-star */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00370000,
+ .cpu_name = "RS64-IV (sstar)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
+ CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power3,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Power4 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00350000,
+ .cpu_name = "POWER4 (gp)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power4,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Power4+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00380000,
+ .cpu_name = "POWER4+ (gq)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power4,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* PPC970 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00390000,
+ .cpu_name = "PPC970",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+ CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+ .cpu_user_features = COMMON_USER_PPC64 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_ppc970,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* PPC970FX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003c0000,
+ .cpu_name = "PPC970FX",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+ CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+ .cpu_user_features = COMMON_USER_PPC64 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_ppc970,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Power5 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003a0000,
+ .cpu_name = "POWER5 (gr)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
+ CPU_FTR_MMCRA_SIHV,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power4,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* Power5 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003b0000,
+ .cpu_name = "POWER5 (gs)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
+ CPU_FTR_MMCRA_SIHV,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power4,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* BE DD1.x */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00700000,
+ .cpu_name = "Broadband Engine",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+ CPU_FTR_SMT,
+ .cpu_user_features = COMMON_USER_PPC64 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_be,
+ .firmware_features = COMMON_PPC64_FW,
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "POWER4 (compatible)",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+ CPU_FTR_PPCAS_ARCH_V2,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power4,
+ .firmware_features = COMMON_PPC64_FW,
+ }
};
firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
- {FW_FEATURE_PFT, "hcall-pft"},
- {FW_FEATURE_TCE, "hcall-tce"},
- {FW_FEATURE_SPRG0, "hcall-sprg0"},
- {FW_FEATURE_DABR, "hcall-dabr"},
- {FW_FEATURE_COPY, "hcall-copy"},
- {FW_FEATURE_ASR, "hcall-asr"},
- {FW_FEATURE_DEBUG, "hcall-debug"},
- {FW_FEATURE_PERF, "hcall-perf"},
- {FW_FEATURE_DUMP, "hcall-dump"},
- {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
- {FW_FEATURE_MIGRATE, "hcall-migrate"},
- {FW_FEATURE_PERFMON, "hcall-perfmon"},
- {FW_FEATURE_CRQ, "hcall-crq"},
- {FW_FEATURE_VIO, "hcall-vio"},
- {FW_FEATURE_RDMA, "hcall-rdma"},
- {FW_FEATURE_LLAN, "hcall-lLAN"},
- {FW_FEATURE_BULK, "hcall-bulk"},
- {FW_FEATURE_XDABR, "hcall-xdabr"},
- {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
- {FW_FEATURE_SPLPAR, "hcall-splpar"},
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
};
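The cpu_specs conversion above is largely mechanical: positional aggregate initializers become C99 designated initializers (plus CPU_FTR_CTRL on the RS64 entries), which name each field, tolerate reordering, and zero anything left unmentioned. The two styles side by side, on an illustrative struct:

    struct example { unsigned int mask; unsigned int value; const char *name; };

    /* positional: fields must appear in declaration order */
    struct example a = { 0xffff0000, 0x00400000, "POWER3 (630)" };

    /* designated: self-documenting and order-independent */
    struct example b = {
    	.name	= "POWER3 (630)",
    	.mask	= 0xffff0000,
    	.value	= 0x00400000,
    };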
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 02c8f4e3e4b..93ebcac0d5a 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -308,6 +308,7 @@ exception_marker:
label##_pSeries: \
HMT_MEDIUM; \
mtspr SPRG1,r13; /* save r13 */ \
+ RUNLATCH_ON(r13); \
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
#define STD_EXCEPTION_ISERIES(n, label, area) \
@@ -315,6 +316,7 @@ label##_pSeries: \
label##_iSeries: \
HMT_MEDIUM; \
mtspr SPRG1,r13; /* save r13 */ \
+ RUNLATCH_ON(r13); \
EXCEPTION_PROLOG_ISERIES_1(area); \
EXCEPTION_PROLOG_ISERIES_2; \
b label##_common
@@ -324,6 +326,7 @@ label##_iSeries: \
label##_iSeries: \
HMT_MEDIUM; \
mtspr SPRG1,r13; /* save r13 */ \
+ RUNLATCH_ON(r13); \
EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \
lbz r10,PACAPROCENABLED(r13); \
cmpwi 0,r10,0; \
@@ -393,6 +396,7 @@ __start_interrupts:
_machine_check_pSeries:
HMT_MEDIUM
mtspr SPRG1,r13 /* save r13 */
+ RUNLATCH_ON(r13)
EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
. = 0x300
@@ -419,6 +423,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
data_access_slb_pSeries:
HMT_MEDIUM
mtspr SPRG1,r13
+ RUNLATCH_ON(r13)
mfspr r13,SPRG3 /* get paca address into r13 */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
std r10,PACA_EXSLB+EX_R10(r13)
@@ -439,6 +444,7 @@ data_access_slb_pSeries:
instruction_access_slb_pSeries:
HMT_MEDIUM
mtspr SPRG1,r13
+ RUNLATCH_ON(r13)
mfspr r13,SPRG3 /* get paca address into r13 */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
std r10,PACA_EXSLB+EX_R10(r13)
@@ -464,6 +470,7 @@ instruction_access_slb_pSeries:
.globl system_call_pSeries
system_call_pSeries:
HMT_MEDIUM
+ RUNLATCH_ON(r9)
mr r9,r13
mfmsr r10
mfspr r13,SPRG3
@@ -707,11 +714,13 @@ fwnmi_data_area:
system_reset_fwnmi:
HMT_MEDIUM
mtspr SPRG1,r13 /* save r13 */
+ RUNLATCH_ON(r13)
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
.globl machine_check_fwnmi
machine_check_fwnmi:
HMT_MEDIUM
mtspr SPRG1,r13 /* save r13 */
+ RUNLATCH_ON(r13)
EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
/*
@@ -848,6 +857,7 @@ unrecov_fer:
.align 7
.globl data_access_common
data_access_common:
+ RUNLATCH_ON(r10) /* It won't fit in the 0x300 handler */
mfspr r10,DAR
std r10,PACA_EXGEN+EX_DAR(r13)
mfspr r10,DSISR
@@ -1194,7 +1204,7 @@ _GLOBAL(pSeries_secondary_smp_init)
bl .__restore_cpu_setup
/* Set up a paca value for this processor. Since we have the
- * physical cpu id in r3, we need to search the pacas to find
+ * physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
LOADADDR(r13, paca) /* Get base vaddr of paca array */
@@ -1207,8 +1217,8 @@ _GLOBAL(pSeries_secondary_smp_init)
cmpwi r5,NR_CPUS
blt 1b
-99: HMT_LOW /* Couldn't find our CPU id */
- b 99b
+ mr r3,r24 /* not found, copy phys to r3 */
+ b .kexec_wait /* next kernel might do better */
2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */
/* From now on, r24 is expected to be logical cpuid */
diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c
index c72fb8ffe97..138e128a388 100644
--- a/arch/ppc64/kernel/hvconsole.c
+++ b/arch/ppc64/kernel/hvconsole.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <asm/hvcall.h>
#include <asm/hvconsole.h>
-#include <asm/prom.h>
/**
* hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
@@ -42,29 +41,14 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count)
unsigned long got;
if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
- (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) {
- /*
- * Work around a HV bug where it gives us a null
- * after every \r. -- paulus
- */
- if (got > 0) {
- int i;
- for (i = 1; i < got; ++i) {
- if (buf[i] == 0 && buf[i-1] == '\r') {
- --got;
- if (i < got)
- memmove(&buf[i], &buf[i+1],
- got - i);
- }
- }
- }
+ (unsigned long *)buf, (unsigned long *)buf+1) == H_Success)
return got;
- }
return 0;
}
EXPORT_SYMBOL(hvc_get_chars);
+
/**
* hvc_put_chars: send characters to firmware for denoted vterm adapter
* @vtermno: The vtermno or unit_address of the adapter from which the data
@@ -88,34 +72,3 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
}
EXPORT_SYMBOL(hvc_put_chars);
-
-/*
- * We hope/assume that the first vty found corresponds to the first console
- * device.
- */
-int hvc_find_vtys(void)
-{
- struct device_node *vty;
- int num_found = 0;
-
- for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
- vty = of_find_node_by_name(vty, "vty")) {
- uint32_t *vtermno;
-
- /* We have statically defined space for only a certain number of
- * console adapters. */
- if (num_found >= MAX_NR_HVC_CONSOLES)
- break;
-
- vtermno = (uint32_t *)get_property(vty, "reg", NULL);
- if (!vtermno)
- continue;
-
- if (device_is_compatible(vty, "hvterm1")) {
- hvc_instantiate(*vtermno, num_found);
- ++num_found;
- }
- }
-
- return num_found;
-}
diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c
index 356bd9931fc..0fe3116eba2 100644
--- a/arch/ppc64/kernel/iSeries_proc.c
+++ b/arch/ppc64/kernel/iSeries_proc.c
@@ -40,50 +40,6 @@ static int __init iseries_proc_create(void)
}
core_initcall(iseries_proc_create);
-static char *event_types[9] = {
- "Hypervisor\t\t",
- "Machine Facilities\t",
- "Session Manager\t",
- "SPD I/O\t\t",
- "Virtual Bus\t\t",
- "PCI I/O\t\t",
- "RIO I/O\t\t",
- "Virtual Lan\t\t",
- "Virtual I/O\t\t"
-};
-
-static int proc_lpevents_show(struct seq_file *m, void *v)
-{
- unsigned int i;
-
- seq_printf(m, "LpEventQueue 0\n");
- seq_printf(m, " events processed:\t%lu\n",
- (unsigned long)xItLpQueue.xLpIntCount);
-
- for (i = 0; i < 9; ++i)
- seq_printf(m, " %s %10lu\n", event_types[i],
- (unsigned long)xItLpQueue.xLpIntCountByType[i]);
-
- seq_printf(m, "\n events processed by processor:\n");
-
- for_each_online_cpu(i)
- seq_printf(m, " CPU%02d %10u\n", i, paca[i].lpevent_count);
-
- return 0;
-}
-
-static int proc_lpevents_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_lpevents_show, NULL);
-}
-
-static struct file_operations proc_lpevents_operations = {
- .open = proc_lpevents_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static unsigned long startTitan = 0;
static unsigned long startTb = 0;
@@ -148,10 +104,6 @@ static int __init iseries_proc_init(void)
{
struct proc_dir_entry *e;
- e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
- if (e)
- e->proc_fops = &proc_lpevents_operations;
-
e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL);
if (e)
e->proc_fops = &proc_titantod_operations;
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c
index 86966ce76b5..077c82fc9f3 100644
--- a/arch/ppc64/kernel/iSeries_setup.c
+++ b/arch/ppc64/kernel/iSeries_setup.c
@@ -24,7 +24,6 @@
#include <linux/smp.h>
#include <linux/param.h>
#include <linux/string.h>
-#include <linux/bootmem.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
@@ -676,7 +675,6 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
*/
static void __init iSeries_setup_arch(void)
{
- void *eventStack;
unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
/* Add an eye catcher and the systemcfg layout version number */
@@ -685,24 +683,7 @@ static void __init iSeries_setup_arch(void)
systemcfg->version.minor = SYSTEMCFG_MINOR;
/* Setup the Lp Event Queue */
-
- /* Allocate a page for the Event Stack
- * The hypervisor wants the absolute real address, so
- * we subtract out the KERNELBASE and add in the
- * absolute real address of the kernel load area
- */
- eventStack = alloc_bootmem_pages(LpEventStackSize);
- memset(eventStack, 0, LpEventStackSize);
-
- /* Invoke the hypervisor to initialize the event stack */
- HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize);
-
- /* Initialize fields in our Lp Event Queue */
- xItLpQueue.xSlicEventStackPtr = (char *)eventStack;
- xItLpQueue.xSlicCurEventPtr = (char *)eventStack;
- xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack +
- (LpEventStackSize - LpEventMaxSize);
- xItLpQueue.xIndex = 0;
+ setup_hvlpevent_queue();
/* Compute processor frequency */
procFreqHz = ((1UL << 34) * 1000000) /
@@ -853,27 +834,91 @@ static int __init iSeries_src_init(void)
late_initcall(iSeries_src_init);
-static int set_spread_lpevents(char *str)
+static inline void process_iSeries_events(void)
{
- unsigned long i;
- unsigned long val = simple_strtoul(str, NULL, 0);
+ asm volatile ("li 0,0x5555; sc" : : : "r0", "r3");
+}
+
+static void yield_shared_processor(void)
+{
+ unsigned long tb;
+
+ HvCall_setEnabledInterrupts(HvCall_MaskIPI |
+ HvCall_MaskLpEvent |
+ HvCall_MaskLpProd |
+ HvCall_MaskTimeout);
+
+ tb = get_tb();
+ /* Compute future tb value when yield should expire */
+ HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);
/*
- * The parameter is the number of processors to share in processing
- * lp events.
+ * The decrementer stops during the yield. Force a fake decrementer
+ * here and let the timer_interrupt code sort out the actual time.
*/
- if (( val > 0) && (val <= NR_CPUS)) {
- for (i = 1; i < val; ++i)
- paca[i].lpqueue_ptr = paca[0].lpqueue_ptr;
+ get_paca()->lppaca.int_dword.fields.decr_int = 1;
+ process_iSeries_events();
+}
- printk("lpevent processing spread over %ld processors\n", val);
- } else {
- printk("invalid spread_lpevents %ld\n", val);
+static int iseries_shared_idle(void)
+{
+ while (1) {
+ while (!need_resched() && !hvlpevent_is_pending()) {
+ local_irq_disable();
+ ppc64_runlatch_off();
+
+ /* Recheck with irqs off */
+ if (!need_resched() && !hvlpevent_is_pending())
+ yield_shared_processor();
+
+ HMT_medium();
+ local_irq_enable();
+ }
+
+ ppc64_runlatch_on();
+
+ if (hvlpevent_is_pending())
+ process_iSeries_events();
+
+ schedule();
+ }
+
+ return 0;
+}
+
+static int iseries_dedicated_idle(void)
+{
+ long oldval;
+
+ while (1) {
+ oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
+
+ if (!oldval) {
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while (!need_resched()) {
+ ppc64_runlatch_off();
+ HMT_low();
+
+ if (hvlpevent_is_pending()) {
+ HMT_medium();
+ ppc64_runlatch_on();
+ process_iSeries_events();
+ }
+ }
+
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ } else {
+ set_need_resched();
+ }
+
+ ppc64_runlatch_on();
+ schedule();
}
- return 1;
+ return 0;
}
-__setup("spread_lpevents=", set_spread_lpevents);
#ifndef CONFIG_PCI
void __init iSeries_init_IRQ(void) { }
@@ -900,5 +945,13 @@ void __init iSeries_early_setup(void)
ppc_md.get_rtc_time = iSeries_get_rtc_time;
ppc_md.calibrate_decr = iSeries_calibrate_decr;
ppc_md.progress = iSeries_progress;
+
+ if (get_paca()->lppaca.shared_proc) {
+ ppc_md.idle_loop = iseries_shared_idle;
+ printk(KERN_INFO "Using shared processor idle loop\n");
+ } else {
+ ppc_md.idle_loop = iseries_dedicated_idle;
+ printk(KERN_INFO "Using dedicated idle loop\n");
+ }
}
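The spread_lpevents= handling removed above reappears in ItLpQueue.c, now recording a CPU count instead of copying lpqueue_ptr between pacas. The __setup() mechanism both versions rely on registers an early command-line hook; a minimal sketch with a hypothetical option:

    #include <linux/init.h>
    #include <linux/kernel.h>

    static unsigned long my_limit = 1;	/* hypothetical tunable */

    static int set_my_limit(char *str)
    {
    	my_limit = simple_strtoul(str, NULL, 0);
    	return 1;	/* handled; set by booting with my_limit=N */
    }
    __setup("my_limit=", set_my_limit);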
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
index bdf13b4dc1c..954395d4263 100644
--- a/arch/ppc64/kernel/idle.c
+++ b/arch/ppc64/kernel/idle.c
@@ -20,109 +20,18 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/cpu.h>
-#include <linux/module.h>
#include <linux/sysctl.h>
-#include <linux/smp.h>
#include <asm/system.h>
#include <asm/processor.h>
-#include <asm/mmu.h>
#include <asm/cputable.h>
#include <asm/time.h>
-#include <asm/iSeries/HvCall.h>
-#include <asm/iSeries/ItLpQueue.h>
-#include <asm/plpar_wrappers.h>
#include <asm/systemcfg.h>
+#include <asm/machdep.h>
extern void power4_idle(void);
-static int (*idle_loop)(void);
-
-#ifdef CONFIG_PPC_ISERIES
-static unsigned long maxYieldTime = 0;
-static unsigned long minYieldTime = 0xffffffffffffffffUL;
-
-static inline void process_iSeries_events(void)
-{
- asm volatile ("li 0,0x5555; sc" : : : "r0", "r3");
-}
-
-static void yield_shared_processor(void)
-{
- unsigned long tb;
- unsigned long yieldTime;
-
- HvCall_setEnabledInterrupts(HvCall_MaskIPI |
- HvCall_MaskLpEvent |
- HvCall_MaskLpProd |
- HvCall_MaskTimeout);
-
- tb = get_tb();
- /* Compute future tb value when yield should expire */
- HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);
-
- yieldTime = get_tb() - tb;
- if (yieldTime > maxYieldTime)
- maxYieldTime = yieldTime;
-
- if (yieldTime < minYieldTime)
- minYieldTime = yieldTime;
-
- /*
- * The decrementer stops during the yield. Force a fake decrementer
- * here and let the timer_interrupt code sort out the actual time.
- */
- get_paca()->lppaca.int_dword.fields.decr_int = 1;
- process_iSeries_events();
-}
-
-static int iSeries_idle(void)
-{
- struct paca_struct *lpaca;
- long oldval;
-
- /* ensure iSeries run light will be out when idle */
- ppc64_runlatch_off();
-
- lpaca = get_paca();
-
- while (1) {
- if (lpaca->lppaca.shared_proc) {
- if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
- process_iSeries_events();
- if (!need_resched())
- yield_shared_processor();
- } else {
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- while (!need_resched()) {
- HMT_medium();
- if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
- process_iSeries_events();
- HMT_low();
- }
-
- HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
- }
- }
-
- ppc64_runlatch_on();
- schedule();
- ppc64_runlatch_off();
- }
-
- return 0;
-}
-
-#else
-
-static int default_idle(void)
+int default_idle(void)
{
long oldval;
unsigned int cpu = smp_processor_id();
@@ -134,7 +43,8 @@ static int default_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while (!need_resched() && !cpu_is_offline(cpu)) {
- barrier();
+ ppc64_runlatch_off();
+
/*
* Go into low thread priority and possibly
* low power mode.
@@ -149,6 +59,7 @@ static int default_idle(void)
set_need_resched();
}
+ ppc64_runlatch_on();
schedule();
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
cpu_die();
@@ -157,127 +68,19 @@ static int default_idle(void)
return 0;
}
-#ifdef CONFIG_PPC_PSERIES
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-int dedicated_idle(void)
+int native_idle(void)
{
- long oldval;
- struct paca_struct *lpaca = get_paca(), *ppaca;
- unsigned long start_snooze;
- unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
- unsigned int cpu = smp_processor_id();
-
- ppaca = &paca[cpu ^ 1];
-
while (1) {
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- lpaca->lppaca.idle = 1;
-
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- start_snooze = __get_tb() +
- *smt_snooze_delay * tb_ticks_per_usec;
- while (!need_resched() && !cpu_is_offline(cpu)) {
- /*
- * Go into low thread priority and possibly
- * low power mode.
- */
- HMT_low();
- HMT_very_low();
-
- if (*smt_snooze_delay == 0 ||
- __get_tb() < start_snooze)
- continue;
-
- HMT_medium();
-
- if (!(ppaca->lppaca.idle)) {
- local_irq_disable();
-
- /*
- * We are about to sleep the thread
- * and so wont be polling any
- * more.
- */
- clear_thread_flag(TIF_POLLING_NRFLAG);
-
- /*
- * SMT dynamic mode. Cede will result
- * in this thread going dormant, if the
- * partner thread is still doing work.
- * Thread wakes up if partner goes idle,
- * an interrupt is presented, or a prod
- * occurs. Returning from the cede
- * enables external interrupts.
- */
- if (!need_resched())
- cede_processor();
- else
- local_irq_enable();
- } else {
- /*
- * Give the HV an opportunity at the
- * processor, since we are not doing
- * any work.
- */
- poll_pending();
- }
- }
-
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
- }
-
- HMT_medium();
- lpaca->lppaca.idle = 0;
- schedule();
- if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
- cpu_die();
- }
- return 0;
-}
-
-static int shared_idle(void)
-{
- struct paca_struct *lpaca = get_paca();
- unsigned int cpu = smp_processor_id();
-
- while (1) {
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- lpaca->lppaca.idle = 1;
+ ppc64_runlatch_off();
- while (!need_resched() && !cpu_is_offline(cpu)) {
- local_irq_disable();
+ if (!need_resched())
+ power4_idle();
- /*
- * Yield the processor to the hypervisor. We return if
- * an external interrupt occurs (which are driven prior
- * to returning here) or if a prod occurs from another
- * processor. When returning here, external interrupts
- * are enabled.
- *
- * Check need_resched() again with interrupts disabled
- * to avoid a race.
- */
- if (!need_resched())
- cede_processor();
- else
- local_irq_enable();
+ if (need_resched()) {
+ ppc64_runlatch_on();
+ schedule();
}
- HMT_medium();
- lpaca->lppaca.idle = 0;
- schedule();
if (cpu_is_offline(smp_processor_id()) &&
system_state == SYSTEM_RUNNING)
cpu_die();
@@ -286,29 +89,10 @@ static int shared_idle(void)
return 0;
}
-#endif /* CONFIG_PPC_PSERIES */
-
-static int native_idle(void)
-{
- while(1) {
- /* check CPU type here */
- if (!need_resched())
- power4_idle();
- if (need_resched())
- schedule();
-
- if (cpu_is_offline(raw_smp_processor_id()) &&
- system_state == SYSTEM_RUNNING)
- cpu_die();
- }
- return 0;
-}
-
-#endif /* CONFIG_PPC_ISERIES */
-
void cpu_idle(void)
{
- idle_loop();
+ BUG_ON(NULL == ppc_md.idle_loop);
+ ppc_md.idle_loop();
}
int powersave_nap;
@@ -342,42 +126,3 @@ register_powersave_nap_sysctl(void)
}
__initcall(register_powersave_nap_sysctl);
#endif
-
-int idle_setup(void)
-{
- /*
- * Move that junk to each platform specific file, eventually define
- * a pSeries_idle for shared processor stuff
- */
-#ifdef CONFIG_PPC_ISERIES
- idle_loop = iSeries_idle;
- return 1;
-#else
- idle_loop = default_idle;
-#endif
-#ifdef CONFIG_PPC_PSERIES
- if (systemcfg->platform & PLATFORM_PSERIES) {
- if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
- if (get_paca()->lppaca.shared_proc) {
- printk(KERN_INFO "Using shared processor idle loop\n");
- idle_loop = shared_idle;
- } else {
- printk(KERN_INFO "Using dedicated idle loop\n");
- idle_loop = dedicated_idle;
- }
- } else {
- printk(KERN_INFO "Using default idle loop\n");
- idle_loop = default_idle;
- }
- }
-#endif /* CONFIG_PPC_PSERIES */
-#ifndef CONFIG_PPC_ISERIES
- if (systemcfg->platform == PLATFORM_POWERMAC ||
- systemcfg->platform == PLATFORM_MAPLE) {
- printk(KERN_INFO "Using native/NAP idle loop\n");
- idle_loop = native_idle;
- }
-#endif /* CONFIG_PPC_ISERIES */
-
- return 1;
-}
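With idle_setup() gone, idle-loop selection is pushed down into each platform's early setup through the new ppc_md.idle_loop hook, and the generic cpu_idle() merely dispatches through it. A sketch of the shape, assuming the machdep_calls member this series adds (platform name hypothetical):

    /* platform side: choose a loop during early setup */
    static void __init my_platform_early_setup(void)
    {
    	ppc_md.idle_loop = default_idle;	/* or native_idle, ... */
    }

    /* generic side: cpu_idle() then just calls ppc_md.idle_loop() */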
diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c
index 3defc8c33ad..f41afe54504 100644
--- a/arch/ppc64/kernel/irq.c
+++ b/arch/ppc64/kernel/irq.c
@@ -66,7 +66,6 @@ EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
int __irq_offset_value;
int ppc_spurious_interrupts;
-unsigned long lpevent_count;
u64 ppc64_interrupt_controller;
int show_interrupts(struct seq_file *p, void *v)
@@ -245,7 +244,7 @@ void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
spin_lock(&desc->lock);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, regs);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
@@ -269,7 +268,6 @@ out:
void do_IRQ(struct pt_regs *regs)
{
struct paca_struct *lpaca;
- struct ItLpQueue *lpq;
irq_enter();
@@ -295,9 +293,8 @@ void do_IRQ(struct pt_regs *regs)
iSeries_smp_message_recv(regs);
}
#endif /* CONFIG_SMP */
- lpq = lpaca->lpqueue_ptr;
- if (lpq && ItLpQueue_isLpIntPending(lpq))
- lpevent_count += ItLpQueue_process(lpq, regs);
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
irq_exit();
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
index 782ce3efa2c..a3d519518fb 100644
--- a/arch/ppc64/kernel/kprobes.c
+++ b/arch/ppc64/kernel/kprobes.c
@@ -36,6 +36,8 @@
#include <asm/kdebug.h>
#include <asm/sstep.h>
+static DECLARE_MUTEX(kprobe_mutex);
+
static struct kprobe *current_kprobe;
static unsigned long kprobe_status, kprobe_saved_msr;
static struct kprobe *kprobe_prev;
@@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p)
printk("Cannot register a kprobe on rfid or mtmsrd\n");
ret = -EINVAL;
}
+
+ /* insn must be on a special executable page on ppc64 */
+ if (!ret) {
+ up(&kprobe_mutex);
+ p->ainsn.insn = get_insn_slot();
+ down(&kprobe_mutex);
+ if (!p->ainsn.insn)
+ ret = -ENOMEM;
+ }
return ret;
}
@@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p)
void arch_remove_kprobe(struct kprobe *p)
{
+ up(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn);
+ down(&kprobe_mutex);
}
static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
+ kprobe_opcode_t insn = *p->ainsn.insn;
+
regs->msr |= MSR_SE;
- /*single step inline if it a breakpoint instruction*/
- if (p->opcode == BREAKPOINT_INSTRUCTION)
+
+ /* single step inline if it is a trap variant */
+ if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn))
regs->nip = (unsigned long)p->addr;
else
- regs->nip = (unsigned long)&p->ainsn.insn;
+ regs->nip = (unsigned long)p->ainsn.insn;
}
static inline void save_previous_kprobe(void)
@@ -105,6 +122,23 @@ static inline void restore_previous_kprobe(void)
kprobe_saved_msr = kprobe_saved_msr_prev;
}
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
+}
+
static inline int kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
@@ -195,6 +229,78 @@ no_kprobe:
}
/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path
+ * have a return probe installed on them, and/or more than one
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->nip = orig_ret_address;
+
+ unlock_kprobes();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we have handled unlocking
+ * and re-enabling preemption.
+ */
+ return 1;
+}
+
+/*
* Called after single-stepping. p->addr is the address of the
* instruction whose first byte has been replaced by the "breakpoint"
* instruction. To avoid the SMP problems that can occur when we
@@ -205,9 +311,10 @@ no_kprobe:
static void resume_execution(struct kprobe *p, struct pt_regs *regs)
{
int ret;
+ unsigned int insn = *p->ainsn.insn;
regs->nip = (unsigned long)p->addr;
- ret = emulate_step(regs, p->ainsn.insn[0]);
+ ret = emulate_step(regs, insn);
if (ret == 0)
regs->nip = (unsigned long)p->addr + 4;
}
@@ -331,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
return 1;
}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
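
A hedged usage sketch for the kretprobe machinery added above, following the register_kretprobe() API of this era; the probed address and names are hypothetical:

	#include <linux/kernel.h>
	#include <linux/kprobes.h>

	static int my_ret_handler(struct kretprobe_instance *ri,
				  struct pt_regs *regs)
	{
		/* on ppc64, gpr[3] carries the function's return value */
		printk("returned %lx to %p\n", regs->gpr[3], ri->ret_addr);
		return 0;
	}

	static struct kretprobe my_rp = {
		.handler   = my_ret_handler,
		.maxactive = 4,		/* concurrent instances to track */
	};

	/* my_rp.kp.addr = (kprobe_opcode_t *)addr_of_probed_function;
	 * register_kretprobe(&my_rp) then lets arch_prepare_kretprobe()
	 * above swap the link register for kretprobe_trampoline. */
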
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c
index 387923fcf9b..02e96627fa6 100644
--- a/arch/ppc64/kernel/lparcfg.c
+++ b/arch/ppc64/kernel/lparcfg.c
@@ -34,6 +34,7 @@
#include <asm/system.h>
#include <asm/time.h>
#include <asm/iSeries/ItExtVpdPanel.h>
+#include <asm/prom.h>
#define MODULE_VERS "1.6"
#define MODULE_NAME "lparcfg"
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
new file mode 100644
index 00000000000..fdb2fc649d7
--- /dev/null
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -0,0 +1,302 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h> /* _end */
+#include <asm/prom.h>
+
+#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
+
+/* Keep this around until we move it into a crash-specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need a crash shutdown in here
+ * and what it would achieve. Leaving it in for now so the code
+ * compiles in the generic kexec environment.
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* do nothing right now */
+ /* smp_release_cpus() if we want smp on panic kernel */
+ /* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ int i;
+ unsigned long begin, end; /* limits of segment */
+ unsigned long low, high; /* limits of blocked memory range */
+ struct device_node *node;
+ unsigned long *basep;
+ unsigned int *sizep;
+
+ if (!ppc_md.hpte_clear_all)
+ return -ENOENT;
+
+ /*
+ * Since we use the kernel fault handlers and paging code to
+ * handle the virtual mode, we must make sure no destination
+ * overlaps kernel static data or bss.
+ */
+ for (i = 0; i < image->nr_segments; i++)
+ if (image->segment[i].mem < __pa(_end))
+ return -ETXTBSY;
+
+ /*
+ * For non-LPAR, we absolutely can not overwrite the mmu hash
+ * table, since we are still using the bolted entries in it to
+ * do the copy. Check that here.
+ *
+ * It is safe if the end is below the start of the blocked
+ * region (end <= low), or if the beginning is after the
+ * end of the blocked region (begin >= high). Use the
+ * boolean identity !(a || b) === (!a && !b).
+ */
+ if (htab_address) {
+ low = __pa(htab_address);
+ high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ /* We also should not overwrite the tce tables */
+ for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+ node = of_find_node_by_type(node, "pci")) {
+ basep = (unsigned long *)get_property(node, "linux,tce-base",
+ NULL);
+ sizep = (unsigned int *)get_property(node, "linux,tce-size",
+ NULL);
+ if (basep == NULL || sizep == NULL)
+ continue;
+
+ low = *basep;
+ high = low + (*sizep);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ return 0;
+}
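
The segment checks above are the standard half-open interval overlap test; by De Morgan, !(end <= low || begin >= high) is exactly (begin < high) && (end > low). A standalone restatement:

	/* [begin, end) overlaps [low, high) unless one range ends
	 * before the other starts */
	static int ranges_overlap(unsigned long begin, unsigned long end,
				  unsigned long low, unsigned long high)
	{
		return (begin < high) && (end > low);
	}
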
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+ /* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+ unsigned long entry;
+ unsigned long *ptr;
+ void *dest;
+ void *addr;
+
+ /*
+ * We rely on kexec_load to create a list that properly
+ * initializes these pointers before they are used.
+ * We will still crash if the list is wrong, but at least
+ * the compiler will be quiet.
+ */
+ ptr = NULL;
+ dest = NULL;
+
+ for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+ addr = __va(entry & PAGE_MASK);
+
+ switch (entry & IND_FLAGS) {
+ case IND_DESTINATION:
+ dest = addr;
+ break;
+ case IND_INDIRECTION:
+ ptr = addr;
+ break;
+ case IND_SOURCE:
+ copy_page(dest, addr);
+ dest += PAGE_SIZE;
+ }
+ }
+}
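
copy_segments() consumes the kimage indirection list built by kexec_load(): image->head is an IND_INDIRECTION entry pointing at a page of entries like the hypothetical one below (addresses invented for illustration; the IND_* flags live in the entry's low bits since all addresses are page aligned):

	unsigned long example_ind_page[] = {
		0x01000000 | IND_DESTINATION,	/* dest = this physical address */
		0x02345000 | IND_SOURCE,	/* copy_page(dest, src); dest += PAGE_SIZE */
		0x02346000 | IND_SOURCE,	/* next page of the segment */
		IND_DONE,			/* terminate the walk */
	};
	/* image->head = __pa(example_ind_page) | IND_INDIRECTION; */
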
+
+void kexec_copy_flush(struct kimage *image)
+{
+ long i, nr_segments = image->nr_segments;
+ struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+ /* save the ranges on the stack to efficiently flush the icache */
+ memcpy(ranges, image->segment, sizeof(ranges));
+
+ /*
+ * After this call we may not use anything allocated in dynamic
+ * memory, including *image.
+ *
+ * Only globals and the stack are allowed.
+ */
+ copy_segments(image->head);
+
+ /*
+ * we need to clear the icache for all dest pages at some point,
+ * including ones that were already in place before the copy
+ */
+ for (i = 0; i < nr_segments; i++)
+ flush_icache_range(ranges[i].mem + KERNELBASE,
+ ranges[i].mem + KERNELBASE +
+ ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus by
+ * interrupting them, but we would like to run it off irq level so
+ * that the interrupt controller is left clean.
+ */
+void kexec_smp_down(void *arg)
+{
+ if (ppc_md.cpu_irq_down)
+ ppc_md.cpu_irq_down();
+
+ local_irq_disable();
+ kexec_smp_wait();
+ /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+ int my_cpu, i, notified=-1;
+
+ smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ my_cpu = get_cpu();
+
+ /* check the other cpus are now down (via paca hw cpu id == -1) */
+ for (i=0; i < NR_CPUS; i++) {
+ if (i == my_cpu)
+ continue;
+
+ while (paca[i].hw_cpu_id != -1) {
+ if (!cpu_possible(i)) {
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " possible, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (!cpu_online(i)) {
+ /* Fixme: this can be spinning in
+ * pSeries_secondary_wait with a paca
+ * waiting for it to go online.
+ */
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " online, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (i != notified) {
+ printk( "kexec: waiting for cpu %d (physical"
+ " %d) to go down\n",
+ i, paca[i].hw_cpu_id);
+ notified = i;
+ }
+ }
+ }
+
+ /* after we tell the others to go down */
+ if (ppc_md.cpu_irq_down)
+ ppc_md.cpu_irq_down();
+
+ put_cpu();
+
+ local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+ /*
+ * move the secondaries to us so that we can copy
+ * the new kernel to 0-0x100 safely
+ *
+ * do this in setup.c if kexec is configured?
+ */
+ smp_release_cpus();
+ if (ppc_md.cpu_irq_down)
+ ppc_md.cpu_irq_down();
+ local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image. We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+ __attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+ /* prepare control code if any */
+
+ /* shutdown other cpus into our wait loop and quiesce interrupts */
+ kexec_prepare_cpus();
+
+ /* switch to a statically allocated stack. Based on the irq stack code.
+ * XXX: the task struct will likely be invalid once we do the copy!
+ */
+ kexec_stack.thread_info.task = current_thread_info()->task;
+ kexec_stack.thread_info.flags = 0;
+
+ /* Some things are best done in assembly. Finding globals with
+ * a toc is easier in C, so pass in what we can.
+ */
+ kexec_sequence(&kexec_stack, image->start, image,
+ page_address(image->control_code_page),
+ ppc_md.hpte_clear_all);
+ /* NOTREACHED */
+}
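
For context, a hypothetical userspace sketch of how these hooks get exercised: loading an image reaches machine_kexec_prepare(), and a subsequent reboot(LINUX_REBOOT_CMD_KEXEC) reaches machine_kexec(). Syscall number 268 matches the table change in misc.S below; the segment layout mirrors include/linux/kexec.h of this era, and the addresses are invented:

	#include <unistd.h>
	#include <sys/syscall.h>

	struct kexec_segment {
		const void *buf;	/* source buffer in user memory */
		unsigned long bufsz;
		unsigned long mem;	/* physical destination */
		unsigned long memsz;	/* page-aligned size at destination */
	};

	static long load_kexec_image(const void *kbuf, unsigned long ksize,
				     unsigned long dest)
	{
		struct kexec_segment seg = {
			.buf = kbuf, .bufsz = ksize,
			.mem = dest, .memsz = (ksize + 0xfffUL) & ~0xfffUL,
		};

		/* entry = dest, one segment, default flags */
		return syscall(268, dest, 1UL, &seg, 0UL);
	}
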
diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c
index da8900b51f4..bb55b5a5691 100644
--- a/arch/ppc64/kernel/maple_setup.c
+++ b/arch/ppc64/kernel/maple_setup.c
@@ -177,6 +177,8 @@ void __init maple_setup_arch(void)
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
+
+ printk(KERN_INFO "Using native/NAP idle loop\n");
}
/*
@@ -297,4 +299,5 @@ struct machdep_calls __initdata maple_md = {
.get_rtc_time = maple_get_rtc_time,
.calibrate_decr = generic_calibrate_decr,
.progress = maple_progress,
+ .idle_loop = native_idle,
};
diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c
index d98bebf7042..ef4a338ebd0 100644
--- a/arch/ppc64/kernel/mf.c
+++ b/arch/ppc64/kernel/mf.c
@@ -801,10 +801,8 @@ int mf_get_boot_rtc(struct rtc_time *tm)
return rc;
/* We need to poll here as we are not yet taking interrupts */
while (rtc_data.busy) {
- extern unsigned long lpevent_count;
- struct ItLpQueue *lpq = get_paca()->lpqueue_ptr;
- if (lpq && ItLpQueue_isLpIntPending(lpq))
- lpevent_count += ItLpQueue_process(lpq, NULL);
+ if (hvlpevent_is_pending())
+ process_hvlpevents(NULL);
}
return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
}
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
index e3c73b3425d..59f4f997381 100644
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -680,6 +680,177 @@ _GLOBAL(kernel_thread)
ld r30,-16(r1)
blr
+/* kexec_wait(phys_cpu)
+ *
+ * wait for the flag to change, indicating this kernel is going away but
+ * the slave code for the next one is at addresses 0 to 0x100.
+ *
+ * This is used by all slaves.
+ *
+ * Physical (hardware) cpu id should be in r3.
+ */
+_GLOBAL(kexec_wait)
+ bl 1f
+1: mflr r5
+ addi r5,r5,kexec_flag-1b
+
+99: HMT_LOW
+#ifdef CONFIG_KEXEC /* use no memory without kexec */
+ lwz r4,0(r5)
+ cmpwi 0,r4,0
+ bnea 0x60
+#endif
+ b 99b
+
+/* this can be in text because we won't change it until we are
+ * running in real mode anyway
+ */
+kexec_flag:
+ .long 0
+
+
+#ifdef CONFIG_KEXEC
+
+/* kexec_smp_wait(void)
+ *
+ * call with interrupts off
+ * note: this is a terminal routine, it does not save lr
+ *
+ * get phys id from paca
+ * set paca id to -1 to say we got here
+ * switch to real mode
+ * join other cpus in kexec_wait(phys_id)
+ */
+_GLOBAL(kexec_smp_wait)
+ lhz r3,PACAHWCPUID(r13)
+ li r4,-1
+ sth r4,PACAHWCPUID(r13) /* let others know we left */
+ bl real_mode
+ b .kexec_wait
+
+/*
+ * switch to real mode (turn mmu off)
+ * we use the early kernel trick that the hardware ignores bits
+ * 0 and 1 (big endian) of the effective address in real mode
+ *
+ * don't overwrite r3 here, it is live for kexec_wait above.
+ */
+real_mode: /* assume normal blr return */
+1: li r9,MSR_RI
+ li r10,MSR_DR|MSR_IR
+ mflr r11 /* return address to SRR0 */
+ mfmsr r12
+ andc r9,r12,r9
+ andc r10,r12,r10
+
+ mtmsrd r9,1
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ rfid
+
+
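
A C-level restatement of the MSR arithmetic real_mode performs, for illustration only (the actual transition must be the rfid sequence above, since translation cannot be switched off mid-stream from C):

	unsigned long msr  = mfmsr();
	unsigned long tmp  = msr & ~MSR_RI;		/* not recoverable */
	unsigned long srr1 = msr & ~(MSR_DR | MSR_IR);	/* translation off */

	/* mtmsrd(tmp); SRR0 = return address; SRR1 = srr1; rfid jumps
	 * back with instruction and data relocation disabled */
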
+/*
+ * kexec_sequence(newstack, start, image, control, clear_all())
+ *
+ * does the grungy work with stack switching and real mode switches
+ * also does simple calls to other code
+ */
+
+_GLOBAL(kexec_sequence)
+ mflr r0
+ std r0,16(r1)
+
+ /* switch stacks to newstack -- &kexec_stack.stack */
+ stdu r1,THREAD_SIZE-112(r3)
+ mr r1,r3
+
+ li r0,0
+ std r0,16(r1)
+
+ /* save regs for local vars on new stack.
+ * yes, we won't go back, but ...
+ */
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+ std r26,-48(r1)
+ std r25,-56(r1)
+
+ stdu r1,-112-64(r1)
+
+ /* save args into preserved regs */
+ mr r31,r3 /* newstack (both) */
+ mr r30,r4 /* start (real) */
+ mr r29,r5 /* image (virt) */
+ mr r28,r6 /* control, unused */
+ mr r27,r7 /* clear_all() fn desc */
+ mr r26,r8 /* spare */
+ lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
+
+ /* disable interrupts, we are overwriting kernel data next */
+ mfmsr r3
+ rlwinm r3,r3,0,17,15
+ mtmsrd r3,1
+
+ /* copy dest pages, flush whole dest image */
+ mr r3,r29
+ bl .kexec_copy_flush /* (image) */
+
+ /* turn off mmu */
+ bl real_mode
+
+ /* clear out hardware hash page table and tlb */
+ ld r5,0(r27) /* deref function descriptor */
+ mtctr r5
+ bctrl /* ppc_md.hpte_clear_all(void); */
+
+/*
+ * the kexec image calling convention is:
+ * the first 0x100 bytes of the entry point are copied to 0
+ *
+ * all slaves branch to slave = 0x60 (absolute)
+ * slave(phys_cpu_id);
+ *
+ * master goes to start = entry point
+ * start(phys_cpu_id, start, 0);
+ *
+ *
+ * a wrapper is needed to call existing kernels; here is an approximate
+ * description of one method:
+ *
+ * v2: (2.6.10)
+ * start will be near the boot_block (maybe 0x100 bytes before it?)
+ * it will have a 0x60, which will b to boot_block, where it will wait
+ * and 0 will store phys into struct boot-block and load r3 from there,
+ * copy kernel 0-0x100 and tell slaves to back down to 0x60 again
+ *
+ * v1: (2.6.9)
+ * boot block will have all cpus scanning device tree to see if they
+ * are the boot cpu ?????
+ * other device tree differences (prop sizes, va vs pa, etc)...
+ */
+
+ /* copy 0x100 bytes starting at start to 0 */
+ li r3,0
+ mr r4,r30
+ li r5,0x100
+ li r6,0
+ bl .copy_and_flush /* (dest, src, copy limit, start offset) */
+1: /* assume normal blr return */
+
+ /* release other cpus to the new kernel's secondary start at 0x60 */
+ mflr r5
+ li r6,1
+ stw r6,kexec_flag-1b(5)
+ mr r3,r25 # my phys cpu
+ mr r4,r30 # start, aka phys mem offset
+ mtlr 4
+ li r5,0
+ blr /* image->start(physid, image->start, 0); */
+#endif /* CONFIG_KEXEC */
+
/* Why isn't this a) automatic, b) written in 'C'? */
.balign 8
_GLOBAL(sys_call_table32)
@@ -951,11 +1122,13 @@ _GLOBAL(sys_call_table32)
.llong .compat_sys_mq_timedreceive /* 265 */
.llong .compat_sys_mq_notify
.llong .compat_sys_mq_getsetattr
- .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */
+ .llong .compat_sys_kexec_load
.llong .sys32_add_key
- .llong .sys32_request_key
+ .llong .sys32_request_key /* 270 */
.llong .compat_sys_keyctl
.llong .compat_sys_waitid
+ .llong .sys32_ioprio_set
+ .llong .sys32_ioprio_get
.balign 8
_GLOBAL(sys_call_table)
@@ -1227,8 +1400,10 @@ _GLOBAL(sys_call_table)
.llong .sys_mq_timedreceive /* 265 */
.llong .sys_mq_notify
.llong .sys_mq_getsetattr
- .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */
+ .llong .sys_kexec_load
.llong .sys_add_key
.llong .sys_request_key /* 270 */
.llong .sys_keyctl
.llong .sys_waitid
+ .llong .sys_ioprio_set
+ .llong .sys_ioprio_get
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c
index 593ea5b82af..e8fbab1df37 100644
--- a/arch/ppc64/kernel/mpic.c
+++ b/arch/ppc64/kernel/mpic.c
@@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void)
#endif /* CONFIG_SMP */
}
+/*
+ * XXX: someone who knows mpic should check this.
+ * do we need to eoi the ipi here (see xics comments)?
+ * or can we reset the mpic in the new kernel?
+ */
+void mpic_teardown_this_cpu(void)
+{
+ struct mpic *mpic = mpic_primary;
+ unsigned long flags;
+ u32 msk = 1 << hard_smp_processor_id();
+ unsigned int i;
+
+ BUG_ON(mpic == NULL);
+
+ DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+ spin_lock_irqsave(&mpic_lock, flags);
+
+ /* let the mpic know we don't want intrs. */
+ for (i = 0; i < mpic->num_sources ; i++)
+ mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+ mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+
+ /* Set current processor priority to max */
+ mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
+
+ spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
{
struct mpic *mpic = mpic_primary;
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h
index 63e177143ea..99fbbc9a084 100644
--- a/arch/ppc64/kernel/mpic.h
+++ b/arch/ppc64/kernel/mpic.h
@@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq);
/* Setup a non-boot CPU */
extern void mpic_setup_this_cpu(void);
+/* Clean up for kexec (or cpu offline or ...) */
+extern void mpic_teardown_this_cpu(void);
+
/* Request IPIs on primary mpic */
extern void mpic_request_ipis(void);
diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c
index 4e71781a441..4fb1a9f5060 100644
--- a/arch/ppc64/kernel/nvram.c
+++ b/arch/ppc64/kernel/nvram.c
@@ -338,9 +338,8 @@ static int nvram_remove_os_partition(void)
*/
static int nvram_create_os_partition(void)
{
- struct list_head * p;
- struct nvram_partition *part = NULL;
- struct nvram_partition *new_part = NULL;
+ struct nvram_partition *part;
+ struct nvram_partition *new_part;
struct nvram_partition *free_part = NULL;
int seq_init[2] = { 0, 0 };
loff_t tmp_index;
@@ -349,8 +348,7 @@ static int nvram_create_os_partition(void)
 /* Find a free partition that will give us the maximum needed size.
    If we can't find one, take one that gives us the minimum size needed. */
- list_for_each(p, &nvram_part->partition) {
- part = list_entry(p, struct nvram_partition, partition);
+ list_for_each_entry(part, &nvram_part->partition, partition) {
if (part->header.signature != NVRAM_SIG_FREE)
continue;
diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c
index 66bd5ab7c25..b80e81984ba 100644
--- a/arch/ppc64/kernel/of_device.c
+++ b/arch/ppc64/kernel/of_device.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <asm/errno.h>
#include <asm/of_device.h>
@@ -15,20 +16,20 @@
* Used by a driver to check whether an of_device present in the
* system is in its list of supported devices.
*/
-const struct of_match * of_match_device(const struct of_match *matches,
+const struct of_device_id *of_match_device(const struct of_device_id *matches,
const struct of_device *dev)
{
if (!dev->node)
return NULL;
- while (matches->name || matches->type || matches->compatible) {
+ while (matches->name[0] || matches->type[0] || matches->compatible[0]) {
int match = 1;
- if (matches->name && matches->name != OF_ANY_MATCH)
+ if (matches->name[0])
match &= dev->node->name
&& !strcmp(matches->name, dev->node->name);
- if (matches->type && matches->type != OF_ANY_MATCH)
+ if (matches->type[0])
match &= dev->node->type
&& !strcmp(matches->type, dev->node->type);
- if (matches->compatible && matches->compatible != OF_ANY_MATCH)
+ if (matches->compatible[0])
match &= device_is_compatible(dev->node,
matches->compatible);
if (match)
@@ -42,7 +43,7 @@ static int of_platform_bus_match(struct device *dev, struct device_driver *drv)
{
struct of_device * of_dev = to_of_device(dev);
struct of_platform_driver * of_drv = to_of_platform_driver(drv);
- const struct of_match * matches = of_drv->match_table;
+ const struct of_device_id * matches = of_drv->match_table;
if (!matches)
return 0;
@@ -75,7 +76,7 @@ static int of_device_probe(struct device *dev)
int error = -ENODEV;
struct of_platform_driver *drv;
struct of_device *of_dev;
- const struct of_match *match;
+ const struct of_device_id *match;
drv = to_of_platform_driver(dev->driver);
of_dev = to_of_device(dev);
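
With the switch from struct of_match to the flattened struct of_device_id, empty strings (rather than OF_ANY_MATCH pointers) now mean "don't care". A hypothetical driver table under the new layout:

	static struct of_device_id mydrv_match[] = {
		{ .type = "network", .compatible = "ibm,emac" },
		{ .compatible = "ibm,emac4" },	/* name and type left empty */
		{ /* terminating entry: all fields empty */ }
	};

	static struct of_platform_driver mydrv = {
		.name		= "mydrv",
		.match_table	= mydrv_match,
		/* .probe, .remove, ... */
	};
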
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c
index f2b41243342..5bec956e44a 100644
--- a/arch/ppc64/kernel/pSeries_setup.c
+++ b/arch/ppc64/kernel/pSeries_setup.c
@@ -19,6 +19,7 @@
#undef DEBUG
#include <linux/config.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -82,6 +83,9 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */
extern void pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);
+static int pseries_shared_idle(void);
+static int pseries_dedicated_idle(void);
+
static volatile void __iomem * chrp_int_ack_special;
struct mpic *pSeries_mpic;
@@ -187,14 +191,16 @@ static void __init pSeries_setup_arch(void)
{
/* Fixup ppc_md depending on the type of interrupt controller */
if (ppc64_interrupt_controller == IC_OPEN_PIC) {
- ppc_md.init_IRQ = pSeries_init_mpic;
+ ppc_md.init_IRQ = pSeries_init_mpic;
ppc_md.get_irq = mpic_get_irq;
+ ppc_md.cpu_irq_down = mpic_teardown_this_cpu;
/* Allocate the mpic now, so that find_and_init_phbs() can
* fill the ISUs */
pSeries_setup_mpic();
} else {
ppc_md.init_IRQ = xics_init_IRQ;
ppc_md.get_irq = xics_get_irq;
+ ppc_md.cpu_irq_down = xics_teardown_cpu;
}
#ifdef CONFIG_SMP
@@ -227,6 +233,20 @@ static void __init pSeries_setup_arch(void)
if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
vpa_init(boot_cpuid);
+
+ /* Choose an idle loop */
+ if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+ if (get_paca()->lppaca.shared_proc) {
+ printk(KERN_INFO "Using shared processor idle loop\n");
+ ppc_md.idle_loop = pseries_shared_idle;
+ } else {
+ printk(KERN_INFO "Using dedicated idle loop\n");
+ ppc_md.idle_loop = pseries_dedicated_idle;
+ }
+ } else {
+ printk(KERN_INFO "Using default idle loop\n");
+ ppc_md.idle_loop = default_idle;
+ }
}
static int __init pSeries_init_panel(void)
@@ -416,6 +436,144 @@ static int __init pSeries_probe(int platform)
return 1;
}
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static inline void dedicated_idle_sleep(unsigned int cpu)
+{
+ struct paca_struct *ppaca = &paca[cpu ^ 1];
+
+ /* Only sleep if the other thread is not idle */
+ if (!(ppaca->lppaca.idle)) {
+ local_irq_disable();
+
+ /*
+ * We are about to sleep the thread and so wont be polling any
+ * more.
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+
+ /*
+ * SMT dynamic mode. Cede will result in this thread going
+ * dormant, if the partner thread is still doing work. Thread
+ * wakes up if partner goes idle, an interrupt is presented, or
+ * a prod occurs. Returning from the cede enables external
+ * interrupts.
+ */
+ if (!need_resched())
+ cede_processor();
+ else
+ local_irq_enable();
+ } else {
+ /*
+ * Give the HV an opportunity at the processor, since we are
+ * not doing any work.
+ */
+ poll_pending();
+ }
+}
+
+static int pseries_dedicated_idle(void)
+{
+ long oldval;
+ struct paca_struct *lpaca = get_paca();
+ unsigned int cpu = smp_processor_id();
+ unsigned long start_snooze;
+ unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+
+ while (1) {
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ lpaca->lppaca.idle = 1;
+
+ oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
+ if (!oldval) {
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ start_snooze = __get_tb() +
+ *smt_snooze_delay * tb_ticks_per_usec;
+
+ while (!need_resched() && !cpu_is_offline(cpu)) {
+ ppc64_runlatch_off();
+
+ /*
+ * Go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
+
+ if (*smt_snooze_delay != 0 &&
+ __get_tb() > start_snooze) {
+ HMT_medium();
+ dedicated_idle_sleep(cpu);
+ }
+
+ }
+
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ } else {
+ set_need_resched();
+ }
+
+ lpaca->lppaca.idle = 0;
+ ppc64_runlatch_on();
+
+ schedule();
+
+ if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+}
+
+static int pseries_shared_idle(void)
+{
+ struct paca_struct *lpaca = get_paca();
+ unsigned int cpu = smp_processor_id();
+
+ while (1) {
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ lpaca->lppaca.idle = 1;
+
+ while (!need_resched() && !cpu_is_offline(cpu)) {
+ local_irq_disable();
+ ppc64_runlatch_off();
+
+ /*
+ * Yield the processor to the hypervisor. We return if
+ * an external interrupt occurs (which are driven prior
+ * to returning here) or if a prod occurs from another
+ * processor. When returning here, external interrupts
+ * are enabled.
+ *
+ * Check need_resched() again with interrupts disabled
+ * to avoid a race.
+ */
+ if (!need_resched())
+ cede_processor();
+ else
+ local_irq_enable();
+
+ HMT_medium();
+ }
+
+ lpaca->lppaca.idle = 0;
+ ppc64_runlatch_on();
+
+ schedule();
+
+ if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+
+ return 0;
+}
+
struct machdep_calls __initdata pSeries_md = {
.probe = pSeries_probe,
.setup_arch = pSeries_setup_arch,
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c
index 30154140f7e..62c55a12356 100644
--- a/arch/ppc64/kernel/pSeries_smp.c
+++ b/arch/ppc64/kernel/pSeries_smp.c
@@ -93,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu)
int pSeries_cpu_disable(void)
{
+ int cpu = smp_processor_id();
+
+ cpu_clear(cpu, cpu_online_map);
systemcfg->processorCount--;
/*fix boot_cpuid here*/
- if (smp_processor_id() == boot_cpuid)
+ if (cpu == boot_cpuid)
boot_cpuid = any_online_cpu(cpu_online_map);
/* FIXME: abstract this to not be platform specific later on */
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
index a3e0975c26c..6316188737b 100644
--- a/arch/ppc64/kernel/pacaData.c
+++ b/arch/ppc64/kernel/pacaData.c
@@ -42,21 +42,7 @@ extern unsigned long __toc_start;
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-#ifdef CONFIG_PPC_ISERIES
-#define EXTRA_INITS(number, lpq) \
- .lppaca_ptr = &paca[number].lppaca, \
- .lpqueue_ptr = (lpq), /* &xItLpQueue, */ \
- .reg_save_ptr = &paca[number].reg_save, \
- .reg_save = { \
- .xDesc = 0xd397d9e2, /* "LpRS" */ \
- .xSize = sizeof(struct ItLpRegSave) \
- },
-#else
-#define EXTRA_INITS(number, lpq)
-#endif
-
-#define PACAINITDATA(number,start,lpq,asrr,asrv) \
-{ \
+#define PACA_INIT_COMMON(number, start, asrr, asrv) \
.lock_token = 0x8000, \
.paca_index = (number), /* Paca Index */ \
.default_decr = 0x00ff0000, /* Initial Decr */ \
@@ -74,147 +60,79 @@ extern unsigned long __toc_start;
.end_of_quantum = 0xfffffffffffffffful, \
.slb_count = 64, \
}, \
- EXTRA_INITS((number), (lpq)) \
-}
-struct paca_struct paca[] = {
#ifdef CONFIG_PPC_ISERIES
- PACAINITDATA( 0, 1, &xItLpQueue, 0, STAB0_VIRT_ADDR),
+#define PACA_INIT_ISERIES(number) \
+ .lppaca_ptr = &paca[number].lppaca, \
+ .reg_save_ptr = &paca[number].reg_save, \
+ .reg_save = { \
+ .xDesc = 0xd397d9e2, /* "LpRS" */ \
+ .xSize = sizeof(struct ItLpRegSave) \
+ }
+
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \
+ PACA_INIT_ISERIES(number) \
+}
+
#else
- PACAINITDATA( 0, 1, NULL, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR),
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \
+}
#endif
+
+struct paca_struct paca[] = {
+ BOOTCPU_PACA_INIT(0),
#if NR_CPUS > 1
- PACAINITDATA( 1, 0, NULL, 0, 0),
- PACAINITDATA( 2, 0, NULL, 0, 0),
- PACAINITDATA( 3, 0, NULL, 0, 0),
+ PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
#if NR_CPUS > 4
- PACAINITDATA( 4, 0, NULL, 0, 0),
- PACAINITDATA( 5, 0, NULL, 0, 0),
- PACAINITDATA( 6, 0, NULL, 0, 0),
- PACAINITDATA( 7, 0, NULL, 0, 0),
+ PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
#if NR_CPUS > 8
- PACAINITDATA( 8, 0, NULL, 0, 0),
- PACAINITDATA( 9, 0, NULL, 0, 0),
- PACAINITDATA(10, 0, NULL, 0, 0),
- PACAINITDATA(11, 0, NULL, 0, 0),
- PACAINITDATA(12, 0, NULL, 0, 0),
- PACAINITDATA(13, 0, NULL, 0, 0),
- PACAINITDATA(14, 0, NULL, 0, 0),
- PACAINITDATA(15, 0, NULL, 0, 0),
- PACAINITDATA(16, 0, NULL, 0, 0),
- PACAINITDATA(17, 0, NULL, 0, 0),
- PACAINITDATA(18, 0, NULL, 0, 0),
- PACAINITDATA(19, 0, NULL, 0, 0),
- PACAINITDATA(20, 0, NULL, 0, 0),
- PACAINITDATA(21, 0, NULL, 0, 0),
- PACAINITDATA(22, 0, NULL, 0, 0),
- PACAINITDATA(23, 0, NULL, 0, 0),
- PACAINITDATA(24, 0, NULL, 0, 0),
- PACAINITDATA(25, 0, NULL, 0, 0),
- PACAINITDATA(26, 0, NULL, 0, 0),
- PACAINITDATA(27, 0, NULL, 0, 0),
- PACAINITDATA(28, 0, NULL, 0, 0),
- PACAINITDATA(29, 0, NULL, 0, 0),
- PACAINITDATA(30, 0, NULL, 0, 0),
- PACAINITDATA(31, 0, NULL, 0, 0),
+ PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
+ PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
+ PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
+ PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
+ PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
+ PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
#if NR_CPUS > 32
- PACAINITDATA(32, 0, NULL, 0, 0),
- PACAINITDATA(33, 0, NULL, 0, 0),
- PACAINITDATA(34, 0, NULL, 0, 0),
- PACAINITDATA(35, 0, NULL, 0, 0),
- PACAINITDATA(36, 0, NULL, 0, 0),
- PACAINITDATA(37, 0, NULL, 0, 0),
- PACAINITDATA(38, 0, NULL, 0, 0),
- PACAINITDATA(39, 0, NULL, 0, 0),
- PACAINITDATA(40, 0, NULL, 0, 0),
- PACAINITDATA(41, 0, NULL, 0, 0),
- PACAINITDATA(42, 0, NULL, 0, 0),
- PACAINITDATA(43, 0, NULL, 0, 0),
- PACAINITDATA(44, 0, NULL, 0, 0),
- PACAINITDATA(45, 0, NULL, 0, 0),
- PACAINITDATA(46, 0, NULL, 0, 0),
- PACAINITDATA(47, 0, NULL, 0, 0),
- PACAINITDATA(48, 0, NULL, 0, 0),
- PACAINITDATA(49, 0, NULL, 0, 0),
- PACAINITDATA(50, 0, NULL, 0, 0),
- PACAINITDATA(51, 0, NULL, 0, 0),
- PACAINITDATA(52, 0, NULL, 0, 0),
- PACAINITDATA(53, 0, NULL, 0, 0),
- PACAINITDATA(54, 0, NULL, 0, 0),
- PACAINITDATA(55, 0, NULL, 0, 0),
- PACAINITDATA(56, 0, NULL, 0, 0),
- PACAINITDATA(57, 0, NULL, 0, 0),
- PACAINITDATA(58, 0, NULL, 0, 0),
- PACAINITDATA(59, 0, NULL, 0, 0),
- PACAINITDATA(60, 0, NULL, 0, 0),
- PACAINITDATA(61, 0, NULL, 0, 0),
- PACAINITDATA(62, 0, NULL, 0, 0),
- PACAINITDATA(63, 0, NULL, 0, 0),
+ PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
+ PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
+ PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
+ PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
+ PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
+ PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
+ PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
+ PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
#if NR_CPUS > 64
- PACAINITDATA(64, 0, NULL, 0, 0),
- PACAINITDATA(65, 0, NULL, 0, 0),
- PACAINITDATA(66, 0, NULL, 0, 0),
- PACAINITDATA(67, 0, NULL, 0, 0),
- PACAINITDATA(68, 0, NULL, 0, 0),
- PACAINITDATA(69, 0, NULL, 0, 0),
- PACAINITDATA(70, 0, NULL, 0, 0),
- PACAINITDATA(71, 0, NULL, 0, 0),
- PACAINITDATA(72, 0, NULL, 0, 0),
- PACAINITDATA(73, 0, NULL, 0, 0),
- PACAINITDATA(74, 0, NULL, 0, 0),
- PACAINITDATA(75, 0, NULL, 0, 0),
- PACAINITDATA(76, 0, NULL, 0, 0),
- PACAINITDATA(77, 0, NULL, 0, 0),
- PACAINITDATA(78, 0, NULL, 0, 0),
- PACAINITDATA(79, 0, NULL, 0, 0),
- PACAINITDATA(80, 0, NULL, 0, 0),
- PACAINITDATA(81, 0, NULL, 0, 0),
- PACAINITDATA(82, 0, NULL, 0, 0),
- PACAINITDATA(83, 0, NULL, 0, 0),
- PACAINITDATA(84, 0, NULL, 0, 0),
- PACAINITDATA(85, 0, NULL, 0, 0),
- PACAINITDATA(86, 0, NULL, 0, 0),
- PACAINITDATA(87, 0, NULL, 0, 0),
- PACAINITDATA(88, 0, NULL, 0, 0),
- PACAINITDATA(89, 0, NULL, 0, 0),
- PACAINITDATA(90, 0, NULL, 0, 0),
- PACAINITDATA(91, 0, NULL, 0, 0),
- PACAINITDATA(92, 0, NULL, 0, 0),
- PACAINITDATA(93, 0, NULL, 0, 0),
- PACAINITDATA(94, 0, NULL, 0, 0),
- PACAINITDATA(95, 0, NULL, 0, 0),
- PACAINITDATA(96, 0, NULL, 0, 0),
- PACAINITDATA(97, 0, NULL, 0, 0),
- PACAINITDATA(98, 0, NULL, 0, 0),
- PACAINITDATA(99, 0, NULL, 0, 0),
- PACAINITDATA(100, 0, NULL, 0, 0),
- PACAINITDATA(101, 0, NULL, 0, 0),
- PACAINITDATA(102, 0, NULL, 0, 0),
- PACAINITDATA(103, 0, NULL, 0, 0),
- PACAINITDATA(104, 0, NULL, 0, 0),
- PACAINITDATA(105, 0, NULL, 0, 0),
- PACAINITDATA(106, 0, NULL, 0, 0),
- PACAINITDATA(107, 0, NULL, 0, 0),
- PACAINITDATA(108, 0, NULL, 0, 0),
- PACAINITDATA(109, 0, NULL, 0, 0),
- PACAINITDATA(110, 0, NULL, 0, 0),
- PACAINITDATA(111, 0, NULL, 0, 0),
- PACAINITDATA(112, 0, NULL, 0, 0),
- PACAINITDATA(113, 0, NULL, 0, 0),
- PACAINITDATA(114, 0, NULL, 0, 0),
- PACAINITDATA(115, 0, NULL, 0, 0),
- PACAINITDATA(116, 0, NULL, 0, 0),
- PACAINITDATA(117, 0, NULL, 0, 0),
- PACAINITDATA(118, 0, NULL, 0, 0),
- PACAINITDATA(119, 0, NULL, 0, 0),
- PACAINITDATA(120, 0, NULL, 0, 0),
- PACAINITDATA(121, 0, NULL, 0, 0),
- PACAINITDATA(122, 0, NULL, 0, 0),
- PACAINITDATA(123, 0, NULL, 0, 0),
- PACAINITDATA(124, 0, NULL, 0, 0),
- PACAINITDATA(125, 0, NULL, 0, 0),
- PACAINITDATA(126, 0, NULL, 0, 0),
- PACAINITDATA(127, 0, NULL, 0, 0),
+ PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
+ PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
+ PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
+ PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
+ PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
+ PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
+ PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
+ PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
+ PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
+ PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
+ PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
+ PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
+ PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
+ PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
+ PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
+ PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
#endif
#endif
#endif
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
index 580676f87d2..ae6f579d3fa 100644
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -351,7 +351,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
*offset += hose->pci_mem_offset;
res_bit = IORESOURCE_MEM;
} else {
- io_offset = (unsigned long)hose->io_base_virt;
+ io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
*offset += io_offset;
res_bit = IORESOURCE_IO;
}
@@ -378,7 +378,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
/* found it! construct the final physical address */
if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
+ *offset += hose->io_base_phys - io_offset;
return rp;
}
@@ -944,4 +944,22 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
}
EXPORT_SYMBOL(pci_read_irq_line);
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ u64 *start, u64 *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = pci_io_base - (unsigned long)hose->io_base_virt +
+ hose->io_base_phys;
+
+ *start = rsrc->start + offset;
+ *end = rsrc->end + offset;
+}
+
#endif /* CONFIG_PPC_MULTIPLATFORM */
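
A hedged usage sketch for the new hook: given a device and BAR already in scope, pci_resource_to_user() converts the kernel's view of a resource into the range user space should see (folding out pci_io_base for I/O ports, per the implementation above):

	u64 ustart, uend;

	pci_resource_to_user(dev, bar, &dev->resource[bar], &ustart, &uend);
	/* ustart/uend are now suitable for exporting to user space */
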
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c
index 6cf03d387b9..3013cdb5f93 100644
--- a/arch/ppc64/kernel/pmac_setup.c
+++ b/arch/ppc64/kernel/pmac_setup.c
@@ -186,6 +186,8 @@ void __init pmac_setup_arch(void)
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
+
+ printk(KERN_INFO "Using native/NAP idle loop\n");
}
#ifdef CONFIG_SCSI
@@ -507,5 +509,6 @@ struct machdep_calls __initdata pmac_md = {
.calibrate_decr = pmac_calibrate_decr,
.feature_call = pmac_do_feature_call,
.progress = pmac_progress,
- .check_legacy_ioport = pmac_check_legacy_ioport
+ .check_legacy_ioport = pmac_check_legacy_ioport,
+ .idle_loop = native_idle,
};
diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c
index b230a63fe4c..705742f4eec 100644
--- a/arch/ppc64/kernel/ppc_ksyms.c
+++ b/arch/ppc64/kernel/ppc_ksyms.c
@@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu);
EXPORT_SYMBOL(giveup_altivec);
#endif
EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(flush_dcache_range);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC_ISERIES
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
index aba89554d89..f7cae05e40f 100644
--- a/arch/ppc64/kernel/process.c
+++ b/arch/ppc64/kernel/process.c
@@ -36,6 +36,7 @@
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
+#include <linux/kprobes.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs)
void exit_thread(void)
{
+ kprobe_flush_task(current);
+
#ifndef CONFIG_SMP
if (last_task_used_math == current)
last_task_used_math = NULL;
@@ -321,6 +324,7 @@ void flush_thread(void)
{
struct thread_info *t = current_thread_info();
+ kprobe_flush_task(current);
if (t->flags & _TIF_ABI_PENDING)
t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
index 9f8c6087ae5..2993f108d96 100644
--- a/arch/ppc64/kernel/ptrace.c
+++ b/arch/ppc64/kernel/ptrace.c
@@ -305,6 +305,8 @@ static void do_syscall_trace(void)
void do_syscall_trace_enter(struct pt_regs *regs)
{
+ secure_computing(regs->gpr[0]);
+
if (test_thread_flag(TIF_SYSCALL_TRACE)
&& (current->ptrace & PT_PTRACED))
do_syscall_trace();
@@ -320,8 +322,6 @@ void do_syscall_trace_enter(struct pt_regs *regs)
void do_syscall_trace_leave(struct pt_regs *regs)
{
- secure_computing(regs->gpr[0]);
-
if (unlikely(current->audit_context))
audit_syscall_exit(current,
(regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
index 0a47a5ef428..d1b33f0b26c 100644
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -96,7 +96,6 @@ extern void udbg_init_maple_realmode(void);
extern unsigned long klimit;
extern void mm_init_ppc64(void);
-extern int idle_setup(void);
extern void stab_initialize(unsigned long stab);
extern void htab_initialize(void);
extern void early_init_devtree(void *flat_dt);
@@ -677,11 +676,16 @@ void __init setup_system(void)
DBG(" <- setup_system()\n");
}
-
-void machine_restart(char *cmd)
+/* also used by kexec */
+void machine_shutdown(void)
{
if (ppc_md.nvram_sync)
ppc_md.nvram_sync();
+}
+
+void machine_restart(char *cmd)
+{
+ machine_shutdown();
ppc_md.restart(cmd);
#ifdef CONFIG_SMP
smp_send_stop();
@@ -690,13 +694,11 @@ void machine_restart(char *cmd)
local_irq_disable();
while (1) ;
}
-
EXPORT_SYMBOL(machine_restart);
-
+
void machine_power_off(void)
{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
+ machine_shutdown();
ppc_md.power_off();
#ifdef CONFIG_SMP
smp_send_stop();
@@ -705,13 +707,11 @@ void machine_power_off(void)
local_irq_disable();
while (1) ;
}
-
EXPORT_SYMBOL(machine_power_off);
-
+
void machine_halt(void)
{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
+ machine_shutdown();
ppc_md.halt();
#ifdef CONFIG_SMP
smp_send_stop();
@@ -720,7 +720,6 @@ void machine_halt(void)
local_irq_disable();
while (1) ;
}
-
EXPORT_SYMBOL(machine_halt);
static int ppc64_panic_event(struct notifier_block *this,
@@ -1081,8 +1080,11 @@ void __init setup_arch(char **cmdline_p)
ppc_md.setup_arch();
- /* Select the correct idle loop for the platform. */
- idle_setup();
+ /* Use the default idle loop if the platform hasn't provided one. */
+ if (NULL == ppc_md.idle_loop) {
+ ppc_md.idle_loop = default_idle;
+ printk(KERN_INFO "Using default idle loop\n");
+ }
paging_init();
ppc64_boot_msg(0x15, "Setup Done");
diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c
index 118436e8085..206619080e6 100644
--- a/arch/ppc64/kernel/sys_ppc32.c
+++ b/arch/ppc64/kernel/sys_ppc32.c
@@ -30,47 +30,26 @@
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
-#include <linux/slab.h>
-#include <linux/uio.h>
-#include <linux/aio.h>
-#include <linux/nfs_fs.h>
-#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
-#include <linux/filter.h>
-#include <linux/highmem.h>
-#include <linux/highuid.h>
#include <linux/mman.h>
-#include <linux/ipv6.h>
#include <linux/in.h>
-#include <linux/icmpv6.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/sysctl.h>
#include <linux/binfmts.h>
-#include <linux/dnotify.h>
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/ptrace.h>
-#include <linux/aio_abi.h>
#include <linux/elf.h>
-#include <net/scm.h>
-#include <net/sock.h>
-
#include <asm/ptrace.h>
#include <asm/types.h>
#include <asm/ipc.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/semaphore.h>
-#include <asm/ppcdebug.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
#include <asm/systemcfg.h>
@@ -350,8 +329,6 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp)
return ret;
}
-
-/* These are here just in case some old sparc32 binary calls it. */
asmlinkage long sys32_pause(void)
{
current->state = TASK_INTERRUPTIBLE;
@@ -360,8 +337,6 @@ asmlinkage long sys32_pause(void)
return -ERESTARTNOHAND;
}
-
-
static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
{
long usec;
@@ -847,16 +822,6 @@ asmlinkage long sys32_getpgid(u32 pid)
}
-/* Note: it is necessary to treat which and who as unsigned ints,
- * with the corresponding cast to a signed int to insure that the
- * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
- * and the register representation of a signed int (msr in 64-bit mode) is performed.
- */
-asmlinkage long sys32_getpriority(u32 which, u32 who)
-{
- return sys_getpriority((int)which, (int)who);
-}
-
/* Note: it is necessary to treat pid as an unsigned int,
* with the corresponding cast to a signed int to insure that the
@@ -1048,6 +1013,11 @@ asmlinkage long sys32_setpgid(u32 pid, u32 pgid)
return sys_setpgid((int)pid, (int)pgid);
}
+long sys32_getpriority(u32 which, u32 who)
+{
+ /* sign extend which and who */
+ return sys_getpriority((int)which, (int)who);
+}
long sys32_setpriority(u32 which, u32 who, u32 niceval)
{
@@ -1055,6 +1025,18 @@ long sys32_setpriority(u32 which, u32 who, u32 niceval)
return sys_setpriority((int)which, (int)who, (int)niceval);
}
+long sys32_ioprio_get(u32 which, u32 who)
+{
+ /* sign extend which and who */
+ return sys_ioprio_get((int)which, (int)who);
+}
+
+long sys32_ioprio_set(u32 which, u32 who, u32 ioprio)
+{
+ /* sign extend which, who and ioprio */
+ return sys_ioprio_set((int)which, (int)who, (int)ioprio);
+}
+
/* Note: it is necessary to treat newmask as an unsigned int,
* with the corresponding cast to a signed int to insure that the
* proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
@@ -1273,8 +1255,6 @@ long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
(u64)len_high << 32 | len_low, advice);
}
-extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *);
-
long ppc32_timer_create(clockid_t clock,
struct compat_sigevent __user *ev32,
timer_t __user *timer_id)
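
A minimal demonstration of the sign-extension issue the sys32_* wrappers above address; a 32-bit task passing who = -1 arrives in the 64-bit register as 0xffffffff:

	u32 who = 0xffffffffU;		/* -1 from the 32-bit caller */
	long wrong = (long)who;		/* 4294967295: zero-extended */
	long right = (long)(int)who;	/* -1: sign-extended, as the wrappers do */
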
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c
index c8fa6569b2f..02b8ac4e016 100644
--- a/arch/ppc64/kernel/sysfs.c
+++ b/arch/ppc64/kernel/sysfs.c
@@ -112,7 +112,6 @@ void ppc64_enable_pmcs(void)
unsigned long hid0;
#ifdef CONFIG_PPC_PSERIES
unsigned long set, reset;
- int ret;
#endif /* CONFIG_PPC_PSERIES */
/* Only need to enable them once */
@@ -145,11 +144,7 @@ void ppc64_enable_pmcs(void)
case PLATFORM_PSERIES_LPAR:
set = 1UL << 63;
reset = 0;
- ret = plpar_hcall_norets(H_PERFMON, set, reset);
- if (ret)
- printk(KERN_ERR "H_PERFMON call on cpu %u "
- "returned %d\n",
- smp_processor_id(), ret);
+ plpar_hcall_norets(H_PERFMON, set, reset);
break;
#endif /* CONFIG_PPC_PSERIES */
@@ -161,13 +156,6 @@ void ppc64_enable_pmcs(void)
/* instruct hypervisor to maintain PMCs */
if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
get_paca()->lppaca.pmcregs_in_use = 1;
-
- /*
- * On SMT machines we have to set the run latch in the ctrl register
- * in order to make PMC6 spin.
- */
- if (cpu_has_feature(CPU_FTR_SMT))
- ppc64_runlatch_on();
#endif /* CONFIG_PPC_PSERIES */
}
@@ -400,7 +388,12 @@ static int __init topology_init(void)
struct cpu *c = &per_cpu(cpu_devices, cpu);
#ifdef CONFIG_NUMA
- parent = &node_devices[cpu_to_node(cpu)];
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
#endif
/*
* For now, we just see if the system supports making
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c
index 2348a75e050..909462e1ade 100644
--- a/arch/ppc64/kernel/time.c
+++ b/arch/ppc64/kernel/time.c
@@ -91,6 +91,7 @@ unsigned long tb_to_xs;
unsigned tb_to_us;
unsigned long processor_freq;
DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);
unsigned long tb_to_ns_scale;
unsigned long tb_to_ns_shift;
@@ -98,7 +99,6 @@ unsigned long tb_to_ns_shift;
struct gettimeofday_struct do_gtod;
extern unsigned long wall_jiffies;
-extern unsigned long lpevent_count;
extern int smp_tb_synchronized;
extern struct timezone sys_tz;
@@ -366,11 +366,8 @@ int timer_interrupt(struct pt_regs * regs)
set_dec(next_dec);
#ifdef CONFIG_PPC_ISERIES
- {
- struct ItLpQueue *lpq = lpaca->lpqueue_ptr;
- if (lpq && ItLpQueue_isLpIntPending(lpq))
- lpevent_count += ItLpQueue_process(lpq, regs);
- }
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
#endif
/* collect purr register values often, for accurate calculations */
diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S
index 11290c902ba..6f87a916a39 100644
--- a/arch/ppc64/kernel/vdso32/vdso32.lds.S
+++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S
@@ -40,9 +40,9 @@ SECTIONS
.gcc_except_table : { *(.gcc_except_table) }
.fixup : { *(.fixup) }
- .got ALIGN(4) : { *(.got.plt) *(.got) }
-
.dynamic : { *(.dynamic) } :text :dynamic
+ .got : { *(.got) }
+ .plt : { *(.plt) }
_end = .;
__end = .;
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c
index 879f39b90a3..677c4450984 100644
--- a/arch/ppc64/kernel/xics.c
+++ b/arch/ppc64/kernel/xics.c
@@ -647,6 +647,31 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
}
}
+void xics_teardown_cpu(void)
+{
+ int cpu = smp_processor_id();
+ int status;
+
+ ops->cppr_info(cpu, 0x00);
+ iosync();
+
+ /*
+ * we need to EOI the IPI if we got here via the kexec-down IPI
+ *
+ * xics doesn't care if we duplicate an EOI as long as we
+ * don't EOI and raise priority.
+ *
+ * we probably need to check all the other interrupts too;
+ * should we be flagging the idle loop instead,
+ * or creating some task to be scheduled?
+ */
+ ops->xirr_info_set(cpu, XICS_IPI);
+
+ status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+ (1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
+ WARN_ON(status != 0);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/* Interrupts are disabled. */
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
index 52b6b930534..4fec05817d6 100644
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -304,6 +304,50 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
local_irq_restore(flags);
}
+/*
+ * clear all mappings on kexec. All cpus are in real mode (or they will
+ * be when they take an isi), and we are the only one left. We rely on our
+ * kernel mapping being at 0xC0-prefixed addresses and the hardware
+ * ignoring those two real-mode bits.
+ *
+ * TODO: add batching support when enabled. Remember: no dynamic memory here,
+ * although there is the control page available...
+ */
+static void native_hpte_clear(void)
+{
+ unsigned long slot, slots, flags;
+ HPTE *hptep = htab_address;
+ Hpte_dword0 dw0;
+ unsigned long pteg_count;
+
+ pteg_count = htab_hash_mask + 1;
+
+ local_irq_save(flags);
+
+ /* we take the tlbie lock and hold it. Some hardware will
+ * deadlock if we try to tlbie from two processors at once.
+ */
+ spin_lock(&native_tlbie_lock);
+
+ slots = pteg_count * HPTES_PER_GROUP;
+
+ for (slot = 0; slot < slots; slot++, hptep++) {
+ /*
+ * we could lock the pte here, but we are the only cpu
+ * running, right? And for a crash dump, we probably
+ * don't want to wait on a possibly bad cpu.
+ */
+ dw0 = hptep->dw0.dw0;
+
+ if (dw0.v) {
+ hptep->dw0.dword0 = 0;
+ tlbie(slot2va(dw0.avpn, dw0.l, dw0.h, slot), dw0.l);
+ }
+ }
+
+ spin_unlock(&native_tlbie_lock);
+ local_irq_restore(flags);
+}
+
static void native_flush_hash_range(unsigned long context,
unsigned long number, int local)
{
@@ -415,7 +459,8 @@ void hpte_init_native(void)
ppc_md.hpte_updatepp = native_hpte_updatepp;
ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
+ ppc_md.hpte_remove = native_hpte_remove;
+ ppc_md.hpte_clear_all = native_hpte_clear;
if (tlb_batching_enabled())
ppc_md.flush_hash_range = native_flush_hash_range;
htab_finish_init();