diff options
Diffstat (limited to 'arch/ppc64')
41 files changed, 1578 insertions, 1090 deletions
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index cb27068bfcd..fdd8afba715 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -142,6 +142,23 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on PPC_MULTIPLATFORM && EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is indepedent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similiarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool @@ -270,26 +287,7 @@ config SCHED_SMT when dealing with POWER5 cpus at a cost of slightly increased overhead in some places. If unsure say N here. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. +source "kernel/Kconfig.preempt" config EEH bool "PCI Extended Error Handling (EEH)" if EMBEDDED @@ -431,6 +429,8 @@ config CMDLINE endmenu +source "net/Kconfig" + source "drivers/Kconfig" source "fs/Kconfig" diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile index d3e1d6af920..683b2d43c15 100644 --- a/arch/ppc64/boot/Makefile +++ b/arch/ppc64/boot/Makefile @@ -52,7 +52,7 @@ obj-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.o, $(section))) src-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.c, $(section))) gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section))) -hostprogs-y := piggy addnote addRamDisk +hostprogs-y := addnote addRamDisk targets += zImage zImage.initrd imagesize.c \ $(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \ $(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \ @@ -78,9 +78,6 @@ addsection = $(CROSS32OBJCOPY) $(1) \ quiet_cmd_addnote = ADDNOTE $@ cmd_addnote = $(CROSS32LD) $(BOOTLFLAGS) -o $@ $(obj-boot) && $(obj)/addnote $@ -quiet_cmd_piggy = PIGGY $@ - cmd_piggy = $(obj)/piggyback $(@:.o=) < $< | $(CROSS32AS) -o $@ - $(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE $(call if_changed,gzip) diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index da12ea2ca46..199d9804f61 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -17,7 +17,6 @@ extern void *finddevice(const char *); extern int getprop(void *, const char *, void *, int); -extern void printk(char *fmt, ...); extern void printf(const char *fmt, ...); extern int sprintf(char *buf, const char *fmt, ...); void gunzip(void *, int, unsigned char *, int *); @@ -147,10 +146,10 @@ void start(unsigned long a1, unsigned long a2, void *promptr) } a1 = initrd.addr; a2 = initrd.size; - printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", + printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r", initrd.addr, (unsigned long)_initrd_start, initrd.size); memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size); - printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr)); + printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr)); } /* Eventually gunzip the kernel */ @@ -201,9 +200,6 @@ void start(unsigned long a1, unsigned long a2, void *promptr) flush_cache((void *)vmlinux.addr, vmlinux.size); - if (a1) - printf("initrd head: 0x%lx\n\r", *((u32 *)initrd.addr)); - kernel_entry = (kernel_entry_t)vmlinux.addr; #ifdef DEBUG printf( "kernel:\n\r" diff --git a/arch/ppc64/boot/mknote.c b/arch/ppc64/boot/mknote.c deleted file mode 100644 index 120cc1d8973..00000000000 --- a/arch/ppc64/boot/mknote.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) Cort Dougan 1999. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Generate a note section as per the CHRP specification. - * - */ - -#include <stdio.h> - -#define PL(x) printf("%c%c%c%c", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff ); - -int main(void) -{ -/* header */ - /* namesz */ - PL(strlen("PowerPC")+1); - /* descrsz */ - PL(6*4); - /* type */ - PL(0x1275); - /* name */ - printf("PowerPC"); printf("%c", 0); - -/* descriptor */ - /* real-mode */ - PL(0xffffffff); - /* real-base */ - PL(0x00c00000); - /* real-size */ - PL(0xffffffff); - /* virt-base */ - PL(0xffffffff); - /* virt-size */ - PL(0xffffffff); - /* load-base */ - PL(0x4000); - return 0; -} diff --git a/arch/ppc64/boot/piggyback.c b/arch/ppc64/boot/piggyback.c deleted file mode 100644 index 235c7a87269..00000000000 --- a/arch/ppc64/boot/piggyback.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2001 IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <stdio.h> -#include <unistd.h> -#include <string.h> - -extern long ce_exec_config[]; - -int main(int argc, char *argv[]) -{ - int i, cnt, pos, len; - unsigned int cksum, val; - unsigned char *lp; - unsigned char buf[8192]; - char *varname; - if (argc != 2) - { - fprintf(stderr, "usage: %s name <in-file >out-file\n", - argv[0]); - exit(1); - } - - varname = strrchr(argv[1], '/'); - if (varname) - varname++; - else - varname = argv[1]; - - fprintf(stdout, "#\n"); - fprintf(stdout, "# Miscellaneous data structures:\n"); - fprintf(stdout, "# WARNING - this file is automatically generated!\n"); - fprintf(stdout, "#\n"); - fprintf(stdout, "\n"); - fprintf(stdout, "\t.data\n"); - fprintf(stdout, "\t.globl %s_data\n", varname); - fprintf(stdout, "%s_data:\n", varname); - pos = 0; - cksum = 0; - while ((len = read(0, buf, sizeof(buf))) > 0) - { - cnt = 0; - lp = (unsigned char *)buf; - len = (len + 3) & ~3; /* Round up to longwords */ - for (i = 0; i < len; i += 4) - { - if (cnt == 0) - { - fprintf(stdout, "\t.long\t"); - } - fprintf(stdout, "0x%02X%02X%02X%02X", lp[0], lp[1], lp[2], lp[3]); - val = *(unsigned long *)lp; - cksum ^= val; - lp += 4; - if (++cnt == 4) - { - cnt = 0; - fprintf(stdout, " # %x \n", pos+i-12); - fflush(stdout); - } else - { - fprintf(stdout, ","); - } - } - if (cnt) - { - fprintf(stdout, "0\n"); - } - pos += len; - } - fprintf(stdout, "\t.globl %s_len\n", varname); - fprintf(stdout, "%s_len:\t.long\t0x%x\n", varname, pos); - fflush(stdout); - fclose(stdout); - fprintf(stderr, "cksum = %x\n", cksum); - exit(0); -} - diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c index d5218b15824..5e48b80ff5a 100644 --- a/arch/ppc64/boot/prom.c +++ b/arch/ppc64/boot/prom.c @@ -40,7 +40,7 @@ void *finddevice(const char *name); int getprop(void *phandle, const char *name, void *buf, int buflen); void chrpboot(int a1, int a2, void *prom); /* in main.c */ -void printk(char *fmt, ...); +int printf(char *fmt, ...); /* there is no convenient header to get this from... -- paulus */ extern unsigned long strlen(const char *); @@ -220,7 +220,7 @@ readchar(void) case 1: return ch; case -1: - printk("read(stdin) returned -1\r\n"); + printf("read(stdin) returned -1\r\n"); return -1; } } @@ -627,18 +627,6 @@ int sprintf(char * buf, const char *fmt, ...) static char sprint_buf[1024]; -void -printk(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); -} - int printf(char *fmt, ...) { diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c index cdea00d7707..4231861288a 100644 --- a/arch/ppc64/kernel/ItLpQueue.c +++ b/arch/ppc64/kernel/ItLpQueue.c @@ -1,7 +1,7 @@ /* * ItLpQueue.c * Copyright (C) 2001 Mike Corrigan IBM Corporation - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -11,156 +11,252 @@ #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> #include <asm/system.h> #include <asm/paca.h> #include <asm/iSeries/ItLpQueue.h> #include <asm/iSeries/HvLpEvent.h> #include <asm/iSeries/HvCallEvent.h> -static __inline__ int set_inUse( struct ItLpQueue * lpQueue ) -{ - int t; - u32 * inUseP = &(lpQueue->xInUseWord); - - __asm__ __volatile__("\n\ -1: lwarx %0,0,%2 \n\ - cmpwi 0,%0,0 \n\ - li %0,0 \n\ - bne- 2f \n\ - addi %0,%0,1 \n\ - stwcx. %0,0,%2 \n\ - bne- 1b \n\ -2: eieio" - : "=&r" (t), "=m" (lpQueue->xInUseWord) - : "r" (inUseP), "m" (lpQueue->xInUseWord) - : "cc"); - - return t; -} +/* + * The LpQueue is used to pass event data from the hypervisor to + * the partition. This is where I/O interrupt events are communicated. + * + * It is written to by the hypervisor so cannot end up in the BSS. + */ +struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data"))); -static __inline__ void clear_inUse( struct ItLpQueue * lpQueue ) -{ - lpQueue->xInUseWord = 0; -} +DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts); + +static char *event_types[HvLpEvent_Type_NumTypes] = { + "Hypervisor", + "Machine Facilities", + "Session Manager", + "SPD I/O", + "Virtual Bus", + "PCI I/O", + "RIO I/O", + "Virtual Lan", + "Virtual I/O" +}; /* Array of LpEvent handler functions */ extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes]; -unsigned long ItLpQueueInProcess = 0; -struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue ) +static struct HvLpEvent * get_next_hvlpevent(void) { - struct HvLpEvent * nextLpEvent = - (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; - if ( nextLpEvent->xFlags.xValid ) { + struct HvLpEvent * event; + event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + if (event->xFlags.xValid) { /* rmb() needed only for weakly consistent machines (regatta) */ rmb(); /* Set pointer to next potential event */ - lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 + - LpEventAlign ) / - LpEventAlign ) * - LpEventAlign; + hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 + + LpEventAlign) / LpEventAlign) * LpEventAlign; + /* Wrap to beginning if no room at end */ - if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr) - lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr; + if (hvlpevent_queue.xSlicCurEventPtr > + hvlpevent_queue.xSlicLastValidEventPtr) { + hvlpevent_queue.xSlicCurEventPtr = + hvlpevent_queue.xSlicEventStackPtr; + } + } else { + event = NULL; } - else - nextLpEvent = NULL; - return nextLpEvent; + return event; } -int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue ) +static unsigned long spread_lpevents = NR_CPUS; + +int hvlpevent_is_pending(void) { - int retval = 0; - struct HvLpEvent * nextLpEvent; - if ( lpQueue ) { - nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr; - retval = nextLpEvent->xFlags.xValid | lpQueue->xPlicOverflowIntPending; - } - return retval; + struct HvLpEvent *next_event; + + if (smp_processor_id() >= spread_lpevents) + return 0; + + next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr; + + return next_event->xFlags.xValid | + hvlpevent_queue.xPlicOverflowIntPending; } -void ItLpQueue_clearValid( struct HvLpEvent * event ) +static void hvlpevent_clear_valid(struct HvLpEvent * event) { - /* Clear the valid bit of the event - * Also clear bits within this event that might - * look like valid bits (on 64-byte boundaries) - */ - unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) / - LpEventAlign ) - 1; - switch ( extra ) { - case 3: - ((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid=0; - case 2: - ((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid=0; - case 1: - ((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid=0; - case 0: - ; + /* Tell the Hypervisor that we're done with this event. + * Also clear bits within this event that might look like valid bits. + * ie. on 64-byte boundaries. + */ + struct HvLpEvent *tmp; + unsigned extra = ((event->xSizeMinus1 + LpEventAlign) / + LpEventAlign) - 1; + + switch (extra) { + case 3: + tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 2: + tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign); + tmp->xFlags.xValid = 0; + case 1: + tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign); + tmp->xFlags.xValid = 0; } + mb(); + event->xFlags.xValid = 0; } -unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs ) +void process_hvlpevents(struct pt_regs *regs) { - unsigned numIntsProcessed = 0; - struct HvLpEvent * nextLpEvent; + struct HvLpEvent * event; /* If we have recursed, just return */ - if ( !set_inUse( lpQueue ) ) - return 0; - - if (ItLpQueueInProcess == 0) - ItLpQueueInProcess = 1; - else - BUG(); + if (!spin_trylock(&hvlpevent_queue.lock)) + return; for (;;) { - nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue ); - if ( nextLpEvent ) { - /* Count events to return to caller - * and count processed events in lpQueue - */ - ++numIntsProcessed; - lpQueue->xLpIntCount++; - /* Call appropriate handler here, passing + event = get_next_hvlpevent(); + if (event) { + /* Call appropriate handler here, passing * a pointer to the LpEvent. The handler * must make a copy of the LpEvent if it * needs it in a bottom half. (perhaps for * an ACK) - * - * Handlers are responsible for ACK processing + * + * Handlers are responsible for ACK processing * * The Hypervisor guarantees that LpEvents will * only be delivered with types that we have * registered for, so no type check is necessary * here! - */ - if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes ) - lpQueue->xLpIntCountByType[nextLpEvent->xType]++; - if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes && - lpEventHandler[nextLpEvent->xType] ) - lpEventHandler[nextLpEvent->xType](nextLpEvent, regs); + */ + if (event->xType < HvLpEvent_Type_NumTypes) + __get_cpu_var(hvlpevent_counts)[event->xType]++; + if (event->xType < HvLpEvent_Type_NumTypes && + lpEventHandler[event->xType]) + lpEventHandler[event->xType](event, regs); else - printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType ); - - ItLpQueue_clearValid( nextLpEvent ); - } else if ( lpQueue->xPlicOverflowIntPending ) + printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType ); + + hvlpevent_clear_valid(event); + } else if (hvlpevent_queue.xPlicOverflowIntPending) /* * No more valid events. If overflow events are * pending process them */ - HvCallEvent_getOverflowLpEvents( lpQueue->xIndex); + HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex); else break; } - ItLpQueueInProcess = 0; - mb(); - clear_inUse( lpQueue ); + spin_unlock(&hvlpevent_queue.lock); +} + +static int set_spread_lpevents(char *str) +{ + unsigned long val = simple_strtoul(str, NULL, 0); + + /* + * The parameter is the number of processors to share in processing + * lp events. + */ + if (( val > 0) && (val <= NR_CPUS)) { + spread_lpevents = val; + printk("lpevent processing spread over %ld processors\n", val); + } else { + printk("invalid spread_lpevents %ld\n", val); + } - get_paca()->lpevent_count += numIntsProcessed; + return 1; +} +__setup("spread_lpevents=", set_spread_lpevents); + +void setup_hvlpevent_queue(void) +{ + void *eventStack; + + /* + * Allocate a page for the Event Stack. The Hypervisor needs the + * absolute real address, so we subtract out the KERNELBASE and add + * in the absolute real address of the kernel load area. + */ + eventStack = alloc_bootmem_pages(LpEventStackSize); + memset(eventStack, 0, LpEventStackSize); + + /* Invoke the hypervisor to initialize the event stack */ + HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); + + hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack; + hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack; + hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack + + (LpEventStackSize - LpEventMaxSize); + hvlpevent_queue.xIndex = 0; +} + +static int proc_lpevents_show(struct seq_file *m, void *v) +{ + int cpu, i; + unsigned long sum; + static unsigned long cpu_totals[NR_CPUS]; + + /* FIXME: do we care that there's no locking here? */ + sum = 0; + for_each_online_cpu(cpu) { + cpu_totals[cpu] = 0; + for (i = 0; i < HvLpEvent_Type_NumTypes; i++) { + cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i]; + } + sum += cpu_totals[cpu]; + } + + seq_printf(m, "LpEventQueue 0\n"); + seq_printf(m, " events processed:\t%lu\n", sum); + + for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) { + sum = 0; + for_each_online_cpu(cpu) { + sum += per_cpu(hvlpevent_counts, cpu)[i]; + } + + seq_printf(m, " %-20s %10lu\n", event_types[i], sum); + } + + seq_printf(m, "\n events processed by processor:\n"); + + for_each_online_cpu(cpu) { + seq_printf(m, " CPU%02d %10lu\n", cpu, cpu_totals[cpu]); + } + + return 0; +} + +static int proc_lpevents_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_lpevents_show, NULL); +} - return numIntsProcessed; +static struct file_operations proc_lpevents_operations = { + .open = proc_lpevents_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_lpevents_init(void) +{ + struct proc_dir_entry *e; + + e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); + if (e) + e->proc_fops = &proc_lpevents_operations; + + return 0; } +__initcall(proc_lpevents_init); + diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index badc5a44361..6ffcf67dd50 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c @@ -28,13 +28,6 @@ #include <asm/iSeries/IoHriProcessorVpd.h> #include <asm/iSeries/ItSpCommArea.h> -/* The LpQueue is used to pass event data from the hypervisor to - * the partition. This is where I/O interrupt events are communicated. - */ - -/* May be filled in by the hypervisor so cannot end up in the BSS */ -struct ItLpQueue xItLpQueue __attribute__((__section__(".data"))); - /* The HvReleaseData is the root of the information shared between * the hypervisor and Linux. @@ -200,7 +193,7 @@ struct ItVpdAreas itVpdAreas = { 0,0,0, /* 13 - 15 */ sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */ 0,0,0,0,0,0, /* 17 - 22 */ - sizeof(struct ItLpQueue),/* 23 length of Lp Queue */ + sizeof(struct hvlpevent_queue), /* 23 length of Lp Queue */ 0,0 /* 24 - 25 */ }, .xSlicVpdAdrs = { /* VPD addresses */ @@ -218,7 +211,7 @@ struct ItVpdAreas itVpdAreas = { 0,0,0, /* 13 - 15 */ &xIoHriProcessorVpd, /* 16 Proc Vpd */ 0,0,0,0,0,0, /* 17 - 22 */ - &xItLpQueue, /* 23 Lp Queue */ + &hvlpevent_queue, /* 23 Lp Queue */ 0,0 } }; diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index dffbfb7ac8d..d9b2660ef22 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \ bpa_iic.o spider-pic.o +obj-$(CONFIG_KEXEC) += machine_kexec.o obj-$(CONFIG_EEH) += eeh.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 1d162c7c59d..8d4c46f6f0b 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -49,160 +49,219 @@ extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec); #endif struct cpu_spec cpu_specs[] = { - { /* Power3 */ - 0xffff0000, 0x00400000, "POWER3 (630)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power3+ */ - 0xffff0000, 0x00410000, "POWER3 (630+)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Northstar */ - 0xffff0000, 0x00330000, "RS64-II (northstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Pulsar */ - 0xffff0000, 0x00340000, "RS64-III (pulsar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* I-star */ - 0xffff0000, 0x00360000, "RS64-III (icestar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* S-star */ - 0xffff0000, 0x00370000, "RS64-IV (sstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power4 */ - 0xffff0000, 0x00350000, "POWER4 (gp)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power4+ */ - 0xffff0000, 0x00380000, "POWER4+ (gq)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* PPC970 */ - 0xffff0000, 0x00390000, "PPC970", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* PPC970FX */ - 0xffff0000, 0x003c0000, "PPC970FX", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003a0000, "POWER5 (gr)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003b0000, "POWER5 (gs)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* BE DD1.x */ - 0xffff0000, 0x00700000, "Broadband Engine", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_SMT, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_be, - COMMON_PPC64_FW - }, - { /* default match */ - 0x00000000, 0x00000000, "POWER4 (compatible)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - } + { /* Power3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00400000, + .cpu_name = "POWER3 (630)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power3+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00410000, + .cpu_name = "POWER3 (630+)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Northstar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00330000, + .cpu_name = "RS64-II (northstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Pulsar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00340000, + .cpu_name = "RS64-III (pulsar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* I-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00360000, + .cpu_name = "RS64-III (icestar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* S-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00370000, + .cpu_name = "RS64-IV (sstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00350000, + .cpu_name = "POWER4 (gp)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00380000, + .cpu_name = "POWER4+ (gq)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5 (gs)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* BE DD1.x */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Broadband Engine", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_SMT, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_be, + .firmware_features = COMMON_PPC64_FW, + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER4 (compatible)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + } }; firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, }; diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 02c8f4e3e4b..93ebcac0d5a 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -308,6 +308,7 @@ exception_marker: label##_pSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) #define STD_EXCEPTION_ISERIES(n, label, area) \ @@ -315,6 +316,7 @@ label##_pSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(area); \ EXCEPTION_PROLOG_ISERIES_2; \ b label##_common @@ -324,6 +326,7 @@ label##_iSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ lbz r10,PACAPROCENABLED(r13); \ cmpwi 0,r10,0; \ @@ -393,6 +396,7 @@ __start_interrupts: _machine_check_pSeries: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) . = 0x300 @@ -419,6 +423,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -439,6 +444,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -464,6 +470,7 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM + RUNLATCH_ON(r9) mr r9,r13 mfmsr r10 mfspr r13,SPRG3 @@ -707,11 +714,13 @@ fwnmi_data_area: system_reset_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi machine_check_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) /* @@ -848,6 +857,7 @@ unrecov_fer: .align 7 .globl data_access_common data_access_common: + RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ mfspr r10,DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,DSISR @@ -1194,7 +1204,7 @@ _GLOBAL(pSeries_secondary_smp_init) bl .__restore_cpu_setup /* Set up a paca value for this processor. Since we have the - * physical cpu id in r3, we need to search the pacas to find + * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ LOADADDR(r13, paca) /* Get base vaddr of paca array */ @@ -1207,8 +1217,8 @@ _GLOBAL(pSeries_secondary_smp_init) cmpwi r5,NR_CPUS blt 1b -99: HMT_LOW /* Couldn't find our CPU id */ - b 99b + mr r3,r24 /* not found, copy phys to r3 */ + b .kexec_wait /* next kernel might do better */ 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ /* From now on, r24 is expected to be logica cpuid */ diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c index c72fb8ffe97..138e128a388 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/ppc64/kernel/hvconsole.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <asm/hvcall.h> #include <asm/hvconsole.h> -#include <asm/prom.h> /** * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper @@ -42,29 +41,14 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count) unsigned long got; if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, - (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) { - /* - * Work around a HV bug where it gives us a null - * after every \r. -- paulus - */ - if (got > 0) { - int i; - for (i = 1; i < got; ++i) { - if (buf[i] == 0 && buf[i-1] == '\r') { - --got; - if (i < got) - memmove(&buf[i], &buf[i+1], - got - i); - } - } - } + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) return got; - } return 0; } EXPORT_SYMBOL(hvc_get_chars); + /** * hvc_put_chars: send characters to firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which the data @@ -88,34 +72,3 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) } EXPORT_SYMBOL(hvc_put_chars); - -/* - * We hope/assume that the first vty found corresponds to the first console - * device. - */ -int hvc_find_vtys(void) -{ - struct device_node *vty; - int num_found = 0; - - for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; - vty = of_find_node_by_name(vty, "vty")) { - uint32_t *vtermno; - - /* We have statically defined space for only a certain number of - * console adapters. */ - if (num_found >= MAX_NR_HVC_CONSOLES) - break; - - vtermno = (uint32_t *)get_property(vty, "reg", NULL); - if (!vtermno) - continue; - - if (device_is_compatible(vty, "hvterm1")) { - hvc_instantiate(*vtermno, num_found); - ++num_found; - } - } - - return num_found; -} diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c index 356bd9931fc..0fe3116eba2 100644 --- a/arch/ppc64/kernel/iSeries_proc.c +++ b/arch/ppc64/kernel/iSeries_proc.c @@ -40,50 +40,6 @@ static int __init iseries_proc_create(void) } core_initcall(iseries_proc_create); -static char *event_types[9] = { - "Hypervisor\t\t", - "Machine Facilities\t", - "Session Manager\t", - "SPD I/O\t\t", - "Virtual Bus\t\t", - "PCI I/O\t\t", - "RIO I/O\t\t", - "Virtual Lan\t\t", - "Virtual I/O\t\t" -}; - -static int proc_lpevents_show(struct seq_file *m, void *v) -{ - unsigned int i; - - seq_printf(m, "LpEventQueue 0\n"); - seq_printf(m, " events processed:\t%lu\n", - (unsigned long)xItLpQueue.xLpIntCount); - - for (i = 0; i < 9; ++i) - seq_printf(m, " %s %10lu\n", event_types[i], - (unsigned long)xItLpQueue.xLpIntCountByType[i]); - - seq_printf(m, "\n events processed by processor:\n"); - - for_each_online_cpu(i) - seq_printf(m, " CPU%02d %10u\n", i, paca[i].lpevent_count); - - return 0; -} - -static int proc_lpevents_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_lpevents_show, NULL); -} - -static struct file_operations proc_lpevents_operations = { - .open = proc_lpevents_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static unsigned long startTitan = 0; static unsigned long startTb = 0; @@ -148,10 +104,6 @@ static int __init iseries_proc_init(void) { struct proc_dir_entry *e; - e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); - if (e) - e->proc_fops = &proc_lpevents_operations; - e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); if (e) e->proc_fops = &proc_titantod_operations; diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index 86966ce76b5..077c82fc9f3 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -24,7 +24,6 @@ #include <linux/smp.h> #include <linux/param.h> #include <linux/string.h> -#include <linux/bootmem.h> #include <linux/initrd.h> #include <linux/seq_file.h> #include <linux/kdev_t.h> @@ -676,7 +675,6 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) */ static void __init iSeries_setup_arch(void) { - void *eventStack; unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; /* Add an eye catcher and the systemcfg layout version number */ @@ -685,24 +683,7 @@ static void __init iSeries_setup_arch(void) systemcfg->version.minor = SYSTEMCFG_MINOR; /* Setup the Lp Event Queue */ - - /* Allocate a page for the Event Stack - * The hypervisor wants the absolute real address, so - * we subtract out the KERNELBASE and add in the - * absolute real address of the kernel load area - */ - eventStack = alloc_bootmem_pages(LpEventStackSize); - memset(eventStack, 0, LpEventStackSize); - - /* Invoke the hypervisor to initialize the event stack */ - HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize); - - /* Initialize fields in our Lp Event Queue */ - xItLpQueue.xSlicEventStackPtr = (char *)eventStack; - xItLpQueue.xSlicCurEventPtr = (char *)eventStack; - xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack + - (LpEventStackSize - LpEventMaxSize); - xItLpQueue.xIndex = 0; + setup_hvlpevent_queue(); /* Compute processor frequency */ procFreqHz = ((1UL << 34) * 1000000) / @@ -853,27 +834,91 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); -static int set_spread_lpevents(char *str) +static inline void process_iSeries_events(void) { - unsigned long i; - unsigned long val = simple_strtoul(str, NULL, 0); + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); /* - * The parameter is the number of processors to share in processing - * lp events. + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. */ - if (( val > 0) && (val <= NR_CPUS)) { - for (i = 1; i < val; ++i) - paca[i].lpqueue_ptr = paca[0].lpqueue_ptr; + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} - printk("lpevent processing spread over %ld processors\n", val); - } else { - printk("invalid spread_lpevents %ld\n", val); +static int iseries_shared_idle(void) +{ + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); + + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); + + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); + } + + return 0; +} + +static int iseries_dedicated_idle(void) +{ + long oldval; + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { + HMT_medium(); + ppc64_runlatch_on(); + process_iSeries_events(); + } + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + ppc64_runlatch_on(); + schedule(); } - return 1; + return 0; } -__setup("spread_lpevents=", set_spread_lpevents); #ifndef CONFIG_PCI void __init iSeries_init_IRQ(void) { } @@ -900,5 +945,13 @@ void __init iSeries_early_setup(void) ppc_md.get_rtc_time = iSeries_get_rtc_time; ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + + if (get_paca()->lppaca.shared_proc) { + ppc_md.idle_loop = iseries_shared_idle; + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { + ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } } diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index bdf13b4dc1c..954395d4263 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -20,109 +20,18 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/cpu.h> -#include <linux/module.h> #include <linux/sysctl.h> -#include <linux/smp.h> #include <asm/system.h> #include <asm/processor.h> -#include <asm/mmu.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/iSeries/HvCall.h> -#include <asm/iSeries/ItLpQueue.h> -#include <asm/plpar_wrappers.h> #include <asm/systemcfg.h> +#include <asm/machdep.h> extern void power4_idle(void); -static int (*idle_loop)(void); - -#ifdef CONFIG_PPC_ISERIES -static unsigned long maxYieldTime = 0; -static unsigned long minYieldTime = 0xffffffffffffffffUL; - -static inline void process_iSeries_events(void) -{ - asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); -} - -static void yield_shared_processor(void) -{ - unsigned long tb; - unsigned long yieldTime; - - HvCall_setEnabledInterrupts(HvCall_MaskIPI | - HvCall_MaskLpEvent | - HvCall_MaskLpProd | - HvCall_MaskTimeout); - - tb = get_tb(); - /* Compute future tb value when yield should expire */ - HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); - - yieldTime = get_tb() - tb; - if (yieldTime > maxYieldTime) - maxYieldTime = yieldTime; - - if (yieldTime < minYieldTime) - minYieldTime = yieldTime; - - /* - * The decrementer stops during the yield. Force a fake decrementer - * here and let the timer_interrupt code sort out the actual time. - */ - get_paca()->lppaca.int_dword.fields.decr_int = 1; - process_iSeries_events(); -} - -static int iSeries_idle(void) -{ - struct paca_struct *lpaca; - long oldval; - - /* ensure iSeries run light will be out when idle */ - ppc64_runlatch_off(); - - lpaca = get_paca(); - - while (1) { - if (lpaca->lppaca.shared_proc) { - if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) - process_iSeries_events(); - if (!need_resched()) - yield_shared_processor(); - } else { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - - while (!need_resched()) { - HMT_medium(); - if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) - process_iSeries_events(); - HMT_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - } - - ppc64_runlatch_on(); - schedule(); - ppc64_runlatch_off(); - } - - return 0; -} - -#else - -static int default_idle(void) +int default_idle(void) { long oldval; unsigned int cpu = smp_processor_id(); @@ -134,7 +43,8 @@ static int default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (!need_resched() && !cpu_is_offline(cpu)) { - barrier(); + ppc64_runlatch_off(); + /* * Go into low thread priority and possibly * low power mode. @@ -149,6 +59,7 @@ static int default_idle(void) set_need_resched(); } + ppc64_runlatch_on(); schedule(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -157,127 +68,19 @@ static int default_idle(void) return 0; } -#ifdef CONFIG_PPC_PSERIES - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -int dedicated_idle(void) +int native_idle(void) { - long oldval; - struct paca_struct *lpaca = get_paca(), *ppaca; - unsigned long start_snooze; - unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - unsigned int cpu = smp_processor_id(); - - ppaca = &paca[cpu ^ 1]; - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - start_snooze = __get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched() && !cpu_is_offline(cpu)) { - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay == 0 || - __get_tb() < start_snooze) - continue; - - HMT_medium(); - - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread - * and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. Cede will result - * in this thread going dormant, if the - * partner thread is still doing work. - * Thread wakes up if partner goes idle, - * an interrupt is presented, or a prod - * occurs. Returning from the cede - * enables external interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the - * processor, since we are not doing - * any work. - */ - poll_pending(); - } - } - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -static int shared_idle(void) -{ - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; + ppc64_runlatch_off(); - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); + if (!need_resched()) + power4_idle(); - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); + if (need_resched()) { + ppc64_runlatch_on(); + schedule(); } - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); if (cpu_is_offline(smp_processor_id()) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -286,29 +89,10 @@ static int shared_idle(void) return 0; } -#endif /* CONFIG_PPC_PSERIES */ - -static int native_idle(void) -{ - while(1) { - /* check CPU type here */ - if (!need_resched()) - power4_idle(); - if (need_resched()) - schedule(); - - if (cpu_is_offline(raw_smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -#endif /* CONFIG_PPC_ISERIES */ - void cpu_idle(void) { - idle_loop(); + BUG_ON(NULL == ppc_md.idle_loop); + ppc_md.idle_loop(); } int powersave_nap; @@ -342,42 +126,3 @@ register_powersave_nap_sysctl(void) } __initcall(register_powersave_nap_sysctl); #endif - -int idle_setup(void) -{ - /* - * Move that junk to each platform specific file, eventually define - * a pSeries_idle for shared processor stuff - */ -#ifdef CONFIG_PPC_ISERIES - idle_loop = iSeries_idle; - return 1; -#else - idle_loop = default_idle; -#endif -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_PSERIES) { - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - if (get_paca()->lppaca.shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); - idle_loop = shared_idle; - } else { - printk(KERN_INFO "Using dedicated idle loop\n"); - idle_loop = dedicated_idle; - } - } else { - printk(KERN_INFO "Using default idle loop\n"); - idle_loop = default_idle; - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifndef CONFIG_PPC_ISERIES - if (systemcfg->platform == PLATFORM_POWERMAC || - systemcfg->platform == PLATFORM_MAPLE) { - printk(KERN_INFO "Using native/NAP idle loop\n"); - idle_loop = native_idle; - } -#endif /* CONFIG_PPC_ISERIES */ - - return 1; -} diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c index 3defc8c33ad..f41afe54504 100644 --- a/arch/ppc64/kernel/irq.c +++ b/arch/ppc64/kernel/irq.c @@ -66,7 +66,6 @@ EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; int __irq_offset_value; int ppc_spurious_interrupts; -unsigned long lpevent_count; u64 ppc64_interrupt_controller; int show_interrupts(struct seq_file *p, void *v) @@ -245,7 +244,7 @@ void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq) spin_lock(&desc->lock); if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + note_interrupt(irq, desc, action_ret, regs); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; @@ -269,7 +268,6 @@ out: void do_IRQ(struct pt_regs *regs) { struct paca_struct *lpaca; - struct ItLpQueue *lpq; irq_enter(); @@ -295,9 +293,8 @@ void do_IRQ(struct pt_regs *regs) iSeries_smp_message_recv(regs); } #endif /* CONFIG_SMP */ - lpq = lpaca->lpqueue_ptr; - if (lpq && ItLpQueue_isLpIntPending(lpq)) - lpevent_count += ItLpQueue_process(lpq, regs); + if (hvlpevent_is_pending()) + process_hvlpevents(regs); irq_exit(); diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 782ce3efa2c..a3d519518fb 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -36,6 +36,8 @@ #include <asm/kdebug.h> #include <asm/sstep.h> +static DECLARE_MUTEX(kprobe_mutex); + static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; static struct kprobe *kprobe_prev; @@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on rfid or mtmsrd\n"); ret = -EINVAL; } + + /* insn must be on a special executable page on ppc64 */ + if (!ret) { + up(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + down(&kprobe_mutex); + if (!p->ainsn.insn) + ret = -ENOMEM; + } return ret; } @@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p) void arch_remove_kprobe(struct kprobe *p) { + up(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + down(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { + kprobe_opcode_t insn = *p->ainsn.insn; + regs->msr |= MSR_SE; - /*single step inline if it a breakpoint instruction*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) + + /* single step inline if it is a trap variant */ + if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn)) regs->nip = (unsigned long)p->addr; else - regs->nip = (unsigned long)&p->ainsn.insn; + regs->nip = (unsigned long)p->ainsn.insn; } static inline void save_previous_kprobe(void) @@ -105,6 +122,23 @@ static inline void restore_previous_kprobe(void) kprobe_saved_msr = kprobe_saved_msr_prev; } +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *)regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)kretprobe_trampoline; + add_rp_inst(ri); + } else { + rp->nmissed++; + } +} + static inline int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -195,6 +229,78 @@ no_kprobe: } /* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +void kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "nop\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->nip = orig_ret_address; + + unlock_kprobes(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" * instruction. To avoid the SMP problems that can occur when we @@ -205,9 +311,10 @@ no_kprobe: static void resume_execution(struct kprobe *p, struct pt_regs *regs) { int ret; + unsigned int insn = *p->ainsn.insn; regs->nip = (unsigned long)p->addr; - ret = emulate_step(regs, p->ainsn.insn[0]); + ret = emulate_step(regs, insn); if (ret == 0) regs->nip = (unsigned long)p->addr + 4; } @@ -331,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); return 1; } + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 387923fcf9b..02e96627fa6 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -34,6 +34,7 @@ #include <asm/system.h> #include <asm/time.h> #include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/prom.h> #define MODULE_VERS "1.6" #define MODULE_NAME "lparcfg" diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c new file mode 100644 index 00000000000..fdb2fc649d7 --- /dev/null +++ b/arch/ppc64/kernel/machine_kexec.c @@ -0,0 +1,302 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include <linux/cpumask.h> +#include <linux/kexec.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/current.h> +#include <asm/machdep.h> +#include <asm/cacheflush.h> +#include <asm/paca.h> +#include <asm/mmu.h> +#include <asm/sections.h> /* _end */ +#include <asm/prom.h> + +#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ + +/* Have this around till we move it into crash specific file */ +note_buf_t crash_notes[NR_CPUS]; + +/* Dummy for now. Not sure if we need to have a crash shutdown in here + * and if what it will achieve. Letting it be now to compile the code + * in generic kexec environment + */ +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* do nothing right now */ + /* smp_relase_cpus() if we want smp on panic kernel */ + /* cpu_irq_down to isolate us until we are ready */ +} + +int machine_kexec_prepare(struct kimage *image) +{ + int i; + unsigned long begin, end; /* limits of segment */ + unsigned long low, high; /* limits of blocked memory range */ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + if (!ppc_md.hpte_clear_all) + return -ENOENT; + + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i < image->nr_segments; i++) + if (image->segment[i].mem < __pa(_end)) + return -ETXTBSY; + + /* + * For non-LPAR, we absolutely can not overwrite the mmu hash + * table, since we are still using the bolted entries in it to + * do the copy. Check that here. + * + * It is safe if the end is below the start of the blocked + * region (end <= low), or if the beginning is after the + * end of the blocked region (begin >= high). Use the + * boolean identity !(a || b) === (!a && !b). + */ + if (htab_address) { + low = __pa(htab_address); + high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + /* We also should not overwrite the tce tables */ + for (node = of_find_node_by_type(NULL, "pci"); node != NULL; + node = of_find_node_by_type(node, "pci")) { + basep = (unsigned long *)get_property(node, "linux,tce-base", + NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", + NULL); + if (basep == NULL || sizep == NULL) + continue; + + low = *basep; + high = low + (*sizep); + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + /* we do nothing in prepare that needs to be undone */ +} + +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) + +static void copy_segments(unsigned long ind) +{ + unsigned long entry; + unsigned long *ptr; + void *dest; + void *addr; + + /* + * We rely on kexec_load to create a lists that properly + * initializes these pointers before they are used. + * We will still crash if the list is wrong, but at least + * the compiler will be quiet. + */ + ptr = NULL; + dest = NULL; + + for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { + addr = __va(entry & PAGE_MASK); + + switch (entry & IND_FLAGS) { + case IND_DESTINATION: + dest = addr; + break; + case IND_INDIRECTION: + ptr = addr; + break; + case IND_SOURCE: + copy_page(dest, addr); + dest += PAGE_SIZE; + } + } +} + +void kexec_copy_flush(struct kimage *image) +{ + long i, nr_segments = image->nr_segments; + struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; + + /* save the ranges on the stack to efficiently flush the icache */ + memcpy(ranges, image->segment, sizeof(ranges)); + + /* + * After this call we may not use anything allocated in dynamic + * memory, including *image. + * + * Only globals and the stack are allowed. + */ + copy_segments(image->head); + + /* + * we need to clear the icache for all dest pages sometime, + * including ones that were in place on the original copy + */ + for (i = 0; i < nr_segments; i++) + flush_icache_range(ranges[i].mem + KERNELBASE, + ranges[i].mem + KERNELBASE + + ranges[i].memsz); +} + +#ifdef CONFIG_SMP + +/* FIXME: we should schedule this function to be called on all cpus based + * on calling the interrupts, but we would like to call it off irq level + * so that the interrupt controller is clean. + */ +void kexec_smp_down(void *arg) +{ + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + + local_irq_disable(); + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void kexec_prepare_cpus(void) +{ + int my_cpu, i, notified=-1; + + smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); + my_cpu = get_cpu(); + + /* check the others cpus are now down (via paca hw cpu id == -1) */ + for (i=0; i < NR_CPUS; i++) { + if (i == my_cpu) + continue; + + while (paca[i].hw_cpu_id != -1) { + if (!cpu_possible(i)) { + printk("kexec: cpu %d hw_cpu_id %d is not" + " possible, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (!cpu_online(i)) { + /* Fixme: this can be spinning in + * pSeries_secondary_wait with a paca + * waiting for it to go online. + */ + printk("kexec: cpu %d hw_cpu_id %d is not" + " online, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (i != notified) { + printk( "kexec: waiting for cpu %d (physical" + " %d) to go down\n", + i, paca[i].hw_cpu_id); + notified = i; + } + } + } + + /* after we tell the others to go down */ + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + + put_cpu(); + + local_irq_disable(); +} + +#else /* ! SMP */ + +static void kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + */ + smp_relase_cpus(); + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + local_irq_disable(); +} + +#endif /* SMP */ + +/* + * kexec thread structure and stack. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. It also must be statically allocated + * or allocated as part of the kimage, because everything else may be + * overwritten when we copy the kexec image. We piggyback on the + * "init_task" linker section here to statically allocate a stack. + * + * We could use a smaller stack if we don't care about anything using + * current, but that audit has not been performed. + */ +union thread_union kexec_stack + __attribute__((__section__(".data.init_task"))) = { }; + +/* Our assembly helper, in kexec_stub.S */ +extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) ATTRIB_NORET; + +/* too late to fail here */ +void machine_kexec(struct kimage *image) +{ + + /* prepare control code if any */ + + /* shutdown other cpus into our wait loop and quiesce interrupts */ + kexec_prepare_cpus(); + + /* switch to a staticly allocated stack. Based on irq stack code. + * XXX: the task struct will likely be invalid once we do the copy! + */ + kexec_stack.thread_info.task = current_thread_info()->task; + kexec_stack.thread_info.flags = 0; + + /* Some things are best done in assembly. Finding globals with + * a toc is easier in C, so pass in what we can. + */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + ppc_md.hpte_clear_all); + /* NOTREACHED */ +} diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c index da8900b51f4..bb55b5a5691 100644 --- a/arch/ppc64/kernel/maple_setup.c +++ b/arch/ppc64/kernel/maple_setup.c @@ -177,6 +177,8 @@ void __init maple_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } /* @@ -297,4 +299,5 @@ struct machdep_calls __initdata maple_md = { .get_rtc_time = maple_get_rtc_time, .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c index d98bebf7042..ef4a338ebd0 100644 --- a/arch/ppc64/kernel/mf.c +++ b/arch/ppc64/kernel/mf.c @@ -801,10 +801,8 @@ int mf_get_boot_rtc(struct rtc_time *tm) return rc; /* We need to poll here as we are not yet taking interrupts */ while (rtc_data.busy) { - extern unsigned long lpevent_count; - struct ItLpQueue *lpq = get_paca()->lpqueue_ptr; - if (lpq && ItLpQueue_isLpIntPending(lpq)) - lpevent_count += ItLpQueue_process(lpq, NULL); + if (hvlpevent_is_pending()) + process_hvlpevents(NULL); } return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); } diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index e3c73b3425d..59f4f997381 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -680,6 +680,177 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr +/* kexec_wait(phys_cpu) + * + * wait for the flag to change, indicating this kernel is going away but + * the slave code for the next one is at addresses 0 to 100. + * + * This is used by all slaves. + * + * Physical (hardware) cpu id should be in r3. + */ +_GLOBAL(kexec_wait) + bl 1f +1: mflr r5 + addi r5,r5,kexec_flag-1b + +99: HMT_LOW +#ifdef CONFIG_KEXEC /* use no memory without kexec */ + lwz r4,0(r5) + cmpwi 0,r4,0 + bnea 0x60 +#endif + b 99b + +/* this can be in text because we won't change it until we are + * running in real anyways + */ +kexec_flag: + .long 0 + + +#ifdef CONFIG_KEXEC + +/* kexec_smp_wait(void) + * + * call with interrupts off + * note: this is a terminal routine, it does not save lr + * + * get phys id from paca + * set paca id to -1 to say we got here + * switch to real mode + * join other cpus in kexec_wait(phys_id) + */ +_GLOBAL(kexec_smp_wait) + lhz r3,PACAHWCPUID(r13) + li r4,-1 + sth r4,PACAHWCPUID(r13) /* let others know we left */ + bl real_mode + b .kexec_wait + +/* + * switch to real mode (turn mmu off) + * we use the early kernel trick that the hardware ignores bits + * 0 and 1 (big endian) of the effective address in real mode + * + * don't overwrite r3 here, it is live for kexec_wait above. + */ +real_mode: /* assume normal blr return */ +1: li r9,MSR_RI + li r10,MSR_DR|MSR_IR + mflr r11 /* return address to SRR0 */ + mfmsr r12 + andc r9,r12,r9 + andc r10,r12,r10 + + mtmsrd r9,1 + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + rfid + + +/* + * kexec_sequence(newstack, start, image, control, clear_all()) + * + * does the grungy work with stack switching and real mode switches + * also does simple calls to other code + */ + +_GLOBAL(kexec_sequence) + mflr r0 + std r0,16(r1) + + /* switch stacks to newstack -- &kexec_stack.stack */ + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + + li r0,0 + std r0,16(r1) + + /* save regs for local vars on new stack. + * yes, we won't go back, but ... + */ + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + stdu r1,-112-64(r1) + + /* save args into preserved regs */ + mr r31,r3 /* newstack (both) */ + mr r30,r4 /* start (real) */ + mr r29,r5 /* image (virt) */ + mr r28,r6 /* control, unused */ + mr r27,r7 /* clear_all() fn desc */ + mr r26,r8 /* spare */ + lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ + + /* disable interrupts, we are overwriting kernel data next */ + mfmsr r3 + rlwinm r3,r3,0,17,15 + mtmsrd r3,1 + + /* copy dest pages, flush whole dest image */ + mr r3,r29 + bl .kexec_copy_flush /* (image) */ + + /* turn off mmu */ + bl real_mode + + /* clear out hardware hash page table and tlb */ + ld r5,0(r27) /* deref function descriptor */ + mtctr r5 + bctrl /* ppc_md.hash_clear_all(void); */ + +/* + * kexec image calling is: + * the first 0x100 bytes of the entry point are copied to 0 + * + * all slaves branch to slave = 0x60 (absolute) + * slave(phys_cpu_id); + * + * master goes to start = entry point + * start(phys_cpu_id, start, 0); + * + * + * a wrapper is needed to call existing kernels, here is an approximate + * description of one method: + * + * v2: (2.6.10) + * start will be near the boot_block (maybe 0x100 bytes before it?) + * it will have a 0x60, which will b to boot_block, where it will wait + * and 0 will store phys into struct boot-block and load r3 from there, + * copy kernel 0-0x100 and tell slaves to back down to 0x60 again + * + * v1: (2.6.9) + * boot block will have all cpus scanning device tree to see if they + * are the boot cpu ????? + * other device tree differences (prop sizes, va vs pa, etc)... + */ + + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + mr r3,r25 # my phys cpu + mr r4,r30 # start, aka phys mem offset + mtlr 4 + li r5,0 + blr /* image->start(physid, image->start, 0); */ +#endif /* CONFIG_KEXEC */ + /* Why isn't this a) automatic, b) written in 'C'? */ .balign 8 _GLOBAL(sys_call_table32) @@ -951,11 +1122,13 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_mq_timedreceive /* 265 */ .llong .compat_sys_mq_notify .llong .compat_sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .compat_sys_kexec_load .llong .sys32_add_key - .llong .sys32_request_key + .llong .sys32_request_key /* 270 */ .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys32_ioprio_set + .llong .sys32_ioprio_get .balign 8 _GLOBAL(sys_call_table) @@ -1227,8 +1400,10 @@ _GLOBAL(sys_call_table) .llong .sys_mq_timedreceive /* 265 */ .llong .sys_mq_notify .llong .sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .sys_kexec_load .llong .sys_add_key .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ioprio_set + .llong .sys_ioprio_get diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c index 593ea5b82af..e8fbab1df37 100644 --- a/arch/ppc64/kernel/mpic.c +++ b/arch/ppc64/kernel/mpic.c @@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void) #endif /* CONFIG_SMP */ } +/* + * XXX: someone who knows mpic should check this. + * do we need to eoi the ipi here (see xics comments)? + * or can we reset the mpic in the new kernel? + */ +void mpic_teardown_this_cpu(void) +{ + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we don't want intrs. */ + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + spin_unlock_irqrestore(&mpic_lock, flags); +} + + void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) { struct mpic *mpic = mpic_primary; diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h index 63e177143ea..99fbbc9a084 100644 --- a/arch/ppc64/kernel/mpic.h +++ b/arch/ppc64/kernel/mpic.h @@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq); /* Setup a non-boot CPU */ extern void mpic_setup_this_cpu(void); +/* Clean up for kexec (or cpu offline or ...) */ +extern void mpic_teardown_this_cpu(void); + /* Request IPIs on primary mpic */ extern void mpic_request_ipis(void); diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c index 4e71781a441..4fb1a9f5060 100644 --- a/arch/ppc64/kernel/nvram.c +++ b/arch/ppc64/kernel/nvram.c @@ -338,9 +338,8 @@ static int nvram_remove_os_partition(void) */ static int nvram_create_os_partition(void) { - struct list_head * p; - struct nvram_partition *part = NULL; - struct nvram_partition *new_part = NULL; + struct nvram_partition *part; + struct nvram_partition *new_part; struct nvram_partition *free_part = NULL; int seq_init[2] = { 0, 0 }; loff_t tmp_index; @@ -349,8 +348,7 @@ static int nvram_create_os_partition(void) /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); + list_for_each_entry(part, &nvram_part->partition, partition) { if (part->header.signature != NVRAM_SIG_FREE) continue; diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index 66bd5ab7c25..b80e81984ba 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/mod_devicetable.h> #include <asm/errno.h> #include <asm/of_device.h> @@ -15,20 +16,20 @@ * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. */ -const struct of_match * of_match_device(const struct of_match *matches, +const struct of_device_id *of_match_device(const struct of_device_id *matches, const struct of_device *dev) { if (!dev->node) return NULL; - while (matches->name || matches->type || matches->compatible) { + while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; - if (matches->name && matches->name != OF_ANY_MATCH) + if (matches->name[0]) match &= dev->node->name && !strcmp(matches->name, dev->node->name); - if (matches->type && matches->type != OF_ANY_MATCH) + if (matches->type[0]) match &= dev->node->type && !strcmp(matches->type, dev->node->type); - if (matches->compatible && matches->compatible != OF_ANY_MATCH) + if (matches->compatible[0]) match &= device_is_compatible(dev->node, matches->compatible); if (match) @@ -42,7 +43,7 @@ static int of_platform_bus_match(struct device *dev, struct device_driver *drv) { struct of_device * of_dev = to_of_device(dev); struct of_platform_driver * of_drv = to_of_platform_driver(drv); - const struct of_match * matches = of_drv->match_table; + const struct of_device_id * matches = of_drv->match_table; if (!matches) return 0; @@ -75,7 +76,7 @@ static int of_device_probe(struct device *dev) int error = -ENODEV; struct of_platform_driver *drv; struct of_device *of_dev; - const struct of_match *match; + const struct of_device_id *match; drv = to_of_platform_driver(dev->driver); of_dev = to_of_device(dev); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index f2b41243342..5bec956e44a 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -19,6 +19,7 @@ #undef DEBUG #include <linux/config.h> +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -82,6 +83,9 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); + static volatile void __iomem * chrp_int_ack_special; struct mpic *pSeries_mpic; @@ -187,14 +191,16 @@ static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.init_IRQ = pSeries_init_mpic; ppc_md.get_irq = mpic_get_irq; + ppc_md.cpu_irq_down = mpic_teardown_this_cpu; /* Allocate the mpic now, so that find_and_init_phbs() can * fill the ISUs */ pSeries_setup_mpic(); } else { ppc_md.init_IRQ = xics_init_IRQ; ppc_md.get_irq = xics_get_irq; + ppc_md.cpu_irq_down = xics_teardown_cpu; } #ifdef CONFIG_SMP @@ -227,6 +233,20 @@ static void __init pSeries_setup_arch(void) if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); + + /* Choose an idle loop */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = pseries_shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = pseries_dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } } static int __init pSeries_init_panel(void) @@ -416,6 +436,144 @@ static int __init pSeries_probe(int platform) return 1; } +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +static inline void dedicated_idle_sleep(unsigned int cpu) +{ + struct paca_struct *ppaca = &paca[cpu ^ 1]; + + /* Only sleep if the other thread is not idle */ + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result in this thread going + * dormant, if the partner thread is still doing work. Thread + * wakes up if partner goes idle, an interrupt is presented, or + * a prod occurs. Returning from the cede enables external + * interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the processor, since we are + * not doing any work. + */ + poll_pending(); + } +} + +static int pseries_dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); + } + + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +static int pseries_shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + + HMT_medium(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 30154140f7e..62c55a12356 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -93,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu) int pSeries_cpu_disable(void) { + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); systemcfg->processorCount--; /*fix boot_cpuid here*/ - if (smp_processor_id() == boot_cpuid) + if (cpu == boot_cpuid) boot_cpuid = any_online_cpu(cpu_online_map); /* FIXME: abstract this to not be platform specific later on */ diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index a3e0975c26c..6316188737b 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c @@ -42,21 +42,7 @@ extern unsigned long __toc_start; * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -#ifdef CONFIG_PPC_ISERIES -#define EXTRA_INITS(number, lpq) \ - .lppaca_ptr = &paca[number].lppaca, \ - .lpqueue_ptr = (lpq), /* &xItLpQueue, */ \ - .reg_save_ptr = &paca[number].reg_save, \ - .reg_save = { \ - .xDesc = 0xd397d9e2, /* "LpRS" */ \ - .xSize = sizeof(struct ItLpRegSave) \ - }, -#else -#define EXTRA_INITS(number, lpq) -#endif - -#define PACAINITDATA(number,start,lpq,asrr,asrv) \ -{ \ +#define PACA_INIT_COMMON(number, start, asrr, asrv) \ .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ .default_decr = 0x00ff0000, /* Initial Decr */ \ @@ -74,147 +60,79 @@ extern unsigned long __toc_start; .end_of_quantum = 0xfffffffffffffffful, \ .slb_count = 64, \ }, \ - EXTRA_INITS((number), (lpq)) \ -} -struct paca_struct paca[] = { #ifdef CONFIG_PPC_ISERIES - PACAINITDATA( 0, 1, &xItLpQueue, 0, STAB0_VIRT_ADDR), +#define PACA_INIT_ISERIES(number) \ + .lppaca_ptr = &paca[number].lppaca, \ + .reg_save_ptr = &paca[number].reg_save, \ + .reg_save = { \ + .xDesc = 0xd397d9e2, /* "LpRS" */ \ + .xSize = sizeof(struct ItLpRegSave) \ + } + +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ + PACA_INIT_ISERIES(number) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ + PACA_INIT_ISERIES(number) \ +} + #else - PACAINITDATA( 0, 1, NULL, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR), +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ +} #endif + +struct paca_struct paca[] = { + BOOTCPU_PACA_INIT(0), #if NR_CPUS > 1 - PACAINITDATA( 1, 0, NULL, 0, 0), - PACAINITDATA( 2, 0, NULL, 0, 0), - PACAINITDATA( 3, 0, NULL, 0, 0), + PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), #if NR_CPUS > 4 - PACAINITDATA( 4, 0, NULL, 0, 0), - PACAINITDATA( 5, 0, NULL, 0, 0), - PACAINITDATA( 6, 0, NULL, 0, 0), - PACAINITDATA( 7, 0, NULL, 0, 0), + PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), #if NR_CPUS > 8 - PACAINITDATA( 8, 0, NULL, 0, 0), - PACAINITDATA( 9, 0, NULL, 0, 0), - PACAINITDATA(10, 0, NULL, 0, 0), - PACAINITDATA(11, 0, NULL, 0, 0), - PACAINITDATA(12, 0, NULL, 0, 0), - PACAINITDATA(13, 0, NULL, 0, 0), - PACAINITDATA(14, 0, NULL, 0, 0), - PACAINITDATA(15, 0, NULL, 0, 0), - PACAINITDATA(16, 0, NULL, 0, 0), - PACAINITDATA(17, 0, NULL, 0, 0), - PACAINITDATA(18, 0, NULL, 0, 0), - PACAINITDATA(19, 0, NULL, 0, 0), - PACAINITDATA(20, 0, NULL, 0, 0), - PACAINITDATA(21, 0, NULL, 0, 0), - PACAINITDATA(22, 0, NULL, 0, 0), - PACAINITDATA(23, 0, NULL, 0, 0), - PACAINITDATA(24, 0, NULL, 0, 0), - PACAINITDATA(25, 0, NULL, 0, 0), - PACAINITDATA(26, 0, NULL, 0, 0), - PACAINITDATA(27, 0, NULL, 0, 0), - PACAINITDATA(28, 0, NULL, 0, 0), - PACAINITDATA(29, 0, NULL, 0, 0), - PACAINITDATA(30, 0, NULL, 0, 0), - PACAINITDATA(31, 0, NULL, 0, 0), + PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), + PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), + PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), + PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), + PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), + PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), #if NR_CPUS > 32 - PACAINITDATA(32, 0, NULL, 0, 0), - PACAINITDATA(33, 0, NULL, 0, 0), - PACAINITDATA(34, 0, NULL, 0, 0), - PACAINITDATA(35, 0, NULL, 0, 0), - PACAINITDATA(36, 0, NULL, 0, 0), - PACAINITDATA(37, 0, NULL, 0, 0), - PACAINITDATA(38, 0, NULL, 0, 0), - PACAINITDATA(39, 0, NULL, 0, 0), - PACAINITDATA(40, 0, NULL, 0, 0), - PACAINITDATA(41, 0, NULL, 0, 0), - PACAINITDATA(42, 0, NULL, 0, 0), - PACAINITDATA(43, 0, NULL, 0, 0), - PACAINITDATA(44, 0, NULL, 0, 0), - PACAINITDATA(45, 0, NULL, 0, 0), - PACAINITDATA(46, 0, NULL, 0, 0), - PACAINITDATA(47, 0, NULL, 0, 0), - PACAINITDATA(48, 0, NULL, 0, 0), - PACAINITDATA(49, 0, NULL, 0, 0), - PACAINITDATA(50, 0, NULL, 0, 0), - PACAINITDATA(51, 0, NULL, 0, 0), - PACAINITDATA(52, 0, NULL, 0, 0), - PACAINITDATA(53, 0, NULL, 0, 0), - PACAINITDATA(54, 0, NULL, 0, 0), - PACAINITDATA(55, 0, NULL, 0, 0), - PACAINITDATA(56, 0, NULL, 0, 0), - PACAINITDATA(57, 0, NULL, 0, 0), - PACAINITDATA(58, 0, NULL, 0, 0), - PACAINITDATA(59, 0, NULL, 0, 0), - PACAINITDATA(60, 0, NULL, 0, 0), - PACAINITDATA(61, 0, NULL, 0, 0), - PACAINITDATA(62, 0, NULL, 0, 0), - PACAINITDATA(63, 0, NULL, 0, 0), + PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), + PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), + PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), + PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), + PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), + PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), + PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), + PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), #if NR_CPUS > 64 - PACAINITDATA(64, 0, NULL, 0, 0), - PACAINITDATA(65, 0, NULL, 0, 0), - PACAINITDATA(66, 0, NULL, 0, 0), - PACAINITDATA(67, 0, NULL, 0, 0), - PACAINITDATA(68, 0, NULL, 0, 0), - PACAINITDATA(69, 0, NULL, 0, 0), - PACAINITDATA(70, 0, NULL, 0, 0), - PACAINITDATA(71, 0, NULL, 0, 0), - PACAINITDATA(72, 0, NULL, 0, 0), - PACAINITDATA(73, 0, NULL, 0, 0), - PACAINITDATA(74, 0, NULL, 0, 0), - PACAINITDATA(75, 0, NULL, 0, 0), - PACAINITDATA(76, 0, NULL, 0, 0), - PACAINITDATA(77, 0, NULL, 0, 0), - PACAINITDATA(78, 0, NULL, 0, 0), - PACAINITDATA(79, 0, NULL, 0, 0), - PACAINITDATA(80, 0, NULL, 0, 0), - PACAINITDATA(81, 0, NULL, 0, 0), - PACAINITDATA(82, 0, NULL, 0, 0), - PACAINITDATA(83, 0, NULL, 0, 0), - PACAINITDATA(84, 0, NULL, 0, 0), - PACAINITDATA(85, 0, NULL, 0, 0), - PACAINITDATA(86, 0, NULL, 0, 0), - PACAINITDATA(87, 0, NULL, 0, 0), - PACAINITDATA(88, 0, NULL, 0, 0), - PACAINITDATA(89, 0, NULL, 0, 0), - PACAINITDATA(90, 0, NULL, 0, 0), - PACAINITDATA(91, 0, NULL, 0, 0), - PACAINITDATA(92, 0, NULL, 0, 0), - PACAINITDATA(93, 0, NULL, 0, 0), - PACAINITDATA(94, 0, NULL, 0, 0), - PACAINITDATA(95, 0, NULL, 0, 0), - PACAINITDATA(96, 0, NULL, 0, 0), - PACAINITDATA(97, 0, NULL, 0, 0), - PACAINITDATA(98, 0, NULL, 0, 0), - PACAINITDATA(99, 0, NULL, 0, 0), - PACAINITDATA(100, 0, NULL, 0, 0), - PACAINITDATA(101, 0, NULL, 0, 0), - PACAINITDATA(102, 0, NULL, 0, 0), - PACAINITDATA(103, 0, NULL, 0, 0), - PACAINITDATA(104, 0, NULL, 0, 0), - PACAINITDATA(105, 0, NULL, 0, 0), - PACAINITDATA(106, 0, NULL, 0, 0), - PACAINITDATA(107, 0, NULL, 0, 0), - PACAINITDATA(108, 0, NULL, 0, 0), - PACAINITDATA(109, 0, NULL, 0, 0), - PACAINITDATA(110, 0, NULL, 0, 0), - PACAINITDATA(111, 0, NULL, 0, 0), - PACAINITDATA(112, 0, NULL, 0, 0), - PACAINITDATA(113, 0, NULL, 0, 0), - PACAINITDATA(114, 0, NULL, 0, 0), - PACAINITDATA(115, 0, NULL, 0, 0), - PACAINITDATA(116, 0, NULL, 0, 0), - PACAINITDATA(117, 0, NULL, 0, 0), - PACAINITDATA(118, 0, NULL, 0, 0), - PACAINITDATA(119, 0, NULL, 0, 0), - PACAINITDATA(120, 0, NULL, 0, 0), - PACAINITDATA(121, 0, NULL, 0, 0), - PACAINITDATA(122, 0, NULL, 0, 0), - PACAINITDATA(123, 0, NULL, 0, 0), - PACAINITDATA(124, 0, NULL, 0, 0), - PACAINITDATA(125, 0, NULL, 0, 0), - PACAINITDATA(126, 0, NULL, 0, 0), - PACAINITDATA(127, 0, NULL, 0, 0), + PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), + PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), + PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), + PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), + PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), + PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), + PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), + PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), + PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), + PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), + PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), + PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), + PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), + PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), + PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123), + PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), #endif #endif #endif diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 580676f87d2..ae6f579d3fa 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -351,7 +351,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, *offset += hose->pci_mem_offset; res_bit = IORESOURCE_MEM; } else { - io_offset = (unsigned long)hose->io_base_virt; + io_offset = (unsigned long)hose->io_base_virt - pci_io_base; *offset += io_offset; res_bit = IORESOURCE_IO; } @@ -378,7 +378,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, /* found it! construct the final physical address */ if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; + *offset += hose->io_base_phys - io_offset; return rp; } @@ -944,4 +944,22 @@ int pci_read_irq_line(struct pci_dev *pci_dev) } EXPORT_SYMBOL(pci_read_irq_line); +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + u64 *start, u64 *end) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = pci_io_base - (unsigned long)hose->io_base_virt + + hose->io_base_phys; + + *start = rsrc->start + offset; + *end = rsrc->end + offset; +} + #endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 6cf03d387b9..3013cdb5f93 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -186,6 +186,8 @@ void __init pmac_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } #ifdef CONFIG_SCSI @@ -507,5 +509,6 @@ struct machdep_calls __initdata pmac_md = { .calibrate_decr = pmac_calibrate_decr, .feature_call = pmac_do_feature_call, .progress = pmac_progress, - .check_legacy_ioport = pmac_check_legacy_ioport + .check_legacy_ioport = pmac_check_legacy_ioport, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c index b230a63fe4c..705742f4eec 100644 --- a/arch/ppc64/kernel/ppc_ksyms.c +++ b/arch/ppc64/kernel/ppc_ksyms.c @@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(giveup_altivec); #endif EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(flush_dcache_range); #ifdef CONFIG_SMP #ifdef CONFIG_PPC_ISERIES diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index aba89554d89..f7cae05e40f 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/kallsyms.h> #include <linux/interrupt.h> #include <linux/utsname.h> +#include <linux/kprobes.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { + kprobe_flush_task(current); + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -321,6 +324,7 @@ void flush_thread(void) { struct thread_info *t = current_thread_info(); + kprobe_flush_task(current); if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 9f8c6087ae5..2993f108d96 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -305,6 +305,8 @@ static void do_syscall_trace(void) void do_syscall_trace_enter(struct pt_regs *regs) { + secure_computing(regs->gpr[0]); + if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) do_syscall_trace(); @@ -320,8 +322,6 @@ void do_syscall_trace_enter(struct pt_regs *regs) void do_syscall_trace_leave(struct pt_regs *regs) { - secure_computing(regs->gpr[0]); - if (unlikely(current->audit_context)) audit_syscall_exit(current, (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 0a47a5ef428..d1b33f0b26c 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -96,7 +96,6 @@ extern void udbg_init_maple_realmode(void); extern unsigned long klimit; extern void mm_init_ppc64(void); -extern int idle_setup(void); extern void stab_initialize(unsigned long stab); extern void htab_initialize(void); extern void early_init_devtree(void *flat_dt); @@ -677,11 +676,16 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } - -void machine_restart(char *cmd) +/* also used by kexec */ +void machine_shutdown(void) { if (ppc_md.nvram_sync) ppc_md.nvram_sync(); +} + +void machine_restart(char *cmd) +{ + machine_shutdown(); ppc_md.restart(cmd); #ifdef CONFIG_SMP smp_send_stop(); @@ -690,13 +694,11 @@ void machine_restart(char *cmd) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_restart); - + void machine_power_off(void) { - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); + machine_shutdown(); ppc_md.power_off(); #ifdef CONFIG_SMP smp_send_stop(); @@ -705,13 +707,11 @@ void machine_power_off(void) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_power_off); - + void machine_halt(void) { - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); + machine_shutdown(); ppc_md.halt(); #ifdef CONFIG_SMP smp_send_stop(); @@ -720,7 +720,6 @@ void machine_halt(void) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_halt); static int ppc64_panic_event(struct notifier_block *this, @@ -1081,8 +1080,11 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); - /* Select the correct idle loop for the platform. */ - idle_setup(); + /* Use the default idle loop if the platform hasn't provided one. */ + if (NULL == ppc_md.idle_loop) { + ppc_md.idle_loop = default_idle; + printk(KERN_INFO "Using default idle loop\n"); + } paging_init(); ppc64_boot_msg(0x15, "Setup Done"); diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 118436e8085..206619080e6 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -30,47 +30,26 @@ #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> -#include <linux/slab.h> -#include <linux/uio.h> -#include <linux/aio.h> -#include <linux/nfs_fs.h> -#include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> -#include <linux/filter.h> -#include <linux/highmem.h> -#include <linux/highuid.h> #include <linux/mman.h> -#include <linux/ipv6.h> #include <linux/in.h> -#include <linux/icmpv6.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/sysctl.h> #include <linux/binfmts.h> -#include <linux/dnotify.h> #include <linux/security.h> #include <linux/compat.h> #include <linux/ptrace.h> -#include <linux/aio_abi.h> #include <linux/elf.h> -#include <net/scm.h> -#include <net/sock.h> - #include <asm/ptrace.h> #include <asm/types.h> #include <asm/ipc.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/semaphore.h> -#include <asm/ppcdebug.h> #include <asm/time.h> #include <asm/mmu_context.h> #include <asm/systemcfg.h> @@ -350,8 +329,6 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp) return ret; } - -/* These are here just in case some old sparc32 binary calls it. */ asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; @@ -360,8 +337,6 @@ asmlinkage long sys32_pause(void) return -ERESTARTNOHAND; } - - static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) { long usec; @@ -847,16 +822,6 @@ asmlinkage long sys32_getpgid(u32 pid) } -/* Note: it is necessary to treat which and who as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long sys32_getpriority(u32 which, u32 who) -{ - return sys_getpriority((int)which, (int)who); -} - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the @@ -1048,6 +1013,11 @@ asmlinkage long sys32_setpgid(u32 pid, u32 pgid) return sys_setpgid((int)pid, (int)pgid); } +long sys32_getpriority(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_getpriority((int)which, (int)who); +} long sys32_setpriority(u32 which, u32 who, u32 niceval) { @@ -1055,6 +1025,18 @@ long sys32_setpriority(u32 which, u32 who, u32 niceval) return sys_setpriority((int)which, (int)who, (int)niceval); } +long sys32_ioprio_get(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_ioprio_get((int)which, (int)who); +} + +long sys32_ioprio_set(u32 which, u32 who, u32 ioprio) +{ + /* sign extend which, who and ioprio */ + return sys_ioprio_set((int)which, (int)who, (int)ioprio); +} + /* Note: it is necessary to treat newmask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1273,8 +1255,6 @@ long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, (u64)len_high << 32 | len_low, advice); } -extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *); - long ppc32_timer_create(clockid_t clock, struct compat_sigevent __user *ev32, timer_t __user *timer_id) diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index c8fa6569b2f..02b8ac4e016 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -112,7 +112,6 @@ void ppc64_enable_pmcs(void) unsigned long hid0; #ifdef CONFIG_PPC_PSERIES unsigned long set, reset; - int ret; #endif /* CONFIG_PPC_PSERIES */ /* Only need to enable them once */ @@ -145,11 +144,7 @@ void ppc64_enable_pmcs(void) case PLATFORM_PSERIES_LPAR: set = 1UL << 63; reset = 0; - ret = plpar_hcall_norets(H_PERFMON, set, reset); - if (ret) - printk(KERN_ERR "H_PERFMON call on cpu %u " - "returned %d\n", - smp_processor_id(), ret); + plpar_hcall_norets(H_PERFMON, set, reset); break; #endif /* CONFIG_PPC_PSERIES */ @@ -161,13 +156,6 @@ void ppc64_enable_pmcs(void) /* instruct hypervisor to maintain PMCs */ if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) get_paca()->lppaca.pmcregs_in_use = 1; - - /* - * On SMT machines we have to set the run latch in the ctrl register - * in order to make PMC6 spin. - */ - if (cpu_has_feature(CPU_FTR_SMT)) - ppc64_runlatch_on(); #endif /* CONFIG_PPC_PSERIES */ } @@ -400,7 +388,12 @@ static int __init topology_init(void) struct cpu *c = &per_cpu(cpu_devices, cpu); #ifdef CONFIG_NUMA - parent = &node_devices[cpu_to_node(cpu)]; + /* The node to which a cpu belongs can't be known + * until the cpu is made present. + */ + parent = NULL; + if (cpu_present(cpu)) + parent = &node_devices[cpu_to_node(cpu)]; #endif /* * For now, we just see if the system supports making diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 2348a75e050..909462e1ade 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -91,6 +91,7 @@ unsigned long tb_to_xs; unsigned tb_to_us; unsigned long processor_freq; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL_GPL(rtc_lock); unsigned long tb_to_ns_scale; unsigned long tb_to_ns_shift; @@ -98,7 +99,6 @@ unsigned long tb_to_ns_shift; struct gettimeofday_struct do_gtod; extern unsigned long wall_jiffies; -extern unsigned long lpevent_count; extern int smp_tb_synchronized; extern struct timezone sys_tz; @@ -366,11 +366,8 @@ int timer_interrupt(struct pt_regs * regs) set_dec(next_dec); #ifdef CONFIG_PPC_ISERIES - { - struct ItLpQueue *lpq = lpaca->lpqueue_ptr; - if (lpq && ItLpQueue_isLpIntPending(lpq)) - lpevent_count += ItLpQueue_process(lpq, regs); - } + if (hvlpevent_is_pending()) + process_hvlpevents(regs); #endif /* collect purr register values often, for accurate calculations */ diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S index 11290c902ba..6f87a916a39 100644 --- a/arch/ppc64/kernel/vdso32/vdso32.lds.S +++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S @@ -40,9 +40,9 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .fixup : { *(.fixup) } - .got ALIGN(4) : { *(.got.plt) *(.got) } - .dynamic : { *(.dynamic) } :text :dynamic + .got : { *(.got) } + .plt : { *(.plt) } _end = .; __end = .; diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c index 879f39b90a3..677c4450984 100644 --- a/arch/ppc64/kernel/xics.c +++ b/arch/ppc64/kernel/xics.c @@ -647,6 +647,31 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) } } +void xics_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + int status; + + ops->cppr_info(cpu, 0x00); + iosync(); + + /* + * we need to EOI the IPI if we got here from kexec down IPI + * + * xics doesn't care if we duplicate an EOI as long as we + * don't EOI and raise priority. + * + * probably need to check all the other interrupts too + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + ops->xirr_info_set(cpu, XICS_IPI); + + status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 0); + WARN_ON(status != 0); +} + #ifdef CONFIG_HOTPLUG_CPU /* Interrupts are disabled. */ diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 52b6b930534..4fec05817d6 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -304,6 +304,50 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, local_irq_restore(flags); } +/* + * clear all mappings on kexec. All cpus are in real mode (or they will + * be when they isi), and we are the only one left. We rely on our kernel + * mapping being 0xC0's and the hardware ignoring those two real bits. + * + * TODO: add batching support when enabled. remember, no dynamic memory here, + * athough there is the control page available... + */ +static void native_hpte_clear(void) +{ + unsigned long slot, slots, flags; + HPTE *hptep = htab_address; + Hpte_dword0 dw0; + unsigned long pteg_count; + + pteg_count = htab_hash_mask + 1; + + local_irq_save(flags); + + /* we take the tlbie lock and hold it. Some hardware will + * deadlock if we try to tlbie from two processors at once. + */ + spin_lock(&native_tlbie_lock); + + slots = pteg_count * HPTES_PER_GROUP; + + for (slot = 0; slot < slots; slot++, hptep++) { + /* + * we could lock the pte here, but we are the only cpu + * running, right? and for crash dump, we probably + * don't want to wait for a maybe bad cpu. + */ + dw0 = hptep->dw0.dw0; + + if (dw0.v) { + hptep->dw0.dword0 = 0; + tlbie(slot2va(dw0.avpn, dw0.l, dw0.h, slot), dw0.l); + } + } + + spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); +} + static void native_flush_hash_range(unsigned long context, unsigned long number, int local) { @@ -415,7 +459,8 @@ void hpte_init_native(void) ppc_md.hpte_updatepp = native_hpte_updatepp; ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_clear_all = native_hpte_clear; if (tlb_batching_enabled()) ppc_md.flush_hash_range = native_flush_hash_range; htab_finish_init(); |