aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile8
-rw-r--r--arch/powerpc/kernel/lparcfg.c612
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c126
-rw-r--r--arch/powerpc/kernel/prom.c4
-rw-r--r--arch/powerpc/kernel/rtas_pci.c513
-rw-r--r--arch/powerpc/kernel/setup-common.c2
-rw-r--r--arch/powerpc/kernel/setup_32.c3
-rw-r--r--arch/powerpc/kernel/setup_64.c8
-rw-r--r--arch/powerpc/kernel/sysfs.c384
9 files changed, 1644 insertions, 16 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 92cfabf929b..c04bbd32059 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,13 +16,17 @@ obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
signal_64.o ptrace32.o systbl.o \
paca.o ioctl32.o cpu_setup_power4.o \
- firmware.o
+ firmware.o sysfs.o
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o
+procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000000..5e954fae031
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,612 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/* find a better place for this function... */
+void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct paca_struct *lpaca;
+
+ for_each_cpu(cpu) {
+ lpaca = paca + cpu;
+ sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+ printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+ cpu, lpaca->lppaca.emulated_time_base);
+#endif
+ }
+ return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ unsigned long pool_id, lp_index;
+ int shared, entitled_capacity, max_entitled_capacity;
+ int processors, max_processors;
+ struct paca_struct *lpaca = get_paca();
+ unsigned long purr = get_purr();
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ shared = (int)(lpaca->lppaca_ptr->shared_proc);
+ seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+ e2a(xItExtVpdPanel.mfgID[2]),
+ e2a(xItExtVpdPanel.mfgID[3]),
+ e2a(xItExtVpdPanel.systemSerial[1]),
+ e2a(xItExtVpdPanel.systemSerial[2]),
+ e2a(xItExtVpdPanel.systemSerial[3]),
+ e2a(xItExtVpdPanel.systemSerial[4]),
+ e2a(xItExtVpdPanel.systemSerial[5]));
+
+ seq_printf(m, "system_type=%c%c%c%c\n",
+ e2a(xItExtVpdPanel.machineType[0]),
+ e2a(xItExtVpdPanel.machineType[1]),
+ e2a(xItExtVpdPanel.machineType[2]),
+ e2a(xItExtVpdPanel.machineType[3]));
+
+ lp_index = HvLpConfig_getLpIndex();
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ seq_printf(m, "system_active_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ processors = (int)HvLpConfig_getPhysicalProcessors();
+ seq_printf(m, "partition_active_processors=%d\n", processors);
+
+ max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+ seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+ if (shared) {
+ entitled_capacity = HvLpConfig_getSharedProcUnits();
+ max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+ } else {
+ entitled_capacity = processors * 100;
+ max_entitled_capacity = max_processors * 100;
+ }
+ seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ max_entitled_capacity);
+
+ if (shared) {
+ pool_id = HvLpConfig_getSharedPoolIndex();
+ seq_printf(m, "pool=%d\n", (int)pool_id);
+ seq_printf(m, "pool_capacity=%d\n",
+ (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+ 100));
+ seq_printf(m, "purr=%ld\n", purr);
+ }
+
+ seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+ unsigned long *unallocated,
+ unsigned long *aggregation,
+ unsigned long *resource)
+{
+ unsigned long rc;
+ rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+ aggregation, resource);
+
+ log_plpar_hcall_return(rc, "H_GET_PPP");
+
+ return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long dummy;
+ rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+ log_plpar_hcall_return(rc, "H_PIC");
+}
+
+static unsigned long get_purr(void);
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications */
+
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct cpu_usage *cu;
+
+ for_each_cpu(cpu) {
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf));
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __FUNCTION__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+ splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+static int lparcfg_count_active_processors(void);
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ unsigned int *lp_index_ptr, lp_index = 0;
+ struct device_node *rtas_node;
+ int *lrdrp;
+
+ rootdn = find_path_device("/");
+ if (rootdn) {
+ model = get_property(rootdn, "model", NULL);
+ system_id = get_property(rootdn, "system-id", NULL);
+ lp_index_ptr = (unsigned int *)
+ get_property(rootdn, "ibm,partition-no", NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ }
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ seq_printf(m, "serial_number=%s\n", system_id);
+
+ seq_printf(m, "system_type=%s\n", model);
+
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ rtas_node = find_path_device("/rtas");
+ lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = _systemcfg->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ unsigned long h_entitled, h_unallocated;
+ unsigned long h_aggregation, h_resource;
+ unsigned long pool_idle_time, pool_procs;
+ unsigned long purr;
+
+ h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+ &h_resource);
+
+ seq_printf(m, "R4=0x%lx\n", h_entitled);
+ seq_printf(m, "R5=0x%lx\n", h_unallocated);
+ seq_printf(m, "R6=0x%lx\n", h_aggregation);
+ seq_printf(m, "R7=0x%lx\n", h_resource);
+
+ purr = get_purr();
+
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+ seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+ seq_printf(m, "system_active_processors=%ld\n",
+ (h_resource >> 0 * 8) & 0xffff);
+
+ /* pool related entries are apropriate for shared configs */
+ if (paca[0].lppaca.shared_proc) {
+
+ h_pic(&pool_idle_time, &pool_procs);
+
+ seq_printf(m, "pool=%ld\n",
+ (h_aggregation >> 0 * 8) & 0xffff);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%ld\n",
+ ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%ld\n",
+ (h_resource >> 4 * 8) & 0xFF);
+
+ seq_printf(m, "capacity_weight=%ld\n",
+ (h_resource >> 5 * 8) & 0xFF);
+
+ seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+ seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+ seq_printf(m, "purr=%ld\n", purr);
+
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+ return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ char *kbuf;
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+
+ unsigned long current_entitled; /* parameters for h_get_ppp */
+ unsigned long dummy;
+ unsigned long resource;
+ u8 current_weight;
+
+ ssize_t retval = -ENOMEM;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
+
+ retval = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+
+ retval = -EINVAL;
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ goto out;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_weight_ptr = &current_weight;
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_entitled_ptr = &current_entitled;
+ } else
+ goto out;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+ if (retval) {
+ retval = -EIO;
+ goto out;
+ }
+
+ current_weight = (resource >> 5 * 8) & 0xFF;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+ __FUNCTION__, current_entitled, current_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+ __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+ retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+ *new_weight_ptr);
+
+ if (retval == H_Success || retval == H_Constrained) {
+ retval = count;
+ } else if (retval == H_Busy) {
+ retval = -EBUSY;
+ } else if (retval == H_Hardware) {
+ retval = -EIO;
+ } else if (retval == H_Parameter) {
+ retval = -EINVAL;
+ } else {
+ printk(KERN_WARNING "%s: received unknown hv return code %ld",
+ __FUNCTION__, retval);
+ retval = -EIO;
+ }
+
+ out:
+ kfree(kbuf);
+ return retval;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .open = lparcfg_open,
+ .release = single_release,
+};
+
+int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ lparcfg_fops.write = lparcfg_write;
+ mode |= S_IWUSR;
+ }
+
+ ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+ if (ent) {
+ ent->proc_fops = &lparcfg_fops;
+ ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR
+ "Failed to allocate buffer for lparcfg\n");
+ remove_proc_entry("lparcfg", ent->parent);
+ return -ENOMEM;
+ }
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg) {
+ kfree(proc_ppc64_lparcfg->data);
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+ }
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 00000000000..a1c19502fe8
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/systemcfg.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos);
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("ppc64", NULL);
+ if (!root)
+ return 1;
+
+ if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ if (!pde)
+ return 1;
+ pde->nlink = 1;
+ pde->data = _systemcfg;
+ pde->size = PAGE_SIZE;
+ pde->proc_fops = &page_map_fops;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fd3bcb4a9e9..6a5b468edb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -71,10 +71,6 @@ struct isa_reg_property {
typedef int interpret_func(struct device_node *, unsigned long *,
int, int, int);
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-
static int __initdata dt_root_addr_cells;
static int __initdata dt_root_size_cells;
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 00000000000..0e5a8e11665
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,513 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+ char * status;
+
+ status = get_property(dn, "status", NULL);
+
+ if (!status)
+ return 1;
+
+ if (!strcmp(status, "okay"))
+ return 1;
+
+ return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+ rtas_pci_read_config,
+ rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+ char *model = (char *)get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+ unsigned int addr_size_words,
+ struct reg_property64 *reg)
+{
+ unsigned int *ui_ptr = NULL, len;
+
+ /* Found a PHB, now figure out where his registers are mapped. */
+ ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+ if (ui_ptr == NULL)
+ return 1;
+
+ if (addr_size_words == 1) {
+ reg->address = ((struct reg_property32 *)ui_ptr)->address;
+ reg->size = ((struct reg_property32 *)ui_ptr)->size;
+ } else {
+ *reg = *((struct reg_property64 *)ui_ptr);
+ }
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct reg_property64 reg_struct;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+ return;
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ int addr_cells;
+ unsigned int *buid_vals;
+ unsigned int len;
+ unsigned long buid;
+
+ if (ibm_read_pci_config == -1) return 0;
+
+ /* PHB's will always be children of the root node,
+ * or so it is promised by the current firmware. */
+ if (phb->parent == NULL)
+ return 0;
+ if (phb->parent->parent)
+ return 0;
+
+ buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+ if (buid_vals == NULL)
+ return 0;
+
+ addr_cells = prom_n_addr_cells(phb);
+ if (addr_cells == 1) {
+ buid = (unsigned long) buid_vals[0];
+ } else {
+ buid = (((unsigned long)buid_vals[0]) << 32UL) |
+ (((unsigned long)buid_vals[1]) & 0xffffffff);
+ }
+ return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ int *bus_range;
+ unsigned int len;
+
+ bus_range = (int *) get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+ struct pci_controller *phb,
+ unsigned int addr_size_words)
+{
+ pci_setup_pci_controller(phb);
+
+ if (is_python(dev))
+ python_countermeasures(dev, addr_size_words);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->arch_data = dev;
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb,
+ struct property *of_prop)
+{
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(phb->global_number);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+ prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+ struct property *of_prop;
+
+ phb = alloc_bootmem(sizeof(struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+
+ of_prop = alloc_bootmem(sizeof(struct property) +
+ sizeof(phb->global_number));
+ if (!of_prop)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ add_linux_pci_domain(dev, phb, of_prop);
+
+ return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+
+ phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+ GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ phb->is_dynamic = 1;
+
+ /* TODO: linux,pci-domain? */
+
+ return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ unsigned int root_size_cells = 0;
+ unsigned int index;
+ unsigned int *opprop = NULL;
+ struct device_node *root = of_find_node_by_path("/");
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+ opprop = (unsigned int *)get_property(root,
+ "platform-open-pic", NULL);
+ }
+
+ root_size_cells = prom_n_size_cells(root);
+
+ index = 0;
+
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+ if (node->type == NULL || strcmp(node->type, "pci") != 0)
+ continue;
+
+ phb = alloc_phb(node, root_size_cells);
+ if (!phb)
+ continue;
+
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+ int addr = root_size_cells * (index + 2) - 1;
+ mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+ }
+#endif
+ index++;
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /*
+ * pci_probe_only and pci_assign_all_buses can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ int *prop;
+
+ prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+ NULL);
+ if (prop)
+ pci_probe_only = *prop;
+
+ prop = (int *)get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop)
+ pci_assign_all_buses = *prop;
+ }
+
+ return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ unsigned int root_size_cells = 0;
+ struct pci_controller *phb;
+ int primary;
+
+ root_size_cells = prom_n_size_cells(root);
+
+ primary = list_empty(&hose_list);
+ phb = alloc_phb_dynamic(dn, root_size_cells);
+ if (!phb)
+ return NULL;
+
+ pci_process_bridge_OF_ranges(phb, dn, primary);
+
+ pci_setup_phb_io_dynamic(phb, primary);
+ of_node_put(root);
+
+ pci_devs_phb_init_dynamic(phb);
+ scan_phb(phb);
+
+ return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct resource *res;
+ int rc, i;
+
+ res = b->resource[0];
+ if (!res->flags) {
+ printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+ b->name);
+ return 1;
+ }
+
+ rc = unmap_bus_range(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ if (release_resource(res)) {
+ printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ for (i = 1; i < 3; ++i) {
+ res = b->resource[i];
+ if (!res->flags && i == 0) {
+ printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+ if (res->flags && release_resource(res)) {
+ printk(KERN_ERR
+ "%s: failed to release IO %d on bus %s\n",
+ __FUNCTION__, i, b->name);
+ return 1;
+ }
+ }
+
+ list_del(&phb->list_node);
+ if (phb->is_dynamic)
+ kfree(phb);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 06e4ef21562..bae4bff138f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -69,6 +69,8 @@ int _machine = 0;
EXPORT_SYMBOL(_machine);
#endif
+unsigned long klimit = (unsigned long) _end;
+
/*
* This still seems to be needed... -- paulus
*/
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index f73d7681b2e..c98cfcc9cd9 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -279,7 +279,6 @@ arch_initcall(ppc_init);
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
- extern char *klimit;
extern void do_init_bootmem(void);
/* so udelay does something sensible, assume <= 1000 bogomips */
@@ -338,7 +337,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) klimit;
+ init_mm.brk = klimit;
/* Save unparsed command line copy for /proc/cmdline */
strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index be607b877a5..6791668213e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -96,14 +96,6 @@ extern void udbg_init_maple_realmode(void);
do { udbg_putc = call_rtas_display_status_delay; } while(0)
#endif
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-extern void check_for_initrd(void);
-
int have_of = 1;
int boot_cpuid = 0;
int boot_cpuid_phys = 0;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 00000000000..850af198fb5
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,384 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/systemcfg.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+ ssize_t ret;
+ unsigned long snooze;
+
+ ret = sscanf(buf, "%lu", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+ struct device_node *options;
+ unsigned int *val;
+ unsigned int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ options = find_path_device("/options");
+ if (!options)
+ return 1;
+
+ val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+ NULL);
+ if (!smt_snooze_cmdline && val) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = *val;
+ }
+
+ return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ int snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ smt_snooze_cmdline = 1;
+
+ if (get_option(&str, &snooze)) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+ }
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+ unsigned long (*func)(unsigned long),
+ unsigned long arg)
+{
+ cpumask_t old_affinity = current->cpus_allowed;
+ unsigned long ret;
+
+ /* should return -EINVAL to userspace */
+ if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+ return 0;
+
+ ret = func(arg);
+
+ set_cpus_allowed(current, old_affinity);
+
+ return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+ return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+ ppc64_enable_pmcs(); \
+ mtspr(ADDRESS, val); \
+ return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+ store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+ return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_create_file(s, &attr_mmcr0);
+ sysdev_create_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_create_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_create_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_create_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_create_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_create_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_create_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_create_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_create_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_create_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+ BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_remove_file(s, &attr_mmcr0);
+ sysdev_remove_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_remove_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_remove_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_remove_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_remove_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_remove_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_remove_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_remove_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_remove_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_remove_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (node_online(i)) {
+ int p_node = parent_node(i);
+ struct node *parent = NULL;
+
+ if (p_node != i)
+ parent = &node_devices[p_node];
+
+ register_node(&node_devices[i], i, parent);
+ }
+ }
+}
+#else
+static void register_nodes(void)
+{
+ return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+ struct node *parent = NULL;
+
+ register_nodes();
+
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
+#endif
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (!ppc_md.cpu_die)
+ c->no_control = 1;
+
+ if (cpu_online(cpu) || (c->no_control == 0)) {
+ register_cpu(c, cpu, parent);
+
+ sysdev_create_file(&c->sysdev, &attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+
+ return 0;
+}
+__initcall(topology_init);