aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/Kconfig66
-rw-r--r--drivers/misc/Makefile6
-rw-r--r--drivers/misc/atmel_tclib.c161
-rw-r--r--drivers/misc/eeepc-laptop.c666
-rw-r--r--drivers/misc/enclosure.c202
-rw-r--r--drivers/misc/intel_menlow.c30
-rw-r--r--drivers/misc/kgdbts.c1090
-rw-r--r--drivers/misc/sgi-xp/Makefile11
-rw-r--r--drivers/misc/sgi-xp/xp.h463
-rw-r--r--drivers/misc/sgi-xp/xp_main.c279
-rw-r--r--drivers/misc/sgi-xp/xp_nofault.S35
-rw-r--r--drivers/misc/sgi-xp/xpc.h1187
-rw-r--r--drivers/misc/sgi-xp/xpc_channel.c2243
-rw-r--r--drivers/misc/sgi-xp/xpc_main.c1323
-rw-r--r--drivers/misc/sgi-xp/xpc_partition.c1174
-rw-r--r--drivers/misc/sgi-xp/xpnet.c677
-rw-r--r--drivers/misc/thinkpad_acpi.c765
17 files changed, 10088 insertions, 290 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 962817e49fb..636af286230 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -22,6 +22,39 @@ config ATMEL_PWM
purposes including software controlled power-efficent backlights
on LCD displays, motor control, and waveform generation.
+config ATMEL_TCLIB
+ bool "Atmel AT32/AT91 Timer/Counter Library"
+ depends on (AVR32 || ARCH_AT91)
+ help
+ Select this if you want a library to allocate the Timer/Counter
+ blocks found on many Atmel processors. This facilitates using
+ these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+ bool "TC Block Clocksource"
+ depends on ATMEL_TCLIB && GENERIC_TIME
+ default y
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate. Two timer channels
+ are combined to make a single 32-bit timer.
+
+ When GENERIC_CLOCKEVENTS is defined, the third timer channel
+ may be used as a clock event device supporting oneshot mode
+ (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+ int
+ depends on ATMEL_TCB_CLKSRC
+ prompt "TC Block" if ARCH_AT91RM9200 || ARCH_AT91SAM9260 || CPU_AT32AP700X
+ default 0
+ range 0 1
+ help
+ Some chips provide more than one TC block, so you have the
+ choice of which one to use for the clock framework. The other
+ TC can be used for other purposes, such as PWM generation and
+ interval timing.
+
config IBM_ASM
tristate "Device driver for IBM RSA service processor"
depends on X86 && PCI && INPUT && EXPERIMENTAL
@@ -107,6 +140,7 @@ config ACER_WMI
depends on EXPERIMENTAL
depends on ACPI
depends on LEDS_CLASS
+ depends on NEW_LEDS
depends on BACKLIGHT_CLASS_DEVICE
depends on SERIO_I8042
select ACPI_WMI
@@ -127,6 +161,7 @@ config ASUS_LAPTOP
depends on ACPI
depends on EXPERIMENTAL && !ACPI_ASUS
depends on LEDS_CLASS
+ depends on NEW_LEDS
depends on BACKLIGHT_CLASS_DEVICE
---help---
This is the new Linux driver for Asus laptops. It may also support some
@@ -208,10 +243,13 @@ config SONYPI_COMPAT
config THINKPAD_ACPI
tristate "ThinkPad ACPI Laptop Extras"
depends on X86 && ACPI
+ select BACKLIGHT_LCD_SUPPORT
select BACKLIGHT_CLASS_DEVICE
select HWMON
select NVRAM
- depends on INPUT
+ select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
---help---
This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
support for Fn-Fx key combinations, Bluetooth control, video
@@ -311,6 +349,7 @@ config ATMEL_SSC
config INTEL_MENLOW
tristate "Thermal Management driver for Intel menlow platform"
depends on ACPI_THERMAL
+ select THERMAL
depends on X86
---help---
ACPI thermal management enhancement driver on
@@ -318,6 +357,19 @@ config INTEL_MENLOW
If unsure, say N.
+config EEEPC_LAPTOP
+ tristate "Eee PC Hotkey Driver (EXPERIMENTAL)"
+ depends on X86
+ depends on ACPI
+ depends on BACKLIGHT_CLASS_DEVICE
+ depends on HWMON
+ depends on EXPERIMENTAL
+ ---help---
+ This driver supports the Fn-Fx keys on Eee PC laptops.
+ It also adds the ability to switch camera/wlan on/off.
+
+ If you have an Eee PC laptop, say Y or M here.
+
config ENCLOSURE_SERVICES
tristate "Enclosure Services"
default n
@@ -327,4 +379,16 @@ config ENCLOSURE_SERVICES
driver (SCSI/ATA) which supports enclosures
or a SCSI enclosure device (SES) to use these services.
+config SGI_XP
+ tristate "Support communication between SGI SSIs"
+ depends on IA64_GENERIC || IA64_SGI_SN2
+ select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+ select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+ ---help---
+ An SGI machine can be divided into multiple Single System
+ Images which act independently of each other and have
+ hardware based memory protection from the others. Enabling
+ this feature will allow for direct communication between SSIs
+ based on a network adapter and DMA messaging.
+
endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3b12f5da856..1952875a272 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -7,9 +7,11 @@ obj-$(CONFIG_IBM_ASM) += ibmasm/
obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/
obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
obj-$(CONFIG_ACER_WMI) += acer-wmi.o
-obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o
+obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o
+obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o
obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
obj-$(CONFIG_LKDTM) += lkdtm.o
obj-$(CONFIG_TIFM_CORE) += tifm_core.o
@@ -22,3 +24,5 @@ obj-$(CONFIG_FUJITSU_LAPTOP) += fujitsu-laptop.o
obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o
obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
+obj-$(CONFIG_KGDB_TESTS) += kgdbts.o
+obj-$(CONFIG_SGI_XP) += sgi-xp/
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 00000000000..05dc8a31f28
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,161 @@
+#include <linux/atmel_tc.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+/* Number of bytes to reserve for the iomem resource */
+#define ATMEL_TC_IOMEM_SIZE 256
+
+
+/*
+ * This is a thin library to solve the problem of how to portably allocate
+ * one of the TC blocks. For simplicity, it doesn't currently expect to
+ * share individual timers between different drivers.
+ */
+
+#if defined(CONFIG_AVR32)
+/* AVR32 has these divide PBB */
+const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#elif defined(CONFIG_ARCH_AT91)
+/* AT91 has these divide MCK */
+const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#endif
+
+static DEFINE_SPINLOCK(tc_list_lock);
+static LIST_HEAD(tc_list);
+
+/**
+ * atmel_tc_alloc - allocate a specified TC block
+ * @block: which block to allocate
+ * @name: name to be associated with the iomem resource
+ *
+ * Caller allocates a block. If it is available, a pointer to a
+ * pre-initialized struct atmel_tc is returned. The caller can access
+ * the registers directly through the "regs" field.
+ */
+struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name)
+{
+ struct atmel_tc *tc;
+ struct platform_device *pdev = NULL;
+ struct resource *r;
+
+ spin_lock(&tc_list_lock);
+ list_for_each_entry(tc, &tc_list, node) {
+ if (tc->pdev->id == block) {
+ pdev = tc->pdev;
+ break;
+ }
+ }
+
+ if (!pdev || tc->iomem)
+ goto fail;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ r = request_mem_region(r->start, ATMEL_TC_IOMEM_SIZE, name);
+ if (!r)
+ goto fail;
+
+ tc->regs = ioremap(r->start, ATMEL_TC_IOMEM_SIZE);
+ if (!tc->regs)
+ goto fail_ioremap;
+
+ tc->iomem = r;
+
+out:
+ spin_unlock(&tc_list_lock);
+ return tc;
+
+fail_ioremap:
+ release_resource(r);
+fail:
+ tc = NULL;
+ goto out;
+}
+EXPORT_SYMBOL_GPL(atmel_tc_alloc);
+
+/**
+ * atmel_tc_free - release a specified TC block
+ * @tc: Timer/counter block that was returned by atmel_tc_alloc()
+ *
+ * This reverses the effect of atmel_tc_alloc(), unmapping the I/O
+ * registers, invalidating the resource returned by that routine and
+ * making the TC available to other drivers.
+ */
+void atmel_tc_free(struct atmel_tc *tc)
+{
+ spin_lock(&tc_list_lock);
+ if (tc->regs) {
+ iounmap(tc->regs);
+ release_resource(tc->iomem);
+ tc->regs = NULL;
+ tc->iomem = NULL;
+ }
+ spin_unlock(&tc_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_tc_free);
+
+static int __init tc_probe(struct platform_device *pdev)
+{
+ struct atmel_tc *tc;
+ struct clk *clk;
+ int irq;
+
+ if (!platform_get_resource(pdev, IORESOURCE_MEM, 0))
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return -EINVAL;
+
+ tc = kzalloc(sizeof(struct atmel_tc), GFP_KERNEL);
+ if (!tc)
+ return -ENOMEM;
+
+ tc->pdev = pdev;
+
+ clk = clk_get(&pdev->dev, "t0_clk");
+ if (IS_ERR(clk)) {
+ kfree(tc);
+ return -EINVAL;
+ }
+
+ tc->clk[0] = clk;
+ tc->clk[1] = clk_get(&pdev->dev, "t1_clk");
+ if (IS_ERR(tc->clk[1]))
+ tc->clk[1] = clk;
+ tc->clk[2] = clk_get(&pdev->dev, "t2_clk");
+ if (IS_ERR(tc->clk[2]))
+ tc->clk[2] = clk;
+
+ tc->irq[0] = irq;
+ tc->irq[1] = platform_get_irq(pdev, 1);
+ if (tc->irq[1] < 0)
+ tc->irq[1] = irq;
+ tc->irq[2] = platform_get_irq(pdev, 2);
+ if (tc->irq[2] < 0)
+ tc->irq[2] = irq;
+
+ spin_lock(&tc_list_lock);
+ list_add_tail(&tc->node, &tc_list);
+ spin_unlock(&tc_list_lock);
+
+ return 0;
+}
+
+static struct platform_driver tc_driver = {
+ .driver.name = "atmel_tcb",
+};
+
+static int __init tc_init(void)
+{
+ return platform_driver_probe(&tc_driver, tc_probe);
+}
+arch_initcall(tc_init);
diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c
new file mode 100644
index 00000000000..6d727609097
--- /dev/null
+++ b/drivers/misc/eeepc-laptop.c
@@ -0,0 +1,666 @@
+/*
+ * eepc-laptop.c - Asus Eee PC extras
+ *
+ * Based on asus_acpi.c as patched for the Eee PC by Asus:
+ * ftp://ftp.asus.com/pub/ASUS/EeePC/701/ASUS_ACPI_071126.rar
+ * Based on eee.c from eeepc-linux
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/backlight.h>
+#include <linux/fb.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acpi_bus.h>
+#include <linux/uaccess.h>
+
+#define EEEPC_LAPTOP_VERSION "0.1"
+
+#define EEEPC_HOTK_NAME "Eee PC Hotkey Driver"
+#define EEEPC_HOTK_FILE "eeepc"
+#define EEEPC_HOTK_CLASS "hotkey"
+#define EEEPC_HOTK_DEVICE_NAME "Hotkey"
+#define EEEPC_HOTK_HID "ASUS010"
+
+#define EEEPC_LOG EEEPC_HOTK_FILE ": "
+#define EEEPC_ERR KERN_ERR EEEPC_LOG
+#define EEEPC_WARNING KERN_WARNING EEEPC_LOG
+#define EEEPC_NOTICE KERN_NOTICE EEEPC_LOG
+#define EEEPC_INFO KERN_INFO EEEPC_LOG
+
+/*
+ * Definitions for Asus EeePC
+ */
+#define NOTIFY_WLAN_ON 0x10
+#define NOTIFY_BRN_MIN 0x20
+#define NOTIFY_BRN_MAX 0x2f
+
+enum {
+ DISABLE_ASL_WLAN = 0x0001,
+ DISABLE_ASL_BLUETOOTH = 0x0002,
+ DISABLE_ASL_IRDA = 0x0004,
+ DISABLE_ASL_CAMERA = 0x0008,
+ DISABLE_ASL_TV = 0x0010,
+ DISABLE_ASL_GPS = 0x0020,
+ DISABLE_ASL_DISPLAYSWITCH = 0x0040,
+ DISABLE_ASL_MODEM = 0x0080,
+ DISABLE_ASL_CARDREADER = 0x0100
+};
+
+enum {
+ CM_ASL_WLAN = 0,
+ CM_ASL_BLUETOOTH,
+ CM_ASL_IRDA,
+ CM_ASL_1394,
+ CM_ASL_CAMERA,
+ CM_ASL_TV,
+ CM_ASL_GPS,
+ CM_ASL_DVDROM,
+ CM_ASL_DISPLAYSWITCH,
+ CM_ASL_PANELBRIGHT,
+ CM_ASL_BIOSFLASH,
+ CM_ASL_ACPIFLASH,
+ CM_ASL_CPUFV,
+ CM_ASL_CPUTEMPERATURE,
+ CM_ASL_FANCPU,
+ CM_ASL_FANCHASSIS,
+ CM_ASL_USBPORT1,
+ CM_ASL_USBPORT2,
+ CM_ASL_USBPORT3,
+ CM_ASL_MODEM,
+ CM_ASL_CARDREADER,
+ CM_ASL_LID
+};
+
+const char *cm_getv[] = {
+ "WLDG", NULL, NULL, NULL,
+ "CAMG", NULL, NULL, NULL,
+ NULL, "PBLG", NULL, NULL,
+ "CFVG", NULL, NULL, NULL,
+ "USBG", NULL, NULL, "MODG",
+ "CRDG", "LIDG"
+};
+
+const char *cm_setv[] = {
+ "WLDS", NULL, NULL, NULL,
+ "CAMS", NULL, NULL, NULL,
+ "SDSP", "PBLS", "HDPS", NULL,
+ "CFVS", NULL, NULL, NULL,
+ "USBG", NULL, NULL, "MODS",
+ "CRDS", NULL
+};
+
+#define EEEPC_EC "\\_SB.PCI0.SBRG.EC0."
+
+#define EEEPC_EC_FAN_PWM EEEPC_EC "SC02" /* Fan PWM duty cycle (%) */
+#define EEEPC_EC_SC02 0x63
+#define EEEPC_EC_FAN_HRPM EEEPC_EC "SC05" /* High byte, fan speed (RPM) */
+#define EEEPC_EC_FAN_LRPM EEEPC_EC "SC06" /* Low byte, fan speed (RPM) */
+#define EEEPC_EC_FAN_CTRL EEEPC_EC "SFB3" /* Byte containing SF25 */
+#define EEEPC_EC_SFB3 0xD3
+
+/*
+ * This is the main structure, we can use it to store useful information
+ * about the hotk device
+ */
+struct eeepc_hotk {
+ struct acpi_device *device; /* the device we are in */
+ acpi_handle handle; /* the handle of the hotk device */
+ u32 cm_supported; /* the control methods supported
+ by this BIOS */
+ uint init_flag; /* Init flags */
+ u16 event_count[128]; /* count for each event */
+};
+
+/* The actual device the driver binds to */
+static struct eeepc_hotk *ehotk;
+
+/* Platform device/driver */
+static struct platform_driver platform_driver = {
+ .driver = {
+ .name = EEEPC_HOTK_FILE,
+ .owner = THIS_MODULE,
+ }
+};
+
+static struct platform_device *platform_device;
+
+/*
+ * The hotkey driver declaration
+ */
+static int eeepc_hotk_add(struct acpi_device *device);
+static int eeepc_hotk_remove(struct acpi_device *device, int type);
+
+static const struct acpi_device_id eeepc_device_ids[] = {
+ {EEEPC_HOTK_HID, 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, eeepc_device_ids);
+
+static struct acpi_driver eeepc_hotk_driver = {
+ .name = EEEPC_HOTK_NAME,
+ .class = EEEPC_HOTK_CLASS,
+ .ids = eeepc_device_ids,
+ .ops = {
+ .add = eeepc_hotk_add,
+ .remove = eeepc_hotk_remove,
+ },
+};
+
+/* The backlight device /sys/class/backlight */
+static struct backlight_device *eeepc_backlight_device;
+
+/* The hwmon device */
+static struct device *eeepc_hwmon_device;
+
+/*
+ * The backlight class declaration
+ */
+static int read_brightness(struct backlight_device *bd);
+static int update_bl_status(struct backlight_device *bd);
+static struct backlight_ops eeepcbl_ops = {
+ .get_brightness = read_brightness,
+ .update_status = update_bl_status,
+};
+
+MODULE_AUTHOR("Corentin Chary, Eric Cooper");
+MODULE_DESCRIPTION(EEEPC_HOTK_NAME);
+MODULE_LICENSE("GPL");
+
+/*
+ * ACPI Helpers
+ */
+static int write_acpi_int(acpi_handle handle, const char *method, int val,
+ struct acpi_buffer *output)
+{
+ struct acpi_object_list params;
+ union acpi_object in_obj;
+ acpi_status status;
+
+ params.count = 1;
+ params.pointer = &in_obj;
+ in_obj.type = ACPI_TYPE_INTEGER;
+ in_obj.integer.value = val;
+
+ status = acpi_evaluate_object(handle, (char *)method, &params, output);
+ return (status == AE_OK ? 0 : -1);
+}
+
+static int read_acpi_int(acpi_handle handle, const char *method, int *val)
+{
+ acpi_status status;
+ ulong result;
+
+ status = acpi_evaluate_integer(handle, (char *)method, NULL, &result);
+ if (ACPI_FAILURE(status)) {
+ *val = -1;
+ return -1;
+ } else {
+ *val = result;
+ return 0;
+ }
+}
+
+static int set_acpi(int cm, int value)
+{
+ if (ehotk->cm_supported & (0x1 << cm)) {
+ const char *method = cm_setv[cm];
+ if (method == NULL)
+ return -ENODEV;
+ if (write_acpi_int(ehotk->handle, method, value, NULL))
+ printk(EEEPC_WARNING "Error writing %s\n", method);
+ }
+ return 0;
+}
+
+static int get_acpi(int cm)
+{
+ int value = -1;
+ if ((ehotk->cm_supported & (0x1 << cm))) {
+ const char *method = cm_getv[cm];
+ if (method == NULL)
+ return -ENODEV;
+ if (read_acpi_int(ehotk->handle, method, &value))
+ printk(EEEPC_WARNING "Error reading %s\n", method);
+ }
+ return value;
+}
+
+/*
+ * Backlight
+ */
+static int read_brightness(struct backlight_device *bd)
+{
+ return get_acpi(CM_ASL_PANELBRIGHT);
+}
+
+static int set_brightness(struct backlight_device *bd, int value)
+{
+ value = max(0, min(15, value));
+ return set_acpi(CM_ASL_PANELBRIGHT, value);
+}
+
+static int update_bl_status(struct backlight_device *bd)
+{
+ return set_brightness(bd, bd->props.brightness);
+}
+
+/*
+ * Sys helpers
+ */
+static int parse_arg(const char *buf, unsigned long count, int *val)
+{
+ if (!count)
+ return 0;
+ if (sscanf(buf, "%i", val) != 1)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t store_sys_acpi(int cm, const char *buf, size_t count)
+{
+ int rv, value;
+
+ rv = parse_arg(buf, count, &value);
+ if (rv > 0)
+ set_acpi(cm, value);
+ return rv;
+}
+
+static ssize_t show_sys_acpi(int cm, char *buf)
+{
+ return sprintf(buf, "%d\n", get_acpi(cm));
+}
+
+#define EEEPC_CREATE_DEVICE_ATTR(_name, _cm) \
+ static ssize_t show_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ return show_sys_acpi(_cm, buf); \
+ } \
+ static ssize_t store_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ return store_sys_acpi(_cm, buf, count); \
+ } \
+ static struct device_attribute dev_attr_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = 0644 }, \
+ .show = show_##_name, \
+ .store = store_##_name, \
+ }
+
+EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA);
+EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER);
+EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH);
+EEEPC_CREATE_DEVICE_ATTR(wlan, CM_ASL_WLAN);
+
+static struct attribute *platform_attributes[] = {
+ &dev_attr_camera.attr,
+ &dev_attr_cardr.attr,
+ &dev_attr_disp.attr,
+ &dev_attr_wlan.attr,
+ NULL
+};
+
+static struct attribute_group platform_attribute_group = {
+ .attrs = platform_attributes
+};
+
+/*
+ * Hotkey functions
+ */
+static int eeepc_hotk_check(void)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ int result;
+
+ result = acpi_bus_get_status(ehotk->device);
+ if (result)
+ return result;
+ if (ehotk->device->status.present) {
+ if (write_acpi_int(ehotk->handle, "INIT", ehotk->init_flag,
+ &buffer)) {
+ printk(EEEPC_ERR "Hotkey initialization failed\n");
+ return -ENODEV;
+ } else {
+ printk(EEEPC_NOTICE "Hotkey init flags 0x%x\n",
+ ehotk->init_flag);
+ }
+ /* get control methods supported */
+ if (read_acpi_int(ehotk->handle, "CMSG"
+ , &ehotk->cm_supported)) {
+ printk(EEEPC_ERR
+ "Get control methods supported failed\n");
+ return -ENODEV;
+ } else {
+ printk(EEEPC_INFO
+ "Get control methods supported: 0x%x\n",
+ ehotk->cm_supported);
+ }
+ } else {
+ printk(EEEPC_ERR "Hotkey device not present, aborting\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void notify_wlan(u32 *event)
+{
+ /* if DISABLE_ASL_WLAN is set, the notify code for fn+f2
+ will always be 0x10 */
+ if (ehotk->cm_supported & (0x1 << CM_ASL_WLAN)) {
+ const char *method = cm_getv[CM_ASL_WLAN];
+ int value;
+ if (read_acpi_int(ehotk->handle, method, &value))
+ printk(EEEPC_WARNING "Error reading %s\n",
+ method);
+ else if (value == 1)
+ *event = 0x11;
+ }
+}
+
+static void notify_brn(void)
+{
+ struct backlight_device *bd = eeepc_backlight_device;
+ bd->props.brightness = read_brightness(bd);
+}
+
+static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
+{
+ if (!ehotk)
+ return;
+ if (event == NOTIFY_WLAN_ON && (DISABLE_ASL_WLAN & ehotk->init_flag))
+ notify_wlan(&event);
+ if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX)
+ notify_brn();
+ acpi_bus_generate_proc_event(ehotk->device, event,
+ ehotk->event_count[event % 128]++);
+}
+
+static int eeepc_hotk_add(struct acpi_device *device)
+{
+ acpi_status status = AE_OK;
+ int result;
+
+ if (!device)
+ return -EINVAL;
+ printk(EEEPC_NOTICE EEEPC_HOTK_NAME "\n");
+ ehotk = kzalloc(sizeof(struct eeepc_hotk), GFP_KERNEL);
+ if (!ehotk)
+ return -ENOMEM;
+ ehotk->init_flag = DISABLE_ASL_WLAN | DISABLE_ASL_DISPLAYSWITCH;
+ ehotk->handle = device->handle;
+ strcpy(acpi_device_name(device), EEEPC_HOTK_DEVICE_NAME);
+ strcpy(acpi_device_class(device), EEEPC_HOTK_CLASS);
+ acpi_driver_data(device) = ehotk;
+ ehotk->device = device;
+ result = eeepc_hotk_check();
+ if (result)
+ goto end;
+ status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
+ eeepc_hotk_notify, ehotk);
+ if (ACPI_FAILURE(status))
+ printk(EEEPC_ERR "Error installing notify handler\n");
+ end:
+ if (result) {
+ kfree(ehotk);
+ ehotk = NULL;
+ }
+ return result;
+}
+
+static int eeepc_hotk_remove(struct acpi_device *device, int type)
+{
+ acpi_status status = 0;
+
+ if (!device || !acpi_driver_data(device))
+ return -EINVAL;
+ status = acpi_remove_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
+ eeepc_hotk_notify);
+ if (ACPI_FAILURE(status))
+ printk(EEEPC_ERR "Error removing notify handler\n");
+ kfree(ehotk);
+ return 0;
+}
+
+/*
+ * Hwmon
+ */
+static int eeepc_get_fan_pwm(void)
+{
+ int value = 0;
+
+ read_acpi_int(NULL, EEEPC_EC_FAN_PWM, &value);
+ return (value);
+}
+
+static void eeepc_set_fan_pwm(int value)
+{
+ value = SENSORS_LIMIT(value, 0, 100);
+ ec_write(EEEPC_EC_SC02, value);
+}
+
+static int eeepc_get_fan_rpm(void)
+{
+ int high = 0;
+ int low = 0;
+
+ read_acpi_int(NULL, EEEPC_EC_FAN_HRPM, &high);
+ read_acpi_int(NULL, EEEPC_EC_FAN_LRPM, &low);
+ return (high << 8 | low);
+}
+
+static int eeepc_get_fan_ctrl(void)
+{
+ int value = 0;
+
+ read_acpi_int(NULL, EEEPC_EC_FAN_CTRL, &value);
+ return ((value & 0x02 ? 1 : 0));
+}
+
+static void eeepc_set_fan_ctrl(int manual)
+{
+ int value = 0;
+
+ read_acpi_int(NULL, EEEPC_EC_FAN_CTRL, &value);
+ if (manual)
+ value |= 0x02;
+ else
+ value &= ~0x02;
+ ec_write(EEEPC_EC_SFB3, value);
+}
+
+static ssize_t store_sys_hwmon(void (*set)(int), const char *buf, size_t count)
+{
+ int rv, value;
+
+ rv = parse_arg(buf, count, &value);
+ if (rv > 0)
+ set(value);
+ return rv;
+}
+
+static ssize_t show_sys_hwmon(int (*get)(void), char *buf)
+{
+ return sprintf(buf, "%d\n", get());
+}
+
+#define EEEPC_CREATE_SENSOR_ATTR(_name, _mode, _set, _get) \
+ static ssize_t show_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ return show_sys_hwmon(_set, buf); \
+ } \
+ static ssize_t store_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ return store_sys_hwmon(_get, buf, count); \
+ } \
+ static SENSOR_DEVICE_ATTR(_name, _mode, show_##_name, store_##_name, 0);
+
+EEEPC_CREATE_SENSOR_ATTR(fan1_input, S_IRUGO, eeepc_get_fan_rpm, NULL);
+EEEPC_CREATE_SENSOR_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
+ eeepc_get_fan_pwm, eeepc_set_fan_pwm);
+EEEPC_CREATE_SENSOR_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
+ eeepc_get_fan_ctrl, eeepc_set_fan_ctrl);
+
+static struct attribute *hwmon_attributes[] = {
+ &sensor_dev_attr_fan1_pwm.dev_attr.attr,
+ &sensor_dev_attr_fan1_input.dev_attr.attr,
+ &sensor_dev_attr_pwm1_enable.dev_attr.attr,
+ NULL
+};
+
+static struct attribute_group hwmon_attribute_group = {
+ .attrs = hwmon_attributes
+};
+
+/*
+ * exit/init
+ */
+static void eeepc_backlight_exit(void)
+{
+ if (eeepc_backlight_device)
+ backlight_device_unregister(eeepc_backlight_device);
+ eeepc_backlight_device = NULL;
+}
+
+static void eeepc_hwmon_exit(void)
+{
+ struct device *hwmon;
+
+ hwmon = eeepc_hwmon_device;
+ if (!hwmon)
+ return ;
+ hwmon_device_unregister(hwmon);
+ sysfs_remove_group(&hwmon->kobj,
+ &hwmon_attribute_group);
+ eeepc_hwmon_device = NULL;
+}
+
+static void __exit eeepc_laptop_exit(void)
+{
+ eeepc_backlight_exit();
+ eeepc_hwmon_exit();
+ acpi_bus_unregister_driver(&eeepc_hotk_driver);
+ sysfs_remove_group(&platform_device->dev.kobj,
+ &platform_attribute_group);
+ platform_device_unregister(platform_device);
+ platform_driver_unregister(&platform_driver);
+}
+
+static int eeepc_backlight_init(struct device *dev)
+{
+ struct backlight_device *bd;
+
+ bd = backlight_device_register(EEEPC_HOTK_FILE, dev,
+ NULL, &eeepcbl_ops);
+ if (IS_ERR(bd)) {
+ printk(EEEPC_ERR
+ "Could not register eeepc backlight device\n");
+ eeepc_backlight_device = NULL;
+ return PTR_ERR(bd);
+ }
+ eeepc_backlight_device = bd;
+ bd->props.max_brightness = 15;
+ bd->props.brightness = read_brightness(NULL);
+ bd->props.power = FB_BLANK_UNBLANK;
+ backlight_update_status(bd);
+ return 0;
+}
+
+static int eeepc_hwmon_init(struct device *dev)
+{
+ struct device *hwmon;
+ int result;
+
+ hwmon = hwmon_device_register(dev);
+ if (IS_ERR(hwmon)) {
+ printk(EEEPC_ERR
+ "Could not register eeepc hwmon device\n");
+ eeepc_hwmon_device = NULL;
+ return PTR_ERR(hwmon);
+ }
+ eeepc_hwmon_device = hwmon;
+ result = sysfs_create_group(&hwmon->kobj,
+ &hwmon_attribute_group);
+ if (result)
+ eeepc_hwmon_exit();
+ return result;
+}
+
+static int __init eeepc_laptop_init(void)
+{
+ struct device *dev;
+ int result;
+
+ if (acpi_disabled)
+ return -ENODEV;
+ result = acpi_bus_register_driver(&eeepc_hotk_driver);
+ if (result < 0)
+ return result;
+ if (!ehotk) {
+ acpi_bus_unregister_driver(&eeepc_hotk_driver);
+ return -ENODEV;
+ }
+ dev = acpi_get_physical_device(ehotk->device->handle);
+ result = eeepc_backlight_init(dev);
+ if (result)
+ goto fail_backlight;
+ result = eeepc_hwmon_init(dev);
+ if (result)
+ goto fail_hwmon;
+ /* Register platform stuff */
+ result = platform_driver_register(&platform_driver);
+ if (result)
+ goto fail_platform_driver;
+ platform_device = platform_device_alloc(EEEPC_HOTK_FILE, -1);
+ if (!platform_device) {
+ result = -ENOMEM;
+ goto fail_platform_device1;
+ }
+ result = platform_device_add(platform_device);
+ if (result)
+ goto fail_platform_device2;
+ result = sysfs_create_group(&platform_device->dev.kobj,
+ &platform_attribute_group);
+ if (result)
+ goto fail_sysfs;
+ return 0;
+fail_sysfs:
+ platform_device_del(platform_device);
+fail_platform_device2:
+ platform_device_put(platform_device);
+fail_platform_device1:
+ platform_driver_unregister(&platform_driver);
+fail_platform_driver:
+ eeepc_hwmon_exit();
+fail_hwmon:
+ eeepc_backlight_exit();
+fail_backlight:
+ return result;
+}
+
+module_init(eeepc_laptop_init);
+module_exit(eeepc_laptop_exit);
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 6fcb0e96adf..0736cff9d97 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -31,7 +31,6 @@
static LIST_HEAD(container_list);
static DEFINE_MUTEX(container_list_lock);
static struct class enclosure_class;
-static struct class enclosure_component_class;
/**
* enclosure_find - find an enclosure given a device
@@ -40,16 +39,16 @@ static struct class enclosure_component_class;
* Looks through the list of registered enclosures to see
* if it can find a match for a device. Returns NULL if no
* enclosure is found. Obtains a reference to the enclosure class
- * device which must be released with class_device_put().
+ * device which must be released with device_put().
*/
struct enclosure_device *enclosure_find(struct device *dev)
{
- struct enclosure_device *edev = NULL;
+ struct enclosure_device *edev;
mutex_lock(&container_list_lock);
list_for_each_entry(edev, &container_list, node) {
- if (edev->cdev.dev == dev) {
- class_device_get(&edev->cdev);
+ if (edev->edev.parent == dev) {
+ get_device(&edev->edev);
mutex_unlock(&container_list_lock);
return edev;
}
@@ -117,11 +116,11 @@ enclosure_register(struct device *dev, const char *name, int components,
edev->components = components;
- edev->cdev.class = &enclosure_class;
- edev->cdev.dev = get_device(dev);
+ edev->edev.class = &enclosure_class;
+ edev->edev.parent = get_device(dev);
edev->cb = cb;
- snprintf(edev->cdev.class_id, BUS_ID_SIZE, "%s", name);
- err = class_device_register(&edev->cdev);
+ snprintf(edev->edev.bus_id, BUS_ID_SIZE, "%s", name);
+ err = device_register(&edev->edev);
if (err)
goto err;
@@ -135,7 +134,7 @@ enclosure_register(struct device *dev, const char *name, int components,
return edev;
err:
- put_device(edev->cdev.dev);
+ put_device(edev->edev.parent);
kfree(edev);
return ERR_PTR(err);
}
@@ -158,29 +157,69 @@ void enclosure_unregister(struct enclosure_device *edev)
for (i = 0; i < edev->components; i++)
if (edev->component[i].number != -1)
- class_device_unregister(&edev->component[i].cdev);
+ device_unregister(&edev->component[i].cdev);
/* prevent any callbacks into service user */
edev->cb = &enclosure_null_callbacks;
- class_device_unregister(&edev->cdev);
+ device_unregister(&edev->edev);
}
EXPORT_SYMBOL_GPL(enclosure_unregister);
-static void enclosure_release(struct class_device *cdev)
+#define ENCLOSURE_NAME_SIZE 64
+
+static void enclosure_link_name(struct enclosure_component *cdev, char *name)
+{
+ strcpy(name, "enclosure_device:");
+ strcat(name, cdev->cdev.bus_id);
+}
+
+static void enclosure_remove_links(struct enclosure_component *cdev)
+{
+ char name[ENCLOSURE_NAME_SIZE];
+
+ enclosure_link_name(cdev, name);
+ sysfs_remove_link(&cdev->dev->kobj, name);
+ sysfs_remove_link(&cdev->cdev.kobj, "device");
+}
+
+static int enclosure_add_links(struct enclosure_component *cdev)
+{
+ int error;
+ char name[ENCLOSURE_NAME_SIZE];
+
+ error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
+ if (error)
+ return error;
+
+ enclosure_link_name(cdev, name);
+ error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
+ if (error)
+ sysfs_remove_link(&cdev->cdev.kobj, "device");
+
+ return error;
+}
+
+static void enclosure_release(struct device *cdev)
{
struct enclosure_device *edev = to_enclosure_device(cdev);
- put_device(cdev->dev);
+ put_device(cdev->parent);
kfree(edev);
}
-static void enclosure_component_release(struct class_device *cdev)
+static void enclosure_component_release(struct device *dev)
{
- if (cdev->dev)
+ struct enclosure_component *cdev = to_enclosure_component(dev);
+
+ if (cdev->dev) {
+ enclosure_remove_links(cdev);
put_device(cdev->dev);
- class_device_put(cdev->parent);
+ }
+ put_device(dev->parent);
}
+static struct attribute_group *enclosure_groups[];
+
/**
* enclosure_component_register - add a particular component to an enclosure
* @edev: the enclosure to add the component
@@ -201,7 +240,7 @@ enclosure_component_register(struct enclosure_device *edev,
const char *name)
{
struct enclosure_component *ecomp;
- struct class_device *cdev;
+ struct device *cdev;
int err;
if (number >= edev->components)
@@ -215,14 +254,16 @@ enclosure_component_register(struct enclosure_device *edev,
ecomp->type = type;
ecomp->number = number;
cdev = &ecomp->cdev;
- cdev->parent = class_device_get(&edev->cdev);
- cdev->class = &enclosure_component_class;
+ cdev->parent = get_device(&edev->edev);
if (name)
- snprintf(cdev->class_id, BUS_ID_SIZE, "%s", name);
+ snprintf(cdev->bus_id, BUS_ID_SIZE, "%s", name);
else
- snprintf(cdev->class_id, BUS_ID_SIZE, "%u", number);
+ snprintf(cdev->bus_id, BUS_ID_SIZE, "%u", number);
- err = class_device_register(cdev);
+ cdev->release = enclosure_component_release;
+ cdev->groups = enclosure_groups;
+
+ err = device_register(cdev);
if (err)
ERR_PTR(err);
@@ -247,18 +288,19 @@ EXPORT_SYMBOL_GPL(enclosure_component_register);
int enclosure_add_device(struct enclosure_device *edev, int component,
struct device *dev)
{
- struct class_device *cdev;
+ struct enclosure_component *cdev;
if (!edev || component >= edev->components)
return -EINVAL;
- cdev = &edev->component[component].cdev;
+ cdev = &edev->component[component];
- class_device_del(cdev);
if (cdev->dev)
- put_device(cdev->dev);
+ enclosure_remove_links(cdev);
+
+ put_device(cdev->dev);
cdev->dev = get_device(dev);
- return class_device_add(cdev);
+ return enclosure_add_links(cdev);
}
EXPORT_SYMBOL_GPL(enclosure_add_device);
@@ -272,18 +314,17 @@ EXPORT_SYMBOL_GPL(enclosure_add_device);
*/
int enclosure_remove_device(struct enclosure_device *edev, int component)
{
- struct class_device *cdev;
+ struct enclosure_component *cdev;
if (!edev || component >= edev->components)
return -EINVAL;
- cdev = &edev->component[component].cdev;
+ cdev = &edev->component[component];
- class_device_del(cdev);
- if (cdev->dev)
- put_device(cdev->dev);
+ device_del(&cdev->cdev);
+ put_device(cdev->dev);
cdev->dev = NULL;
- return class_device_add(cdev);
+ return device_add(&cdev->cdev);
}
EXPORT_SYMBOL_GPL(enclosure_remove_device);
@@ -291,14 +332,16 @@ EXPORT_SYMBOL_GPL(enclosure_remove_device);
* sysfs pieces below
*/
-static ssize_t enclosure_show_components(struct class_device *cdev, char *buf)
+static ssize_t enclosure_show_components(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
{
struct enclosure_device *edev = to_enclosure_device(cdev);
return snprintf(buf, 40, "%d\n", edev->components);
}
-static struct class_device_attribute enclosure_attrs[] = {
+static struct device_attribute enclosure_attrs[] = {
__ATTR(components, S_IRUGO, enclosure_show_components, NULL),
__ATTR_NULL
};
@@ -306,8 +349,8 @@ static struct class_device_attribute enclosure_attrs[] = {
static struct class enclosure_class = {
.name = "enclosure",
.owner = THIS_MODULE,
- .release = enclosure_release,
- .class_dev_attrs = enclosure_attrs,
+ .dev_release = enclosure_release,
+ .dev_attrs = enclosure_attrs,
};
static const char *const enclosure_status [] = {
@@ -326,7 +369,8 @@ static const char *const enclosure_type [] = {
[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
};
-static ssize_t get_component_fault(struct class_device *cdev, char *buf)
+static ssize_t get_component_fault(struct device *cdev,
+ struct device_attribute *attr, char *buf)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -336,8 +380,9 @@ static ssize_t get_component_fault(struct class_device *cdev, char *buf)
return snprintf(buf, 40, "%d\n", ecomp->fault);
}
-static ssize_t set_component_fault(struct class_device *cdev, const char *buf,
- size_t count)
+static ssize_t set_component_fault(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -348,7 +393,8 @@ static ssize_t set_component_fault(struct class_device *cdev, const char *buf,
return count;
}
-static ssize_t get_component_status(struct class_device *cdev, char *buf)
+static ssize_t get_component_status(struct device *cdev,
+ struct device_attribute *attr,char *buf)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -358,8 +404,9 @@ static ssize_t get_component_status(struct class_device *cdev, char *buf)
return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
}
-static ssize_t set_component_status(struct class_device *cdev, const char *buf,
- size_t count)
+static ssize_t set_component_status(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -380,7 +427,8 @@ static ssize_t set_component_status(struct class_device *cdev, const char *buf,
return -EINVAL;
}
-static ssize_t get_component_active(struct class_device *cdev, char *buf)
+static ssize_t get_component_active(struct device *cdev,
+ struct device_attribute *attr, char *buf)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -390,8 +438,9 @@ static ssize_t get_component_active(struct class_device *cdev, char *buf)
return snprintf(buf, 40, "%d\n", ecomp->active);
}
-static ssize_t set_component_active(struct class_device *cdev, const char *buf,
- size_t count)
+static ssize_t set_component_active(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -402,7 +451,8 @@ static ssize_t set_component_active(struct class_device *cdev, const char *buf,
return count;
}
-static ssize_t get_component_locate(struct class_device *cdev, char *buf)
+static ssize_t get_component_locate(struct device *cdev,
+ struct device_attribute *attr, char *buf)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -412,8 +462,9 @@ static ssize_t get_component_locate(struct class_device *cdev, char *buf)
return snprintf(buf, 40, "%d\n", ecomp->locate);
}
-static ssize_t set_component_locate(struct class_device *cdev, const char *buf,
- size_t count)
+static ssize_t set_component_locate(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct enclosure_device *edev = to_enclosure_device(cdev->parent);
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -424,7 +475,8 @@ static ssize_t set_component_locate(struct class_device *cdev, const char *buf,
return count;
}
-static ssize_t get_component_type(struct class_device *cdev, char *buf)
+static ssize_t get_component_type(struct device *cdev,
+ struct device_attribute *attr, char *buf)
{
struct enclosure_component *ecomp = to_enclosure_component(cdev);
@@ -432,24 +484,32 @@ static ssize_t get_component_type(struct class_device *cdev, char *buf)
}
-static struct class_device_attribute enclosure_component_attrs[] = {
- __ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
- set_component_fault),
- __ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
- set_component_status),
- __ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
- set_component_active),
- __ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
- set_component_locate),
- __ATTR(type, S_IRUGO, get_component_type, NULL),
- __ATTR_NULL
+static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+ set_component_fault);
+static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+ set_component_status);
+static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+ set_component_active);
+static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+ set_component_locate);
+static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
+
+static struct attribute *enclosure_component_attrs[] = {
+ &dev_attr_fault.attr,
+ &dev_attr_status.attr,
+ &dev_attr_active.attr,
+ &dev_attr_locate.attr,
+ &dev_attr_type.attr,
+ NULL
};
-static struct class enclosure_component_class = {
- .name = "enclosure_component",
- .owner = THIS_MODULE,
- .class_dev_attrs = enclosure_component_attrs,
- .release = enclosure_component_release,
+static struct attribute_group enclosure_group = {
+ .attrs = enclosure_component_attrs,
+};
+
+static struct attribute_group *enclosure_groups[] = {
+ &enclosure_group,
+ NULL
};
static int __init enclosure_init(void)
@@ -459,20 +519,12 @@ static int __init enclosure_init(void)
err = class_register(&enclosure_class);
if (err)
return err;
- err = class_register(&enclosure_component_class);
- if (err)
- goto err_out;
return 0;
- err_out:
- class_unregister(&enclosure_class);
-
- return err;
}
static void __exit enclosure_exit(void)
{
- class_unregister(&enclosure_component_class);
class_unregister(&enclosure_class);
}
diff --git a/drivers/misc/intel_menlow.c b/drivers/misc/intel_menlow.c
index de16e88eb8d..5bb8816c912 100644
--- a/drivers/misc/intel_menlow.c
+++ b/drivers/misc/intel_menlow.c
@@ -175,28 +175,18 @@ static int intel_menlow_memory_add(struct acpi_device *device)
goto end;
}
- if (cdev) {
- acpi_driver_data(device) = cdev;
- result = sysfs_create_link(&device->dev.kobj,
- &cdev->device.kobj, "thermal_cooling");
- if (result)
- goto unregister;
-
- result = sysfs_create_link(&cdev->device.kobj,
- &device->dev.kobj, "device");
- if (result) {
- sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
- goto unregister;
- }
- }
+ acpi_driver_data(device) = cdev;
+ result = sysfs_create_link(&device->dev.kobj,
+ &cdev->device.kobj, "thermal_cooling");
+ if (result)
+ printk(KERN_ERR PREFIX "Create sysfs link\n");
+ result = sysfs_create_link(&cdev->device.kobj,
+ &device->dev.kobj, "device");
+ if (result)
+ printk(KERN_ERR PREFIX "Create sysfs link\n");
end:
return result;
-
- unregister:
- thermal_cooling_device_unregister(cdev);
- return result;
-
}
static int intel_menlow_memory_remove(struct acpi_device *device, int type)
@@ -213,7 +203,7 @@ static int intel_menlow_memory_remove(struct acpi_device *device, int type)
return 0;
}
-const static struct acpi_device_id intel_menlow_memory_ids[] = {
+static const struct acpi_device_id intel_menlow_memory_ids[] = {
{"INT0002", 0},
{"", 0},
};
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
new file mode 100644
index 00000000000..6d6286c4eea
--- /dev/null
+++ b/drivers/misc/kgdbts.c
@@ -0,0 +1,1090 @@
+/*
+ * kgdbts is a test suite for kgdb for the sole purpose of validating
+ * that key pieces of the kgdb internals are working properly such as
+ * HW/SW breakpoints, single stepping, and NMI.
+ *
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Information about the kgdb test suite.
+ * -------------------------------------
+ *
+ * The kgdb test suite is designed as a KGDB I/O module which
+ * simulates the communications that a debugger would have with kgdb.
+ * The tests are broken up in to a line by line and referenced here as
+ * a "get" which is kgdb requesting input and "put" which is kgdb
+ * sending a response.
+ *
+ * The kgdb suite can be invoked from the kernel command line
+ * arguments system or executed dynamically at run time. The test
+ * suite uses the variable "kgdbts" to obtain the information about
+ * which tests to run and to configure the verbosity level. The
+ * following are the various characters you can use with the kgdbts=
+ * line:
+ *
+ * When using the "kgdbts=" you only choose one of the following core
+ * test types:
+ * A = Run all the core tests silently
+ * V1 = Run all the core tests with minimal output
+ * V2 = Run all the core tests in debug mode
+ *
+ * You can also specify optional tests:
+ * N## = Go to sleep with interrupts of for ## seconds
+ * to test the HW NMI watchdog
+ * F## = Break at do_fork for ## iterations
+ * S## = Break at sys_open for ## iterations
+ *
+ * NOTE: that the do_fork and sys_open tests are mutually exclusive.
+ *
+ * To invoke the kgdb test suite from boot you use a kernel start
+ * argument as follows:
+ * kgdbts=V1 kgdbwait
+ * Or if you wanted to perform the NMI test for 6 seconds and do_fork
+ * test for 100 forks, you could use:
+ * kgdbts=V1N6F100 kgdbwait
+ *
+ * The test suite can also be invoked at run time with:
+ * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
+ * Or as another example:
+ * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * When developing a new kgdb arch specific implementation or
+ * using these tests for the purpose of regression testing,
+ * several invocations are required.
+ *
+ * 1) Boot with the test suite enabled by using the kernel arguments
+ * "kgdbts=V1F100 kgdbwait"
+ * ## If kgdb arch specific implementation has NMI use
+ * "kgdbts=V1N6F100
+ *
+ * 2) After the system boot run the basic test.
+ * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * 3) Run the concurrency tests. It is best to use n+1
+ * while loops where n is the number of cpus you have
+ * in your system. The example below uses only two
+ * loops.
+ *
+ * ## This tests break points on sys_open
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ * fg # and hit control-c
+ * ## This tests break points on do_fork
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#define v1printk(a...) do { \
+ if (verbose) \
+ printk(KERN_INFO a); \
+ } while (0)
+#define v2printk(a...) do { \
+ if (verbose > 1) \
+ printk(KERN_INFO a); \
+ touch_nmi_watchdog(); \
+ } while (0)
+#define eprintk(a...) do { \
+ printk(KERN_ERR a); \
+ WARN_ON(1); \
+ } while (0)
+#define MAX_CONFIG_LEN 40
+
+static const char hexchars[] = "0123456789abcdef";
+static struct kgdb_io kgdbts_io_ops;
+static char get_buf[BUFMAX];
+static int get_buf_cnt;
+static char put_buf[BUFMAX];
+static int put_buf_cnt;
+static char scratch_buf[BUFMAX];
+static int verbose;
+static int repeat_test;
+static int test_complete;
+static int send_ack;
+static int final_ack;
+static int hw_break_val;
+static int hw_break_val2;
+#if defined(CONFIG_ARM) || defined(CONFIG_MIPS)
+static int arch_needs_sstep_emulation = 1;
+#else
+static int arch_needs_sstep_emulation;
+#endif
+static unsigned long sstep_addr;
+static int sstep_state;
+
+/* Storage for the registers, in GDB format. */
+static unsigned long kgdbts_gdb_regs[(NUMREGBYTES +
+ sizeof(unsigned long) - 1) /
+ sizeof(unsigned long)];
+static struct pt_regs kgdbts_regs;
+
+/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
+static int configured = -1;
+
+#ifdef CONFIG_KGDB_TESTS_BOOT_STRING
+static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING;
+#else
+static char config[MAX_CONFIG_LEN];
+#endif
+static struct kparam_string kps = {
+ .string = config,
+ .maxlen = MAX_CONFIG_LEN,
+};
+
+static void fill_get_buf(char *buf);
+
+struct test_struct {
+ char *get;
+ char *put;
+ void (*get_handler)(char *);
+ int (*put_handler)(char *, char *);
+};
+
+struct test_state {
+ char *name;
+ struct test_struct *tst;
+ int idx;
+ int (*run_test) (int, int);
+ int (*validate_put) (char *);
+};
+
+static struct test_state ts;
+
+static int kgdbts_unreg_thread(void *ptr)
+{
+ /* Wait until the tests are complete and then ungresiter the I/O
+ * driver.
+ */
+ while (!final_ack)
+ msleep_interruptible(1500);
+
+ if (configured)
+ kgdb_unregister_io_module(&kgdbts_io_ops);
+ configured = 0;
+
+ return 0;
+}
+
+/* This is noinline such that it can be used for a single location to
+ * place a breakpoint
+ */
+static noinline void kgdbts_break_test(void)
+{
+ v2printk("kgdbts: breakpoint complete\n");
+}
+
+/* Lookup symbol info in the kernel */
+static unsigned long lookup_addr(char *arg)
+{
+ unsigned long addr = 0;
+
+ if (!strcmp(arg, "kgdbts_break_test"))
+ addr = (unsigned long)kgdbts_break_test;
+ else if (!strcmp(arg, "sys_open"))
+ addr = (unsigned long)sys_open;
+ else if (!strcmp(arg, "do_fork"))
+ addr = (unsigned long)do_fork;
+ else if (!strcmp(arg, "hw_break_val"))
+ addr = (unsigned long)&hw_break_val;
+ return addr;
+}
+
+static void break_helper(char *bp_type, char *arg, unsigned long vaddr)
+{
+ unsigned long addr;
+
+ if (arg)
+ addr = lookup_addr(arg);
+ else
+ addr = vaddr;
+
+ sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr,
+ BREAK_INSTR_SIZE);
+ fill_get_buf(scratch_buf);
+}
+
+static void sw_break(char *arg)
+{
+ break_helper("Z0", arg, 0);
+}
+
+static void sw_rem_break(char *arg)
+{
+ break_helper("z0", arg, 0);
+}
+
+static void hw_break(char *arg)
+{
+ break_helper("Z1", arg, 0);
+}
+
+static void hw_rem_break(char *arg)
+{
+ break_helper("z1", arg, 0);
+}
+
+static void hw_write_break(char *arg)
+{
+ break_helper("Z2", arg, 0);
+}
+
+static void hw_rem_write_break(char *arg)
+{
+ break_helper("z2", arg, 0);
+}
+
+static void hw_access_break(char *arg)
+{
+ break_helper("Z4", arg, 0);
+}
+
+static void hw_rem_access_break(char *arg)
+{
+ break_helper("z4", arg, 0);
+}
+
+static void hw_break_val_access(void)
+{
+ hw_break_val2 = hw_break_val;
+}
+
+static void hw_break_val_write(void)
+{
+ hw_break_val++;
+}
+
+static int check_and_rewind_pc(char *put_str, char *arg)
+{
+ unsigned long addr = lookup_addr(arg);
+ int offset = 0;
+
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ v2printk("Stopped at IP: %lx\n", instruction_pointer(&kgdbts_regs));
+#ifdef CONFIG_X86
+ /* On x86 a breakpoint stop requires it to be decremented */
+ if (addr + 1 == kgdbts_regs.ip)
+ offset = -1;
+#endif
+ if (strcmp(arg, "silent") &&
+ instruction_pointer(&kgdbts_regs) + offset != addr) {
+ eprintk("kgdbts: BP mismatch %lx expected %lx\n",
+ instruction_pointer(&kgdbts_regs) + offset, addr);
+ return 1;
+ }
+#ifdef CONFIG_X86
+ /* On x86 adjust the instruction pointer if needed */
+ kgdbts_regs.ip += offset;
+#endif
+ return 0;
+}
+
+static int check_single_step(char *put_str, char *arg)
+{
+ unsigned long addr = lookup_addr(arg);
+ /*
+ * From an arch indepent point of view the instruction pointer
+ * should be on a different instruction
+ */
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ v2printk("Singlestep stopped at IP: %lx\n",
+ instruction_pointer(&kgdbts_regs));
+ if (instruction_pointer(&kgdbts_regs) == addr) {
+ eprintk("kgdbts: SingleStep failed at %lx\n",
+ instruction_pointer(&kgdbts_regs));
+ return 1;
+ }
+
+ return 0;
+}
+
+static void write_regs(char *arg)
+{
+ memset(scratch_buf, 0, sizeof(scratch_buf));
+ scratch_buf[0] = 'G';
+ pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES);
+ fill_get_buf(scratch_buf);
+}
+
+static void skip_back_repeat_test(char *arg)
+{
+ int go_back = simple_strtol(arg, NULL, 10);
+
+ repeat_test--;
+ if (repeat_test <= 0)
+ ts.idx++;
+ else
+ ts.idx -= go_back;
+ fill_get_buf(ts.tst[ts.idx].get);
+}
+
+static int got_break(char *put_str, char *arg)
+{
+ test_complete = 1;
+ if (!strncmp(put_str+1, arg, 2)) {
+ if (!strncmp(arg, "T0", 2))
+ test_complete = 2;
+ return 0;
+ }
+ return 1;
+}
+
+static void emul_sstep_get(char *arg)
+{
+ if (!arch_needs_sstep_emulation) {
+ fill_get_buf(arg);
+ return;
+ }
+ switch (sstep_state) {
+ case 0:
+ v2printk("Emulate single step\n");
+ /* Start by looking at the current PC */
+ fill_get_buf("g");
+ break;
+ case 1:
+ /* set breakpoint */
+ break_helper("Z0", 0, sstep_addr);
+ break;
+ case 2:
+ /* Continue */
+ fill_get_buf("c");
+ break;
+ case 3:
+ /* Clear breakpoint */
+ break_helper("z0", 0, sstep_addr);
+ break;
+ default:
+ eprintk("kgdbts: ERROR failed sstep get emulation\n");
+ }
+ sstep_state++;
+}
+
+static int emul_sstep_put(char *put_str, char *arg)
+{
+ if (!arch_needs_sstep_emulation) {
+ if (!strncmp(put_str+1, arg, 2))
+ return 0;
+ return 1;
+ }
+ switch (sstep_state) {
+ case 1:
+ /* validate the "g" packet to get the IP */
+ kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+ NUMREGBYTES);
+ gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+ v2printk("Stopped at IP: %lx\n",
+ instruction_pointer(&kgdbts_regs));
+ /* Want to stop at IP + break instruction size by default */
+ sstep_addr = instruction_pointer(&kgdbts_regs) +
+ BREAK_INSTR_SIZE;
+ break;
+ case 2:
+ if (strncmp(put_str, "$OK", 3)) {
+ eprintk("kgdbts: failed sstep break set\n");
+ return 1;
+ }
+ break;
+ case 3:
+ if (strncmp(put_str, "$T0", 3)) {
+ eprintk("kgdbts: failed continue sstep\n");
+ return 1;
+ }
+ break;
+ case 4:
+ if (strncmp(put_str, "$OK", 3)) {
+ eprintk("kgdbts: failed sstep break unset\n");
+ return 1;
+ }
+ /* Single step is complete so continue on! */
+ sstep_state = 0;
+ return 0;
+ default:
+ eprintk("kgdbts: ERROR failed sstep put emulation\n");
+ }
+
+ /* Continue on the same test line until emulation is complete */
+ ts.idx--;
+ return 0;
+}
+
+static int final_ack_set(char *put_str, char *arg)
+{
+ if (strncmp(put_str+1, arg, 2))
+ return 1;
+ final_ack = 1;
+ return 0;
+}
+/*
+ * Test to plant a breakpoint and detach, which should clear out the
+ * breakpoint and restore the original instruction.
+ */
+static struct test_struct plant_and_detach_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "", "" },
+};
+
+/*
+ * Simple test to write in a software breakpoint, check for the
+ * correct stop location and detach.
+ */
+static struct test_struct sw_breakpoint_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", 0, got_break }, /* If the test worked we made it here */
+ { "", "" },
+};
+
+/*
+ * Test a known bad memory read location to test the fault handler and
+ * read bytes 1-8 at the bad address
+ */
+static struct test_struct bad_read_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "m0,1", "E*" }, /* read 1 byte at address 1 */
+ { "m0,2", "E*" }, /* read 1 byte at address 2 */
+ { "m0,3", "E*" }, /* read 1 byte at address 3 */
+ { "m0,4", "E*" }, /* read 1 byte at address 4 */
+ { "m0,5", "E*" }, /* read 1 byte at address 5 */
+ { "m0,6", "E*" }, /* read 1 byte at address 6 */
+ { "m0,7", "E*" }, /* read 1 byte at address 7 */
+ { "m0,8", "E*" }, /* read 1 byte at address 8 */
+ { "D", "OK" }, /* Detach which removes all breakpoints and continues */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint, remove it, single step, plant it
+ * again and detach.
+ */
+static struct test_struct singlestep_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs }, /* Write registers */
+ { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "kgdbts_break_test", 0, check_single_step },
+ { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs }, /* Write registers */
+ { "D", "OK" }, /* Remove all breakpoints and continues */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint at do_fork for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct do_fork_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "do_fork", 0, check_and_rewind_pc }, /* check location */
+ { "write", "OK", write_regs }, /* Write registers */
+ { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "do_fork", 0, check_single_step },
+ { "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+ { "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
+ { "", "" },
+};
+
+/* Test for hitting a breakpoint at sys_open for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct sys_open_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "sys_open", 0, check_and_rewind_pc }, /* check location */
+ { "write", "OK", write_regs }, /* Write registers */
+ { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
+ { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+ { "g", "sys_open", 0, check_single_step },
+ { "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+ { "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a simple hw breakpoint
+ */
+static struct test_struct hw_breakpoint_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
+ { "c", "T0*", }, /* Continue */
+ { "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", 0, got_break }, /* If the test worked we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw write breakpoint
+ */
+static struct test_struct hw_write_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
+ { "c", "T0*", 0, got_break }, /* Continue */
+ { "g", "silent", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", 0, got_break }, /* If the test worked we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct hw_access_break_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
+ { "c", "T0*", 0, got_break }, /* Continue */
+ { "g", "silent", 0, check_and_rewind_pc },
+ { "write", "OK", write_regs },
+ { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", 0, got_break }, /* If the test worked we made it here */
+ { "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct nmi_sleep_test[] = {
+ { "?", "S0*" }, /* Clear break points */
+ { "c", "T0*", 0, got_break }, /* Continue */
+ { "D", "OK" }, /* Detach */
+ { "D", "OK", 0, got_break }, /* If the test worked we made it here */
+ { "", "" },
+};
+
+static void fill_get_buf(char *buf)
+{
+ unsigned char checksum = 0;
+ int count = 0;
+ char ch;
+
+ strcpy(get_buf, "$");
+ strcat(get_buf, buf);
+ while ((ch = buf[count])) {
+ checksum += ch;
+ count++;
+ }
+ strcat(get_buf, "#");
+ get_buf[count + 2] = hexchars[checksum >> 4];
+ get_buf[count + 3] = hexchars[checksum & 0xf];
+ get_buf[count + 4] = '\0';
+ v2printk("get%i: %s\n", ts.idx, get_buf);
+}
+
+static int validate_simple_test(char *put_str)
+{
+ char *chk_str;
+
+ if (ts.tst[ts.idx].put_handler)
+ return ts.tst[ts.idx].put_handler(put_str,
+ ts.tst[ts.idx].put);
+
+ chk_str = ts.tst[ts.idx].put;
+ if (*put_str == '$')
+ put_str++;
+
+ while (*chk_str != '\0' && *put_str != '\0') {
+ /* If someone does a * to match the rest of the string, allow
+ * it, or stop if the recieved string is complete.
+ */
+ if (*put_str == '#' || *chk_str == '*')
+ return 0;
+ if (*put_str != *chk_str)
+ return 1;
+
+ chk_str++;
+ put_str++;
+ }
+ if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#'))
+ return 0;
+
+ return 1;
+}
+
+static int run_simple_test(int is_get_char, int chr)
+{
+ int ret = 0;
+ if (is_get_char) {
+ /* Send an ACK on the get if a prior put completed and set the
+ * send ack variable
+ */
+ if (send_ack) {
+ send_ack = 0;
+ return '+';
+ }
+ /* On the first get char, fill the transmit buffer and then
+ * take from the get_string.
+ */
+ if (get_buf_cnt == 0) {
+ if (ts.tst[ts.idx].get_handler)
+ ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get);
+ else
+ fill_get_buf(ts.tst[ts.idx].get);
+ }
+
+ if (get_buf[get_buf_cnt] == '\0') {
+ eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n",
+ ts.name, ts.idx);
+ get_buf_cnt = 0;
+ fill_get_buf("D");
+ }
+ ret = get_buf[get_buf_cnt];
+ get_buf_cnt++;
+ return ret;
+ }
+
+ /* This callback is a put char which is when kgdb sends data to
+ * this I/O module.
+ */
+ if (ts.tst[ts.idx].get[0] == '\0' &&
+ ts.tst[ts.idx].put[0] == '\0') {
+ eprintk("kgdbts: ERROR: beyond end of test on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ return 0;
+ }
+
+ if (put_buf_cnt >= BUFMAX) {
+ eprintk("kgdbts: ERROR: put buffer overflow on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ put_buf_cnt = 0;
+ return 0;
+ }
+ /* Ignore everything until the first valid packet start '$' */
+ if (put_buf_cnt == 0 && chr != '$')
+ return 0;
+
+ put_buf[put_buf_cnt] = chr;
+ put_buf_cnt++;
+
+ /* End of packet == #XX so look for the '#' */
+ if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
+ put_buf[put_buf_cnt] = '\0';
+ v2printk("put%i: %s\n", ts.idx, put_buf);
+ /* Trigger check here */
+ if (ts.validate_put && ts.validate_put(put_buf)) {
+ eprintk("kgdbts: ERROR PUT: end of test "
+ "buffer on '%s' line %i expected %s got %s\n",
+ ts.name, ts.idx, ts.tst[ts.idx].put, put_buf);
+ }
+ ts.idx++;
+ put_buf_cnt = 0;
+ get_buf_cnt = 0;
+ send_ack = 1;
+ }
+ return 0;
+}
+
+static void init_simple_test(void)
+{
+ memset(&ts, 0, sizeof(ts));
+ ts.run_test = run_simple_test;
+ ts.validate_put = validate_simple_test;
+}
+
+static void run_plant_and_detach_test(int is_early)
+{
+ char before[BREAK_INSTR_SIZE];
+ char after[BREAK_INSTR_SIZE];
+
+ probe_kernel_read(before, (char *)kgdbts_break_test,
+ BREAK_INSTR_SIZE);
+ init_simple_test();
+ ts.tst = plant_and_detach_test;
+ ts.name = "plant_and_detach_test";
+ /* Activate test with initial breakpoint */
+ if (!is_early)
+ kgdb_breakpoint();
+ probe_kernel_read(after, (char *)kgdbts_break_test,
+ BREAK_INSTR_SIZE);
+ if (memcmp(before, after, BREAK_INSTR_SIZE)) {
+ printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");
+ panic("kgdb memory corruption");
+ }
+
+ /* complete the detach test */
+ if (!is_early)
+ kgdbts_break_test();
+}
+
+static void run_breakpoint_test(int is_hw_breakpoint)
+{
+ test_complete = 0;
+ init_simple_test();
+ if (is_hw_breakpoint) {
+ ts.tst = hw_breakpoint_test;
+ ts.name = "hw_breakpoint_test";
+ } else {
+ ts.tst = sw_breakpoint_test;
+ ts.name = "sw_breakpoint_test";
+ }
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ /* run code with the break point in it */
+ kgdbts_break_test();
+ kgdb_breakpoint();
+
+ if (test_complete)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_hw_break_test(int is_write_test)
+{
+ test_complete = 0;
+ init_simple_test();
+ if (is_write_test) {
+ ts.tst = hw_write_break_test;
+ ts.name = "hw_write_break_test";
+ } else {
+ ts.tst = hw_access_break_test;
+ ts.name = "hw_access_break_test";
+ }
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ hw_break_val_access();
+ if (is_write_test) {
+ if (test_complete == 2)
+ eprintk("kgdbts: ERROR %s broke on access\n",
+ ts.name);
+ hw_break_val_write();
+ }
+ kgdb_breakpoint();
+
+ if (test_complete == 1)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_nmi_sleep_test(int nmi_sleep)
+{
+ unsigned long flags;
+
+ init_simple_test();
+ ts.tst = nmi_sleep_test;
+ ts.name = "nmi_sleep_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ local_irq_save(flags);
+ mdelay(nmi_sleep*1000);
+ touch_nmi_watchdog();
+ local_irq_restore(flags);
+ if (test_complete != 2)
+ eprintk("kgdbts: ERROR nmi_test did not hit nmi\n");
+ kgdb_breakpoint();
+ if (test_complete == 1)
+ return;
+
+ eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_bad_read_test(void)
+{
+ init_simple_test();
+ ts.tst = bad_read_test;
+ ts.name = "bad_read_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_do_fork_test(void)
+{
+ init_simple_test();
+ ts.tst = do_fork_test;
+ ts.name = "do_fork_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_sys_open_test(void)
+{
+ init_simple_test();
+ ts.tst = sys_open_test;
+ ts.name = "sys_open_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+}
+
+static void run_singlestep_break_test(void)
+{
+ init_simple_test();
+ ts.tst = singlestep_break_test;
+ ts.name = "singlestep_breakpoint_test";
+ /* Activate test with initial breakpoint */
+ kgdb_breakpoint();
+ kgdbts_break_test();
+ kgdbts_break_test();
+}
+
+static void kgdbts_run_tests(void)
+{
+ char *ptr;
+ int fork_test = 0;
+ int sys_open_test = 0;
+ int nmi_sleep = 0;
+
+ ptr = strstr(config, "F");
+ if (ptr)
+ fork_test = simple_strtol(ptr+1, NULL, 10);
+ ptr = strstr(config, "S");
+ if (ptr)
+ sys_open_test = simple_strtol(ptr+1, NULL, 10);
+ ptr = strstr(config, "N");
+ if (ptr)
+ nmi_sleep = simple_strtol(ptr+1, NULL, 10);
+
+ /* required internal KGDB tests */
+ v1printk("kgdbts:RUN plant and detach test\n");
+ run_plant_and_detach_test(0);
+ v1printk("kgdbts:RUN sw breakpoint test\n");
+ run_breakpoint_test(0);
+ v1printk("kgdbts:RUN bad memory access test\n");
+ run_bad_read_test();
+ v1printk("kgdbts:RUN singlestep breakpoint test\n");
+ run_singlestep_break_test();
+
+ /* ===Optional tests=== */
+
+ /* All HW break point tests */
+ if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) {
+ v1printk("kgdbts:RUN hw breakpoint test\n");
+ run_breakpoint_test(1);
+ v1printk("kgdbts:RUN hw write breakpoint test\n");
+ run_hw_break_test(1);
+ v1printk("kgdbts:RUN access write breakpoint test\n");
+ run_hw_break_test(0);
+ }
+
+ if (nmi_sleep) {
+ v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep);
+ run_nmi_sleep_test(nmi_sleep);
+ }
+
+ /* If the do_fork test is run it will be the last test that is
+ * executed because a kernel thread will be spawned at the very
+ * end to unregister the debug hooks.
+ */
+ if (fork_test) {
+ repeat_test = fork_test;
+ printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
+ repeat_test);
+ kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
+ run_do_fork_test();
+ return;
+ }
+
+ /* If the sys_open test is run it will be the last test that is
+ * executed because a kernel thread will be spawned at the very
+ * end to unregister the debug hooks.
+ */
+ if (sys_open_test) {
+ repeat_test = sys_open_test;
+ printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
+ repeat_test);
+ kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
+ run_sys_open_test();
+ return;
+ }
+ /* Shutdown and unregister */
+ kgdb_unregister_io_module(&kgdbts_io_ops);
+ configured = 0;
+}
+
+static int kgdbts_option_setup(char *opt)
+{
+ if (strlen(opt) > MAX_CONFIG_LEN) {
+ printk(KERN_ERR "kgdbts: config string too long\n");
+ return -ENOSPC;
+ }
+ strcpy(config, opt);
+
+ verbose = 0;
+ if (strstr(config, "V1"))
+ verbose = 1;
+ if (strstr(config, "V2"))
+ verbose = 2;
+
+ return 0;
+}
+
+__setup("kgdbts=", kgdbts_option_setup);
+
+static int configure_kgdbts(void)
+{
+ int err = 0;
+
+ if (!strlen(config) || isspace(config[0]))
+ goto noconfig;
+ err = kgdbts_option_setup(config);
+ if (err)
+ goto noconfig;
+
+ final_ack = 0;
+ run_plant_and_detach_test(1);
+
+ err = kgdb_register_io_module(&kgdbts_io_ops);
+ if (err) {
+ configured = 0;
+ return err;
+ }
+ configured = 1;
+ kgdbts_run_tests();
+
+ return err;
+
+noconfig:
+ config[0] = 0;
+ configured = 0;
+
+ return err;
+}
+
+static int __init init_kgdbts(void)
+{
+ /* Already configured? */
+ if (configured == 1)
+ return 0;
+
+ return configure_kgdbts();
+}
+
+static void cleanup_kgdbts(void)
+{
+ if (configured == 1)
+ kgdb_unregister_io_module(&kgdbts_io_ops);
+}
+
+static int kgdbts_get_char(void)
+{
+ int val = 0;
+
+ if (ts.run_test)
+ val = ts.run_test(1, 0);
+
+ return val;
+}
+
+static void kgdbts_put_char(u8 chr)
+{
+ if (ts.run_test)
+ ts.run_test(0, chr);
+}
+
+static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
+{
+ int len = strlen(kmessage);
+
+ if (len >= MAX_CONFIG_LEN) {
+ printk(KERN_ERR "kgdbts: config string too long\n");
+ return -ENOSPC;
+ }
+
+ /* Only copy in the string if the init function has not run yet */
+ if (configured < 0) {
+ strcpy(config, kmessage);
+ return 0;
+ }
+
+ if (kgdb_connected) {
+ printk(KERN_ERR
+ "kgdbts: Cannot reconfigure while KGDB is connected.\n");
+
+ return -EBUSY;
+ }
+
+ strcpy(config, kmessage);
+ /* Chop out \n char as a result of echo */
+ if (config[len - 1] == '\n')
+ config[len - 1] = '\0';
+
+ if (configured == 1)
+ cleanup_kgdbts();
+
+ /* Go and configure with the new params. */
+ return configure_kgdbts();
+}
+
+static void kgdbts_pre_exp_handler(void)
+{
+ /* Increment the module count when the debugger is active */
+ if (!kgdb_connected)
+ try_module_get(THIS_MODULE);
+}
+
+static void kgdbts_post_exp_handler(void)
+{
+ /* decrement the module count when the debugger detaches */
+ if (!kgdb_connected)
+ module_put(THIS_MODULE);
+}
+
+static struct kgdb_io kgdbts_io_ops = {
+ .name = "kgdbts",
+ .read_char = kgdbts_get_char,
+ .write_char = kgdbts_put_char,
+ .pre_exception = kgdbts_pre_exp_handler,
+ .post_exception = kgdbts_post_exp_handler,
+};
+
+module_init(init_kgdbts);
+module_exit(cleanup_kgdbts);
+module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
+MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
+MODULE_DESCRIPTION("KGDB Test Suite");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wind River Systems, Inc.");
+
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 00000000000..b6e40a7958c
--- /dev/null
+++ b/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for SGI's XP devices.
+#
+
+obj-$(CONFIG_SGI_XP) += xp.o
+xp-y := xp_main.o xp_nofault.o
+
+obj-$(CONFIG_SGI_XP) += xpc.o
+xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
+
+obj-$(CONFIG_SGI_XP) += xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
new file mode 100644
index 00000000000..5515234be86
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp.h
@@ -0,0 +1,463 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XP_H
+#define _DRIVERS_MISC_SGIXP_XP_H
+
+#include <linux/cache.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/sn/types.h>
+#include <asm/sn/bte.h>
+
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition) BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
+/*
+ * Define the maximum number of logically defined partitions the system
+ * can support. It is constrained by the maximum number of hardware
+ * partitionable regions. The term 'region' in this context refers to the
+ * minimum number of nodes that can comprise an access protection grouping.
+ * The access protection is in regards to memory, IPI and IOI.
+ *
+ * The maximum number of hardware partitionable regions is equal to the
+ * maximum number of nodes in the entire system divided by the minimum number
+ * of nodes that comprise an access protection grouping.
+ */
+#define XP_MAX_PARTITIONS 64
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap. The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8)
+#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64)
+
+/*
+ * Wrapper for bte_copy() that should it return a failure status will retry
+ * the bte_copy() once in the hope that the failure was due to a temporary
+ * aberration (i.e., the link going down temporarily).
+ *
+ * src - physical address of the source of the transfer.
+ * vdst - virtual address of the destination of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ * mode - see bte_copy() for definition.
+ * notification - see bte_copy() for definition.
+ *
+ * Note: xp_bte_copy() should never be called while holding a spinlock.
+ */
+static inline bte_result_t
+xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
+{
+ bte_result_t ret;
+ u64 pdst = ia64_tpa(vdst);
+
+ /*
+ * Ensure that the physically mapped memory is contiguous.
+ *
+ * We do this by ensuring that the memory is from region 7 only.
+ * If the need should arise to use memory from one of the other
+ * regions, then modify the BUG_ON() statement to ensure that the
+ * memory from that region is always physically contiguous.
+ */
+ BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
+
+ ret = bte_copy(src, pdst, len, mode, notification);
+ if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
+ if (!in_interrupt())
+ cond_resched();
+
+ ret = bte_copy(src, pdst, len, mode, notification);
+ }
+
+ return ret;
+}
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * The maxinum number of channels is limited to eight. For performance reasons,
+ * the internal cross partition structures require sixteen bytes per channel,
+ * and eight allows all of this interface-shared info to fit in one cache line.
+ *
+ * XPC_NCHANNELS reflects the total number of channels currently defined.
+ * If the need for additional channels arises, one can simply increase
+ * XPC_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the MAXIMUM number of channels allowed (eight), then one will need
+ * to make changes to the XPC code to allow for this.
+ */
+#define XPC_MEM_CHANNEL 0 /* memory channel number */
+#define XPC_NET_CHANNEL 1 /* network channel number */
+
+#define XPC_NCHANNELS 2 /* #of defined channels */
+#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
+
+#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
+#error XPC_NCHANNELS exceeds MAXIMUM allowed.
+#endif
+
+/*
+ * The format of an XPC message is as follows:
+ *
+ * +-------+--------------------------------+
+ * | flags |////////////////////////////////|
+ * +-------+--------------------------------+
+ * | message # |
+ * +----------------------------------------+
+ * | payload (user-defined message) |
+ * | |
+ * :
+ * | |
+ * +----------------------------------------+
+ *
+ * The size of the payload is defined by the user via xpc_connect(). A user-
+ * defined message resides in the payload area.
+ *
+ * The user should have no dealings with the message header, but only the
+ * message's payload. When a message entry is allocated (via xpc_allocate())
+ * a pointer to the payload area is returned and not the actual beginning of
+ * the XPC message. The user then constructs a message in the payload area
+ * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
+ *
+ * The size of a message entry (within a message queue) must be a cacheline
+ * sized multiple in order to facilitate the BTE transfer of messages from one
+ * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+struct xpc_msg {
+ u8 flags; /* FOR XPC INTERNAL USE ONLY */
+ u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
+ s64 number; /* FOR XPC INTERNAL USE ONLY */
+
+ u64 payload; /* user defined portion of message */
+};
+
+#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
+#define XPC_MSG_SIZE(_payload_size) \
+ L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpcUnknownReason, which must have the highest numerical value.)
+ */
+enum xpc_retval {
+ xpcSuccess = 0,
+
+ xpcNotConnected, /* 1: channel is not connected */
+ xpcConnected, /* 2: channel connected (opened) */
+ xpcRETIRED1, /* 3: (formerly xpcDisconnected) */
+
+ xpcMsgReceived, /* 4: message received */
+ xpcMsgDelivered, /* 5: message delivered and acknowledged */
+
+ xpcRETIRED2, /* 6: (formerly xpcTransferFailed) */
+
+ xpcNoWait, /* 7: operation would require wait */
+ xpcRetry, /* 8: retry operation */
+ xpcTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
+ xpcInterrupted, /* 10: interrupted wait */
+
+ xpcUnequalMsgSizes, /* 11: message size disparity between sides */
+ xpcInvalidAddress, /* 12: invalid address */
+
+ xpcNoMemory, /* 13: no memory available for XPC structures */
+ xpcLackOfResources, /* 14: insufficient resources for operation */
+ xpcUnregistered, /* 15: channel is not registered */
+ xpcAlreadyRegistered, /* 16: channel is already registered */
+
+ xpcPartitionDown, /* 17: remote partition is down */
+ xpcNotLoaded, /* 18: XPC module is not loaded */
+ xpcUnloading, /* 19: this side is unloading XPC module */
+
+ xpcBadMagic, /* 20: XPC MAGIC string not found */
+
+ xpcReactivating, /* 21: remote partition was reactivated */
+
+ xpcUnregistering, /* 22: this side is unregistering channel */
+ xpcOtherUnregistering, /* 23: other side is unregistering channel */
+
+ xpcCloneKThread, /* 24: cloning kernel thread */
+ xpcCloneKThreadFailed, /* 25: cloning kernel thread failed */
+
+ xpcNoHeartbeat, /* 26: remote partition has no heartbeat */
+
+ xpcPioReadError, /* 27: PIO read error */
+ xpcPhysAddrRegFailed, /* 28: registration of phys addr range failed */
+
+ xpcBteDirectoryError, /* 29: maps to BTEFAIL_DIR */
+ xpcBtePoisonError, /* 30: maps to BTEFAIL_POISON */
+ xpcBteWriteError, /* 31: maps to BTEFAIL_WERR */
+ xpcBteAccessError, /* 32: maps to BTEFAIL_ACCESS */
+ xpcBtePWriteError, /* 33: maps to BTEFAIL_PWERR */
+ xpcBtePReadError, /* 34: maps to BTEFAIL_PRERR */
+ xpcBteTimeOutError, /* 35: maps to BTEFAIL_TOUT */
+ xpcBteXtalkError, /* 36: maps to BTEFAIL_XTERR */
+ xpcBteNotAvailable, /* 37: maps to BTEFAIL_NOTAVAIL */
+ xpcBteUnmappedError, /* 38: unmapped BTEFAIL_ error */
+
+ xpcBadVersion, /* 39: bad version number */
+ xpcVarsNotSet, /* 40: the XPC variables are not set up */
+ xpcNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
+ xpcInvalidPartid, /* 42: invalid partition ID */
+ xpcLocalPartid, /* 43: local partition ID */
+
+ xpcOtherGoingDown, /* 44: other side going down, reason unknown */
+ xpcSystemGoingDown, /* 45: system is going down, reason unknown */
+ xpcSystemHalt, /* 46: system is being halted */
+ xpcSystemReboot, /* 47: system is being rebooted */
+ xpcSystemPoweroff, /* 48: system is being powered off */
+
+ xpcDisconnecting, /* 49: channel disconnecting (closing) */
+
+ xpcOpenCloseError, /* 50: channel open/close protocol error */
+
+ xpcDisconnected, /* 51: channel disconnected (closed) */
+
+ xpcBteSh2Start, /* 52: BTE CRB timeout */
+
+ /* 53: 0x1 BTE Error Response Short */
+ xpcBteSh2RspShort = xpcBteSh2Start + BTEFAIL_SH2_RESP_SHORT,
+
+ /* 54: 0x2 BTE Error Response Long */
+ xpcBteSh2RspLong = xpcBteSh2Start + BTEFAIL_SH2_RESP_LONG,
+
+ /* 56: 0x4 BTE Error Response DSB */
+ xpcBteSh2RspDSB = xpcBteSh2Start + BTEFAIL_SH2_RESP_DSP,
+
+ /* 60: 0x8 BTE Error Response Access */
+ xpcBteSh2RspAccess = xpcBteSh2Start + BTEFAIL_SH2_RESP_ACCESS,
+
+ /* 68: 0x10 BTE Error CRB timeout */
+ xpcBteSh2CRBTO = xpcBteSh2Start + BTEFAIL_SH2_CRB_TO,
+
+ /* 84: 0x20 BTE Error NACK limit */
+ xpcBteSh2NACKLimit = xpcBteSh2Start + BTEFAIL_SH2_NACK_LIMIT,
+
+ /* 115: BTE end */
+ xpcBteSh2End = xpcBteSh2Start + BTEFAIL_SH2_ALL,
+
+ xpcUnknownReason /* 116: unknown reason - must be last in enum */
+};
+
+/*
+ * Define the callout function types used by XPC to update the user on
+ * connection activity and state changes (via the user function registered by
+ * xpc_connect()) and to notify them of messages received and delivered (via
+ * the user function registered by xpc_send_notify()).
+ *
+ * The two function types are xpc_channel_func and xpc_notify_func and
+ * both share the following arguments, with the exception of "data", which
+ * only xpc_channel_func has.
+ *
+ * Arguments:
+ *
+ * reason - reason code. (See following table.)
+ * partid - partition ID associated with condition.
+ * ch_number - channel # associated with condition.
+ * data - pointer to optional data. (See following table.)
+ * key - pointer to optional user-defined value provided as the "key"
+ * argument to xpc_connect() or xpc_send_notify().
+ *
+ * In the following table the "Optional Data" column applies to callouts made
+ * to functions registered by xpc_connect(). A "NA" in that column indicates
+ * that this reason code can be passed to functions registered by
+ * xpc_send_notify() (i.e. they don't have data arguments).
+ *
+ * Also, the first three reason codes in the following table indicate
+ * success, whereas the others indicate failure. When a failure reason code
+ * is received, one can assume that the channel is not connected.
+ *
+ *
+ * Reason Code | Cause | Optional Data
+ * =====================+================================+=====================
+ * xpcConnected | connection has been established| max #of entries
+ * | to the specified partition on | allowed in message
+ * | the specified channel | queue
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgReceived | an XPC message arrived from | address of payload
+ * | the specified partition on the |
+ * | specified channel | [the user must call
+ * | | xpc_received() when
+ * | | finished with the
+ * | | payload]
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgDelivered | notification that the message | NA
+ * | was delivered to the intended |
+ * | recipient and that they have |
+ * | acknowledged its receipt by |
+ * | calling xpc_received() |
+ * =====================+================================+=====================
+ * xpcUnequalMsgSizes | can't connect to the specified | NULL
+ * | partition on the specified |
+ * | channel because of mismatched |
+ * | message sizes |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoMemory | insufficient memory avaiable | NULL
+ * | to allocate message queue |
+ * ---------------------+--------------------------------+---------------------
+ * xpcLackOfResources | lack of resources to create | NULL
+ * | the necessary kthreads to |
+ * | support the channel |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnregistering | this side's user has | NULL or NA
+ * | unregistered by calling |
+ * | xpc_disconnect() |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnregistering| the other side's user has | NULL or NA
+ * | unregistered by calling |
+ * | xpc_disconnect() |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoHeartbeat | the other side's XPC is no | NULL or NA
+ * | longer heartbeating |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnloading | this side's XPC module is | NULL or NA
+ * | being unloaded |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnloading | the other side's XPC module is | NULL or NA
+ * | is being unloaded |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcPioReadError | xp_nofault_PIOR() returned an | NULL or NA
+ * | error while sending an IPI |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcInvalidAddress | the address either received or | NULL or NA
+ * | sent by the specified partition|
+ * | is invalid |
+ * ---------------------+--------------------------------+---------------------
+ * xpcBteNotAvailable | attempt to pull data from the | NULL or NA
+ * xpcBtePoisonError | specified partition over the |
+ * xpcBteWriteError | specified channel via a |
+ * xpcBteAccessError | bte_copy() failed |
+ * xpcBteTimeOutError | |
+ * xpcBteXtalkError | |
+ * xpcBteDirectoryError | |
+ * xpcBteGenericError | |
+ * xpcBteUnmappedError | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnknownReason | the specified channel to the | NULL or NA
+ * | specified partition was |
+ * | unavailable for unknown reasons|
+ * =====================+================================+=====================
+ */
+
+typedef void (*xpc_channel_func) (enum xpc_retval reason, partid_t partid,
+ int ch_number, void *data, void *key);
+
+typedef void (*xpc_notify_func) (enum xpc_retval reason, partid_t partid,
+ int ch_number, void *key);
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ *
+ * The 'func' field points to the function to call when aynchronous
+ * notification is required for such events as: a connection established/lost,
+ * or an incoming message received, or an error condition encountered. A
+ * non-NULL 'func' field indicates that there is an active registration for
+ * the channel.
+ */
+struct xpc_registration {
+ struct mutex mutex;
+ xpc_channel_func func; /* function to call */
+ void *key; /* pointer to user's key */
+ u16 nentries; /* #of msg entries in local msg queue */
+ u16 msg_size; /* message queue's message size */
+ u32 assigned_limit; /* limit on #of assigned kthreads */
+ u32 idle_limit; /* limit on #of idle kthreads */
+} ____cacheline_aligned;
+
+#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
+
+/* the following are valid xpc_allocate() flags */
+#define XPC_WAIT 0 /* wait flag */
+#define XPC_NOWAIT 1 /* no wait flag */
+
+struct xpc_interface {
+ void (*connect) (int);
+ void (*disconnect) (int);
+ enum xpc_retval (*allocate) (partid_t, int, u32, void **);
+ enum xpc_retval (*send) (partid_t, int, void *);
+ enum xpc_retval (*send_notify) (partid_t, int, void *,
+ xpc_notify_func, void *);
+ void (*received) (partid_t, int, void *);
+ enum xpc_retval (*partid_to_nasids) (partid_t, void *);
+};
+
+extern struct xpc_interface xpc_interface;
+
+extern void xpc_set_interface(void (*)(int),
+ void (*)(int),
+ enum xpc_retval (*)(partid_t, int, u32, void **),
+ enum xpc_retval (*)(partid_t, int, void *),
+ enum xpc_retval (*)(partid_t, int, void *,
+ xpc_notify_func, void *),
+ void (*)(partid_t, int, void *),
+ enum xpc_retval (*)(partid_t, void *));
+extern void xpc_clear_interface(void);
+
+extern enum xpc_retval xpc_connect(int, xpc_channel_func, void *, u16,
+ u16, u32, u32);
+extern void xpc_disconnect(int);
+
+static inline enum xpc_retval
+xpc_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
+{
+ return xpc_interface.allocate(partid, ch_number, flags, payload);
+}
+
+static inline enum xpc_retval
+xpc_send(partid_t partid, int ch_number, void *payload)
+{
+ return xpc_interface.send(partid, ch_number, payload);
+}
+
+static inline enum xpc_retval
+xpc_send_notify(partid_t partid, int ch_number, void *payload,
+ xpc_notify_func func, void *key)
+{
+ return xpc_interface.send_notify(partid, ch_number, payload, func, key);
+}
+
+static inline void
+xpc_received(partid_t partid, int ch_number, void *payload)
+{
+ return xpc_interface.received(partid, ch_number, payload);
+}
+
+static inline enum xpc_retval
+xpc_partid_to_nasids(partid_t partid, void *nasids)
+{
+ return xpc_interface.partid_to_nasids(partid, nasids);
+}
+
+extern u64 xp_nofault_PIOR_target;
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
new file mode 100644
index 00000000000..1fbf99bae96
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -0,0 +1,279 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) base.
+ *
+ * XP provides a base from which its users can interact
+ * with XPC, yet not be dependent on XPC.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_NCHANNELS];
+EXPORT_SYMBOL_GPL(xpc_registrations);
+
+/*
+ * Initialize the XPC interface to indicate that XPC isn't loaded.
+ */
+static enum xpc_retval
+xpc_notloaded(void)
+{
+ return xpcNotLoaded;
+}
+
+struct xpc_interface xpc_interface = {
+ (void (*)(int))xpc_notloaded,
+ (void (*)(int))xpc_notloaded,
+ (enum xpc_retval(*)(partid_t, int, u32, void **))xpc_notloaded,
+ (enum xpc_retval(*)(partid_t, int, void *))xpc_notloaded,
+ (enum xpc_retval(*)(partid_t, int, void *, xpc_notify_func, void *))
+ xpc_notloaded,
+ (void (*)(partid_t, int, void *))xpc_notloaded,
+ (enum xpc_retval(*)(partid_t, void *))xpc_notloaded
+};
+EXPORT_SYMBOL_GPL(xpc_interface);
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect) (int),
+ void (*disconnect) (int),
+ enum xpc_retval (*allocate) (partid_t, int, u32, void **),
+ enum xpc_retval (*send) (partid_t, int, void *),
+ enum xpc_retval (*send_notify) (partid_t, int, void *,
+ xpc_notify_func, void *),
+ void (*received) (partid_t, int, void *),
+ enum xpc_retval (*partid_to_nasids) (partid_t, void *))
+{
+ xpc_interface.connect = connect;
+ xpc_interface.disconnect = disconnect;
+ xpc_interface.allocate = allocate;
+ xpc_interface.send = send;
+ xpc_interface.send_notify = send_notify;
+ xpc_interface.received = received;
+ xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+EXPORT_SYMBOL_GPL(xpc_set_interface);
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+ xpc_interface.connect = (void (*)(int))xpc_notloaded;
+ xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
+ xpc_interface.allocate = (enum xpc_retval(*)(partid_t, int, u32,
+ void **))xpc_notloaded;
+ xpc_interface.send = (enum xpc_retval(*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.send_notify = (enum xpc_retval(*)(partid_t, int, void *,
+ xpc_notify_func,
+ void *))xpc_notloaded;
+ xpc_interface.received = (void (*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.partid_to_nasids = (enum xpc_retval(*)(partid_t, void *))
+ xpc_notloaded;
+}
+EXPORT_SYMBOL_GPL(xpc_clear_interface);
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to register for connection.
+ * func - function to call for asynchronous notification of channel
+ * state changes (i.e., connection, disconnection, error) and
+ * the arrival of incoming messages.
+ * key - pointer to optional user-defined value that gets passed back
+ * to the user on any callouts made to func.
+ * payload_size - size in bytes of the XPC message's payload area which
+ * contains a user-defined message. The user should make
+ * this large enough to hold their largest message.
+ * nentries - max #of XPC message entries a message queue can contain.
+ * The actual number, which is determined when a connection
+ * is established and may be less then requested, will be
+ * passed to the user via the xpcConnected callout.
+ * assigned_limit - max number of kthreads allowed to be processing
+ * messages (per connection) at any given instant.
+ * idle_limit - max number of kthreads allowed to be idle at any given
+ * instant.
+ */
+enum xpc_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+ u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+ struct xpc_registration *registration;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(payload_size == 0 || nentries == 0);
+ DBUG_ON(func == NULL);
+ DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+ registration = &xpc_registrations[ch_number];
+
+ if (mutex_lock_interruptible(&registration->mutex) != 0)
+ return xpcInterrupted;
+
+ /* if XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func != NULL) {
+ mutex_unlock(&registration->mutex);
+ return xpcAlreadyRegistered;
+ }
+
+ /* register the channel for connection */
+ registration->msg_size = XPC_MSG_SIZE(payload_size);
+ registration->nentries = nentries;
+ registration->assigned_limit = assigned_limit;
+ registration->idle_limit = idle_limit;
+ registration->key = key;
+ registration->func = func;
+
+ mutex_unlock(&registration->mutex);
+
+ xpc_interface.connect(ch_number);
+
+ return xpcSuccess;
+}
+EXPORT_SYMBOL_GPL(xpc_connect);
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning this xpc_disconnect() will wait for all connections on the
+ * specified channel have been closed/torndown. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+ struct xpc_registration *registration;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+
+ registration = &xpc_registrations[ch_number];
+
+ /*
+ * We've decided not to make this a down_interruptible(), since we
+ * figured XPC's users will just turn around and call xpc_disconnect()
+ * again anyways, so we might as well wait, if need be.
+ */
+ mutex_lock(&registration->mutex);
+
+ /* if !XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func == NULL) {
+ mutex_unlock(&registration->mutex);
+ return;
+ }
+
+ /* remove the connection registration for the specified channel */
+ registration->func = NULL;
+ registration->key = NULL;
+ registration->nentries = 0;
+ registration->msg_size = 0;
+ registration->assigned_limit = 0;
+ registration->idle_limit = 0;
+
+ xpc_interface.disconnect(ch_number);
+
+ mutex_unlock(&registration->mutex);
+
+ return;
+}
+EXPORT_SYMBOL_GPL(xpc_disconnect);
+
+int __init
+xp_init(void)
+{
+ int ret, ch_number;
+ u64 func_addr = *(u64 *)xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+ if (!ia64_platform_is("sn2"))
+ return -ENODEV;
+
+ /*
+ * Register a nofault code region which performs a cross-partition
+ * PIO read. If the PIO read times out, the MCA handler will consume
+ * the error and return to a kernel-provided instruction to indicate
+ * an error. This PIO read exists because it is guaranteed to timeout
+ * if the destination is down (AMO operations do not timeout on at
+ * least some CPUs on Shubs <= v1.2, which unfortunately we have to
+ * work around).
+ */
+ ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+ 1, 1);
+ if (ret != 0) {
+ printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
+ ret);
+ }
+ /*
+ * Setup the nofault PIO read target. (There is no special reason why
+ * SH_IPI_ACCESS was selected.)
+ */
+ if (is_shub2())
+ xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+ else
+ xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+
+ /* initialize the connection registration mutex */
+ for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++)
+ mutex_init(&xpc_registrations[ch_number].mutex);
+
+ return 0;
+}
+
+module_init(xp_init);
+
+void __exit
+xp_exit(void)
+{
+ u64 func_addr = *(u64 *)xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+ /* unregister the PIO read nofault code region */
+ (void)sn_register_nofault_code(func_addr, err_func_addr,
+ err_func_addr, 1, 0);
+}
+
+module_exit(xp_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 00000000000..e38d4331942
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it. This function
+ * will be registered as a nofault code block. In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * The definition of "consumption" and the time it takes for an MCA
+ * to surface is processor implementation specific. This code
+ * is sufficient on Itanium through the Montvale processor family.
+ * It may need to be adjusted for future processor implementations.
+ *
+ * extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+ .global xp_nofault_PIOR
+xp_nofault_PIOR:
+ mov r8=r0 // Stage a success return value
+ ld8.acq r9=[r32];; // PIO Read the specified register
+ adds r9=1,r9;; // Add to force consumption
+ srlz.i;; // Allow time for MCA to surface
+ br.ret.sptk.many b0;; // Return success
+
+ .global xp_error_PIOR
+xp_error_PIOR:
+ mov r8=1 // Return value of 1
+ br.ret.sptk.many b0;; // Return failure
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
new file mode 100644
index 00000000000..9eb6d4a3269
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -0,0 +1,1187 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) structures and macros.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XPC_H
+#define _DRIVERS_MISC_SGIXP_XPC_H
+
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/shub_mmr.h>
+#include "xp.h"
+
+/*
+ * XPC Version numbers consist of a major and minor number. XPC can always
+ * talk to versions with same major #, and never talk to versions with a
+ * different major #.
+ */
+#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
+#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
+
+/*
+ * The next macros define word or bit representations for given
+ * C-brick nasid in either the SAL provided bit array representing
+ * nasids in the partition/machine or the AMO_t array used for
+ * inter-partition initiation communications.
+ *
+ * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
+ * such, some space will be saved by insisting that nasid information
+ * passed from SAL always be packed for C-Bricks and the
+ * cross-partition interrupts use the same packing scheme.
+ */
+#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
+#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
+#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
+ (1UL << XPC_NASID_B_INDEX(_n)))
+#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
+
+#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
+
+/* define the process name of HB checker and the CPU it is pinned to */
+#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
+#define XPC_HB_CHECK_CPU 0
+
+/* define the process name of the discovery thread */
+#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
+
+/*
+ * the reserved page
+ *
+ * SAL reserves one page of memory per partition for XPC. Though a full page
+ * in length (16384 bytes), its starting address is not page aligned, but it
+ * is cacheline aligned. The reserved page consists of the following:
+ *
+ * reserved page header
+ *
+ * The first cacheline of the reserved page contains the header
+ * (struct xpc_rsvd_page). Before SAL initialization has completed,
+ * SAL has set up the following fields of the reserved page header:
+ * SAL_signature, SAL_version, partid, and nasids_size. The other
+ * fields are set up by XPC. (xpc_rsvd_page points to the local
+ * partition's reserved page.)
+ *
+ * part_nasids mask
+ * mach_nasids mask
+ *
+ * SAL also sets up two bitmaps (or masks), one that reflects the actual
+ * nasids in this partition (part_nasids), and the other that reflects
+ * the actual nasids in the entire machine (mach_nasids). We're only
+ * interested in the even numbered nasids (which contain the processors
+ * and/or memory), so we only need half as many bits to represent the
+ * nasids. The part_nasids mask is located starting at the first cacheline
+ * following the reserved page header. The mach_nasids mask follows right
+ * after the part_nasids mask. The size in bytes of each mask is reflected
+ * by the reserved page header field 'nasids_size'. (Local partition's
+ * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
+ *
+ * vars
+ * vars part
+ *
+ * Immediately following the mach_nasids mask are the XPC variables
+ * required by other partitions. First are those that are generic to all
+ * partitions (vars), followed on the next available cacheline by those
+ * which are partition specific (vars part). These are setup by XPC.
+ * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
+ *
+ * Note: Until vars_pa is set, the partition XPC code has not been initialized.
+ */
+struct xpc_rsvd_page {
+ u64 SAL_signature; /* SAL: unique signature */
+ u64 SAL_version; /* SAL: version */
+ u8 partid; /* SAL: partition ID */
+ u8 version;
+ u8 pad1[6]; /* align to next u64 in cacheline */
+ u64 vars_pa; /* physical address of struct xpc_vars */
+ struct timespec stamp; /* time when reserved page was setup by XPC */
+ u64 pad2[9]; /* align to last u64 in cacheline */
+ u64 nasids_size; /* SAL: size of each nasid mask in bytes */
+};
+
+#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */
+
+#define XPC_SUPPORTS_RP_STAMP(_version) \
+ (_version >= _XPC_VERSION(1, 1))
+
+/*
+ * compare stamps - the return value is:
+ *
+ * < 0, if stamp1 < stamp2
+ * = 0, if stamp1 == stamp2
+ * > 0, if stamp1 > stamp2
+ */
+static inline int
+xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
+{
+ int ret;
+
+ ret = stamp1->tv_sec - stamp2->tv_sec;
+ if (ret == 0)
+ ret = stamp1->tv_nsec - stamp2->tv_nsec;
+
+ return ret;
+}
+
+/*
+ * Define the structures by which XPC variables can be exported to other
+ * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
+ */
+
+/*
+ * The following structure describes the partition generic variables
+ * needed by other partitions in order to properly initialize.
+ *
+ * struct xpc_vars version number also applies to struct xpc_vars_part.
+ * Changes to either structure and/or related functionality should be
+ * reflected by incrementing either the major or minor version numbers
+ * of struct xpc_vars.
+ */
+struct xpc_vars {
+ u8 version;
+ u64 heartbeat;
+ u64 heartbeating_to_mask;
+ u64 heartbeat_offline; /* if 0, heartbeat should be changing */
+ int act_nasid;
+ int act_phys_cpuid;
+ u64 vars_part_pa;
+ u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
+ AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
+};
+
+#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */
+
+#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
+ (_version >= _XPC_VERSION(3, 1))
+
+static inline int
+xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
+{
+ return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
+}
+
+static inline void
+xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
+{
+ u64 old_mask, new_mask;
+
+ do {
+ old_mask = vars->heartbeating_to_mask;
+ new_mask = (old_mask | (1UL << partid));
+ } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+ old_mask);
+}
+
+static inline void
+xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
+{
+ u64 old_mask, new_mask;
+
+ do {
+ old_mask = vars->heartbeating_to_mask;
+ new_mask = (old_mask & ~(1UL << partid));
+ } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+ old_mask);
+}
+
+/*
+ * The AMOs page consists of a number of AMO variables which are divided into
+ * four groups, The first two groups are used to identify an IRQ's sender.
+ * These two groups consist of 64 and 128 AMO variables respectively. The last
+ * two groups, consisting of just one AMO variable each, are used to identify
+ * the remote partitions that are currently engaged (from the viewpoint of
+ * the XPC running on the remote partition).
+ */
+#define XPC_NOTIFY_IRQ_AMOS 0
+#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
+#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
+#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
+
+/*
+ * The following structure describes the per partition specific variables.
+ *
+ * An array of these structures, one per partition, will be defined. As a
+ * partition becomes active XPC will copy the array entry corresponding to
+ * itself from that partition. It is desirable that the size of this
+ * structure evenly divide into a cacheline, such that none of the entries
+ * in this array crosses a cacheline boundary. As it is now, each entry
+ * occupies half a cacheline.
+ */
+struct xpc_vars_part {
+ u64 magic;
+
+ u64 openclose_args_pa; /* physical address of open and close args */
+ u64 GPs_pa; /* physical address of Get/Put values */
+
+ u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
+ int IPI_nasid; /* nasid of where to send IPIs */
+ int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
+
+ u8 nchannels; /* #of defined channels supported */
+
+ u8 reserved[23]; /* pad to a full 64 bytes */
+};
+
+/*
+ * The vars_part MAGIC numbers play a part in the first contact protocol.
+ *
+ * MAGIC1 indicates that the per partition specific variables for a remote
+ * partition have been initialized by this partition.
+ *
+ * MAGIC2 indicates that this partition has pulled the remote partititions
+ * per partition variables that pertain to this partition.
+ */
+#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
+
+#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
+#define XPC_RP_VARS(_rp) ((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \
+ xp_nasid_mask_words))
+#define XPC_RP_VARS_PART(_rp) ((struct xpc_vars_part *) \
+ ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
+
+/*
+ * Functions registered by add_timer() or called by kernel_thread() only
+ * allow for a single 64-bit argument. The following macros can be used to
+ * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
+ * the passed argument.
+ */
+#define XPC_PACK_ARGS(_arg1, _arg2) \
+ ((((u64) _arg1) & 0xffffffff) | \
+ ((((u64) _arg2) & 0xffffffff) << 32))
+
+#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
+
+/*
+ * Define a Get/Put value pair (pointers) used with a message queue.
+ */
+struct xpc_gp {
+ s64 get; /* Get value */
+ s64 put; /* Put value */
+};
+
+#define XPC_GP_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
+
+/*
+ * Define a structure that contains arguments associated with opening and
+ * closing a channel.
+ */
+struct xpc_openclose_args {
+ u16 reason; /* reason why channel is closing */
+ u16 msg_size; /* sizeof each message entry */
+ u16 remote_nentries; /* #of message entries in remote msg queue */
+ u16 local_nentries; /* #of message entries in local msg queue */
+ u64 local_msgqueue_pa; /* physical address of local message queue */
+};
+
+#define XPC_OPENCLOSE_ARGS_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
+
+/* struct xpc_msg flags */
+
+#define XPC_M_DONE 0x01 /* msg has been received/consumed */
+#define XPC_M_READY 0x02 /* msg is ready to be sent */
+#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
+
+#define XPC_MSG_ADDRESS(_payload) \
+ ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
+
+/*
+ * Defines notify entry.
+ *
+ * This is used to notify a message's sender that their message was received
+ * and consumed by the intended recipient.
+ */
+struct xpc_notify {
+ u8 type; /* type of notification */
+
+ /* the following two fields are only used if type == XPC_N_CALL */
+ xpc_notify_func func; /* user's notify function */
+ void *key; /* pointer to user's key */
+};
+
+/* struct xpc_notify type of notification */
+
+#define XPC_N_CALL 0x01 /* notify function provided by user */
+
+/*
+ * Define the structure that manages all the stuff required by a channel. In
+ * particular, they are used to manage the messages sent across the channel.
+ *
+ * This structure is private to a partition, and is NOT shared across the
+ * partition boundary.
+ *
+ * There is an array of these structures for each remote partition. It is
+ * allocated at the time a partition becomes active. The array contains one
+ * of these structures for each potential channel connection to that partition.
+ *
+ * Each of these structures manages two message queues (circular buffers).
+ * They are allocated at the time a channel connection is made. One of
+ * these message queues (local_msgqueue) holds the locally created messages
+ * that are destined for the remote partition. The other of these message
+ * queues (remote_msgqueue) is a locally cached copy of the remote partition's
+ * own local_msgqueue.
+ *
+ * The following is a description of the Get/Put pointers used to manage these
+ * two message queues. Consider the local_msgqueue to be on one partition
+ * and the remote_msgqueue to be its cached copy on another partition. A
+ * description of what each of the lettered areas contains is included.
+ *
+ *
+ * local_msgqueue remote_msgqueue
+ *
+ * |/////////| |/////////|
+ * w_remote_GP.get --> +---------+ |/////////|
+ * | F | |/////////|
+ * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
+ * | | | |
+ * | | | E |
+ * | | | |
+ * | | +---------+ <-- w_local_GP.get
+ * | B | |/////////|
+ * | | |////D////|
+ * | | |/////////|
+ * | | +---------+ <-- w_remote_GP.put
+ * | | |////C////|
+ * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
+ * | | |/////////|
+ * | A | |/////////|
+ * | | |/////////|
+ * w_local_GP.put --> +---------+ |/////////|
+ * |/////////| |/////////|
+ *
+ *
+ * ( remote_GP.[get|put] are cached copies of the remote
+ * partition's local_GP->[get|put], and thus their values can
+ * lag behind their counterparts on the remote partition. )
+ *
+ *
+ * A - Messages that have been allocated, but have not yet been sent to the
+ * remote partition.
+ *
+ * B - Messages that have been sent, but have not yet been acknowledged by the
+ * remote partition as having been received.
+ *
+ * C - Area that needs to be prepared for the copying of sent messages, by
+ * the clearing of the message flags of any previously received messages.
+ *
+ * D - Area into which sent messages are to be copied from the remote
+ * partition's local_msgqueue and then delivered to their intended
+ * recipients. [ To allow for a multi-message copy, another pointer
+ * (next_msg_to_pull) has been added to keep track of the next message
+ * number needing to be copied (pulled). It chases after w_remote_GP.put.
+ * Any messages lying between w_local_GP.get and next_msg_to_pull have
+ * been copied and are ready to be delivered. ]
+ *
+ * E - Messages that have been copied and delivered, but have not yet been
+ * acknowledged by the recipient as having been received.
+ *
+ * F - Messages that have been acknowledged, but XPC has not yet notified the
+ * sender that the message was received by its intended recipient.
+ * This is also an area that needs to be prepared for the allocating of
+ * new messages, by the clearing of the message flags of the acknowledged
+ * messages.
+ */
+struct xpc_channel {
+ partid_t partid; /* ID of remote partition connected */
+ spinlock_t lock; /* lock for updating this structure */
+ u32 flags; /* general flags */
+
+ enum xpc_retval reason; /* reason why channel is disconnect'g */
+ int reason_line; /* line# disconnect initiated from */
+
+ u16 number; /* channel # */
+
+ u16 msg_size; /* sizeof each msg entry */
+ u16 local_nentries; /* #of msg entries in local msg queue */
+ u16 remote_nentries; /* #of msg entries in remote msg queue */
+
+ void *local_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg *local_msgqueue; /* local message queue */
+ void *remote_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
+ /* local message queue */
+ u64 remote_msgqueue_pa; /* phys addr of remote partition's */
+ /* local message queue */
+
+ atomic_t references; /* #of external references to queues */
+
+ atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
+ wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
+
+ u8 delayed_IPI_flags; /* IPI flags received, but delayed */
+ /* action until channel disconnected */
+
+ /* queue of msg senders who want to be notified when msg received */
+
+ atomic_t n_to_notify; /* #of msg senders to notify */
+ struct xpc_notify *notify_queue; /* notify queue for messages sent */
+
+ xpc_channel_func func; /* user's channel function */
+ void *key; /* pointer to user's key */
+
+ struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
+ struct completion wdisconnect_wait; /* wait for channel disconnect */
+
+ struct xpc_openclose_args *local_openclose_args; /* args passed on */
+ /* opening or closing of channel */
+
+ /* various flavors of local and remote Get/Put values */
+
+ struct xpc_gp *local_GP; /* local Get/Put values */
+ struct xpc_gp remote_GP; /* remote Get/Put values */
+ struct xpc_gp w_local_GP; /* working local Get/Put values */
+ struct xpc_gp w_remote_GP; /* working remote Get/Put values */
+ s64 next_msg_to_pull; /* Put value of next msg to pull */
+
+ /* kthread management related fields */
+
+ atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
+ u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
+ atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
+ u32 kthreads_idle_limit; /* limit on #of kthreads idle */
+ atomic_t kthreads_active; /* #of kthreads actively working */
+
+ wait_queue_head_t idle_wq; /* idle kthread wait queue */
+
+} ____cacheline_aligned;
+
+/* struct xpc_channel flags */
+
+#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
+
+#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
+#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
+#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
+#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
+
+#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
+#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */
+#define XPC_C_CONNECTEDCALLOUT_MADE \
+ 0x00000080 /* connected callout completed */
+#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */
+#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */
+
+#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */
+#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */
+#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */
+#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */
+
+#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */
+#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */
+#define XPC_C_DISCONNECTINGCALLOUT \
+ 0x00010000 /* disconnecting callout initiated */
+#define XPC_C_DISCONNECTINGCALLOUT_MADE \
+ 0x00020000 /* disconnecting callout completed */
+#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */
+
+/*
+ * Manages channels on a partition basis. There is one of these structures
+ * for each partition (a partition will never utilize the structure that
+ * represents itself).
+ */
+struct xpc_partition {
+
+ /* XPC HB infrastructure */
+
+ u8 remote_rp_version; /* version# of partition's rsvd pg */
+ struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */
+ u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
+ u64 remote_vars_pa; /* phys addr of partition's vars */
+ u64 remote_vars_part_pa; /* phys addr of partition's vars part */
+ u64 last_heartbeat; /* HB at last read */
+ u64 remote_amos_page_pa; /* phys addr of partition's amos page */
+ int remote_act_nasid; /* active part's act/deact nasid */
+ int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
+ u32 act_IRQ_rcvd; /* IRQs since activation */
+ spinlock_t act_lock; /* protect updating of act_state */
+ u8 act_state; /* from XPC HB viewpoint */
+ u8 remote_vars_version; /* version# of partition's vars */
+ enum xpc_retval reason; /* reason partition is deactivating */
+ int reason_line; /* line# deactivation initiated from */
+ int reactivate_nasid; /* nasid in partition to reactivate */
+
+ unsigned long disengage_request_timeout; /* timeout in jiffies */
+ struct timer_list disengage_request_timer;
+
+ /* XPC infrastructure referencing and teardown control */
+
+ u8 setup_state; /* infrastructure setup state */
+ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
+ atomic_t references; /* #of references to infrastructure */
+
+ /*
+ * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
+ * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
+ * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
+ * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
+ */
+
+ u8 nchannels; /* #of defined channels supported */
+ atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+ atomic_t nchannels_engaged; /* #of channels engaged with remote part */
+ struct xpc_channel *channels; /* array of channel structures */
+
+ void *local_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp *local_GPs; /* local Get/Put values */
+ void *remote_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp *remote_GPs; /* copy of remote partition's local */
+ /* Get/Put values */
+ u64 remote_GPs_pa; /* phys address of remote partition's local */
+ /* Get/Put values */
+
+ /* fields used to pass args when opening or closing a channel */
+
+ void *local_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *local_openclose_args; /* local's args */
+ void *remote_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
+ /* args */
+ u64 remote_openclose_args_pa; /* phys addr of remote's args */
+
+ /* IPI sending, receiving and handling related fields */
+
+ int remote_IPI_nasid; /* nasid of where to send IPIs */
+ int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
+ AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
+
+ AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
+ u64 local_IPI_amo; /* IPI amo flags yet to be handled */
+ char IPI_owner[8]; /* IPI owner's name */
+ struct timer_list dropped_IPI_timer; /* dropped IPI timer */
+
+ spinlock_t IPI_lock; /* IPI handler lock */
+
+ /* channel manager related fields */
+
+ atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
+ wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
+
+} ____cacheline_aligned;
+
+/* struct xpc_partition act_state values (for XPC HB) */
+
+#define XPC_P_INACTIVE 0x00 /* partition is not active */
+#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
+#define XPC_P_ACTIVATING 0x02 /* activation thread started */
+#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
+#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
+
+#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+ xpc_deactivate_partition(__LINE__, (_p), (_reason))
+
+/* struct xpc_partition setup_state values */
+
+#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
+#define XPC_P_SETUP 0x01 /* infrastructure is setup */
+#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
+#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
+
+/*
+ * struct xpc_partition IPI_timer #of seconds to wait before checking for
+ * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
+ * after the IPI was received.
+ */
+#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
+
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
+
+/* interval in seconds to print 'waiting disengagement' messages */
+#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
+
+#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
+
+/* found in xp_main.c */
+extern struct xpc_registration xpc_registrations[];
+
+/* found in xpc_main.c */
+extern struct device *xpc_part;
+extern struct device *xpc_chan;
+extern int xpc_disengage_request_timelimit;
+extern int xpc_disengage_request_timedout;
+extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
+extern void xpc_dropped_IPI_check(struct xpc_partition *);
+extern void xpc_activate_partition(struct xpc_partition *);
+extern void xpc_activate_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
+extern void xpc_disconnect_wait(int);
+
+/* found in xpc_partition.c */
+extern int xpc_exiting;
+extern struct xpc_vars *xpc_vars;
+extern struct xpc_rsvd_page *xpc_rsvd_page;
+extern struct xpc_vars_part *xpc_vars_part;
+extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
+extern void xpc_allow_IPI_ops(void);
+extern void xpc_restrict_IPI_ops(void);
+extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
+extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
+extern void xpc_mark_partition_inactive(struct xpc_partition *);
+extern void xpc_discovery(void);
+extern void xpc_check_remote_hb(void);
+extern void xpc_deactivate_partition(const int, struct xpc_partition *,
+ enum xpc_retval);
+extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
+
+/* found in xpc_channel.c */
+extern void xpc_initiate_connect(int);
+extern void xpc_initiate_disconnect(int);
+extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
+extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
+extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
+ xpc_notify_func, void *);
+extern void xpc_initiate_received(partid_t, int, void *);
+extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
+extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
+extern void xpc_process_channel_activity(struct xpc_partition *);
+extern void xpc_connected_callout(struct xpc_channel *);
+extern void xpc_deliver_msg(struct xpc_channel *);
+extern void xpc_disconnect_channel(const int, struct xpc_channel *,
+ enum xpc_retval, unsigned long *);
+extern void xpc_disconnect_callout(struct xpc_channel *, enum xpc_retval);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
+extern void xpc_teardown_infrastructure(struct xpc_partition *);
+
+static inline void
+xpc_wakeup_channel_mgr(struct xpc_partition *part)
+{
+ if (atomic_inc_return(&part->channel_mgr_requests) == 1)
+ wake_up(&part->channel_mgr_wq);
+}
+
+/*
+ * These next two inlines are used to keep us from tearing down a channel's
+ * msg queues while a thread may be referencing them.
+ */
+static inline void
+xpc_msgqueue_ref(struct xpc_channel *ch)
+{
+ atomic_inc(&ch->references);
+}
+
+static inline void
+xpc_msgqueue_deref(struct xpc_channel *ch)
+{
+ s32 refs = atomic_dec_return(&ch->references);
+
+ DBUG_ON(refs < 0);
+ if (refs == 0)
+ xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
+}
+
+#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
+ xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
+
+/*
+ * These two inlines are used to keep us from tearing down a partition's
+ * setup infrastructure while a thread may be referencing it.
+ */
+static inline void
+xpc_part_deref(struct xpc_partition *part)
+{
+ s32 refs = atomic_dec_return(&part->references);
+
+ DBUG_ON(refs < 0);
+ if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN)
+ wake_up(&part->teardown_wq);
+}
+
+static inline int
+xpc_part_ref(struct xpc_partition *part)
+{
+ int setup;
+
+ atomic_inc(&part->references);
+ setup = (part->setup_state == XPC_P_SETUP);
+ if (!setup)
+ xpc_part_deref(part);
+
+ return setup;
+}
+
+/*
+ * The following macro is to be used for the setting of the reason and
+ * reason_line fields in both the struct xpc_channel and struct xpc_partition
+ * structures.
+ */
+#define XPC_SET_REASON(_p, _reason, _line) \
+ { \
+ (_p)->reason = _reason; \
+ (_p)->reason_line = _line; \
+ }
+
+/*
+ * This next set of inlines are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static inline void
+xpc_mark_partition_engaged(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO *
+ sizeof(AMO_t)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ (1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_mark_partition_disengaged(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO *
+ sizeof(AMO_t)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~(1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_request_partition_disengage(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+ (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ (1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_cancel_partition_disengage_request(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+ (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~(1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline u64
+xpc_partition_engaged(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+ /* return our partition's AMO variable ANDed with partid_mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ partid_mask);
+}
+
+static inline u64
+xpc_partition_disengage_requested(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+ /* return our partition's AMO variable ANDed with partid_mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ partid_mask);
+}
+
+static inline void
+xpc_clear_partition_engaged(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+ /* clear bit(s) based on partid_mask in our partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~partid_mask);
+}
+
+static inline void
+xpc_clear_partition_disengage_request(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+ /* clear bit(s) based on partid_mask in our partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~partid_mask);
+}
+
+/*
+ * The following set of macros and inlines are used for the sending and
+ * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
+ * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
+ * the other that is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static inline u64
+xpc_IPI_receive(AMO_t *amo)
+{
+ return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static inline enum xpc_retval
+xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
+{
+ int ret = 0;
+ unsigned long irq_flags;
+
+ local_irq_save(irq_flags);
+
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+ sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ return ((ret == 0) ? xpcSuccess : xpcPioReadError);
+}
+
+/*
+ * IPIs associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Flag the appropriate AMO variable and send an IPI to the specified node.
+ */
+static inline void
+xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
+ int to_phys_cpuid)
+{
+ int w_index = XPC_NASID_W_INDEX(from_nasid);
+ int b_index = XPC_NASID_B_INDEX(from_nasid);
+ AMO_t *amos = (AMO_t *)__va(amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
+
+ (void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
+ to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static inline void
+xpc_IPI_send_activate(struct xpc_vars *vars)
+{
+ xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
+ vars->act_nasid, vars->act_phys_cpuid);
+}
+
+static inline void
+xpc_IPI_send_activated(struct xpc_partition *part)
+{
+ xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
+ part->remote_act_nasid,
+ part->remote_act_phys_cpuid);
+}
+
+static inline void
+xpc_IPI_send_reactivate(struct xpc_partition *part)
+{
+ xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
+ xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
+}
+
+static inline void
+xpc_IPI_send_disengage(struct xpc_partition *part)
+{
+ xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
+ part->remote_act_nasid,
+ part->remote_act_phys_cpuid);
+}
+
+/*
+ * IPIs associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Send an IPI to the remote partition that is associated with the
+ * specified channel.
+ */
+#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
+ xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+static inline void
+xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
+ unsigned long *irq_flags)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ enum xpc_retval ret;
+
+ if (likely(part->act_state != XPC_P_DEACTIVATING)) {
+ ret = xpc_IPI_send(part->remote_IPI_amo_va,
+ (u64)ipi_flag << (ch->number * 8),
+ part->remote_IPI_nasid,
+ part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY);
+ dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+ ipi_flag_string, ch->partid, ch->number, ret);
+ if (unlikely(ret != xpcSuccess)) {
+ if (irq_flags != NULL)
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ if (irq_flags != NULL)
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+ }
+}
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us an IPI. This faked IPI will be handled
+ * by xpc_dropped_IPI_check().
+ */
+#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
+ xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
+
+static inline void
+xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
+ char *ipi_flag_string)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+
+ FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
+ FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
+ dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+ ipi_flag_string, ch->partid, ch->number);
+}
+
+/*
+ * The sending and receiving of IPIs includes the setting of an AMO variable
+ * to indicate the reason the IPI was sent. The 64-bit variable is divided
+ * up into eight bytes, ordered from right to left. Byte zero pertains to
+ * channel 0, byte one to channel 1, and so on. Each byte is described by
+ * the following IPI flags.
+ */
+
+#define XPC_IPI_CLOSEREQUEST 0x01
+#define XPC_IPI_CLOSEREPLY 0x02
+#define XPC_IPI_OPENREQUEST 0x04
+#define XPC_IPI_OPENREPLY 0x08
+#define XPC_IPI_MSGREQUEST 0x10
+
+/* given an AMO variable and a channel#, get its associated IPI flags */
+#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
+#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
+
+#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
+#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010UL)
+
+static inline void
+xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->local_openclose_args;
+
+ args->reason = ch->reason;
+
+ XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
+}
+
+static inline void
+xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
+}
+
+static inline void
+xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->local_openclose_args;
+
+ args->msg_size = ch->msg_size;
+ args->local_nentries = ch->local_nentries;
+
+ XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
+}
+
+static inline void
+xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->local_openclose_args;
+
+ args->remote_nentries = ch->remote_nentries;
+ args->local_nentries = ch->local_nentries;
+ args->local_msgqueue_pa = __pa(ch->local_msgqueue);
+
+ XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
+}
+
+static inline void
+xpc_IPI_send_msgrequest(struct xpc_channel *ch)
+{
+ XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
+}
+
+static inline void
+xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
+{
+ XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
+}
+
+/*
+ * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
+ * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
+ * activation and 2 AMO variables for partition deactivation.
+ */
+static inline AMO_t *
+xpc_IPI_init(int index)
+{
+ AMO_t *amo = xpc_vars->amos_page + index;
+
+ (void)xpc_IPI_receive(amo); /* clear AMO variable */
+ return amo;
+}
+
+static inline enum xpc_retval
+xpc_map_bte_errors(bte_result_t error)
+{
+ if (error == BTE_SUCCESS)
+ return xpcSuccess;
+
+ if (is_shub2()) {
+ if (BTE_VALID_SH2_ERROR(error))
+ return xpcBteSh2Start + error;
+ return xpcBteUnmappedError;
+ }
+ switch (error) {
+ case BTE_SUCCESS:
+ return xpcSuccess;
+ case BTEFAIL_DIR:
+ return xpcBteDirectoryError;
+ case BTEFAIL_POISON:
+ return xpcBtePoisonError;
+ case BTEFAIL_WERR:
+ return xpcBteWriteError;
+ case BTEFAIL_ACCESS:
+ return xpcBteAccessError;
+ case BTEFAIL_PWERR:
+ return xpcBtePWriteError;
+ case BTEFAIL_PRERR:
+ return xpcBtePReadError;
+ case BTEFAIL_TOUT:
+ return xpcBteTimeOutError;
+ case BTEFAIL_XTERR:
+ return xpcBteXtalkError;
+ case BTEFAIL_NOTAVAIL:
+ return xpcBteNotAvailable;
+ default:
+ return xpcBteUnmappedError;
+ }
+}
+
+/*
+ * Check to see if there is any channel activity to/from the specified
+ * partition.
+ */
+static inline void
+xpc_check_for_channel_activity(struct xpc_partition *part)
+{
+ u64 IPI_amo;
+ unsigned long irq_flags;
+
+ IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
+ if (IPI_amo == 0)
+ return;
+
+ spin_lock_irqsave(&part->IPI_lock, irq_flags);
+ part->local_IPI_amo |= IPI_amo;
+ spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+
+ dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
+ XPC_PARTID(part), IPI_amo);
+
+ xpc_wakeup_channel_mgr(part);
+}
+
+#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
new file mode 100644
index 00000000000..bfcb9ea968e
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -0,0 +1,2243 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) channel support.
+ *
+ * This is the part of XPC that manages the channels and
+ * sends/receives messages across them to/from other partitions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kzalloc will give us cachline aligned memory by default */
+ *base = kzalloc(size, flags);
+ if (*base == NULL)
+ return NULL;
+
+ if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+ return *base;
+
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kzalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL)
+ return NULL;
+
+ return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Set up the initial values for the XPartition Communication channels.
+ */
+static void
+xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
+{
+ int ch_number;
+ struct xpc_channel *ch;
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ ch->partid = partid;
+ ch->number = ch_number;
+ ch->flags = XPC_C_DISCONNECTED;
+
+ ch->local_GP = &part->local_GPs[ch_number];
+ ch->local_openclose_args =
+ &part->local_openclose_args[ch_number];
+
+ atomic_set(&ch->kthreads_assigned, 0);
+ atomic_set(&ch->kthreads_idle, 0);
+ atomic_set(&ch->kthreads_active, 0);
+
+ atomic_set(&ch->references, 0);
+ atomic_set(&ch->n_to_notify, 0);
+
+ spin_lock_init(&ch->lock);
+ mutex_init(&ch->msg_to_pull_mutex);
+ init_completion(&ch->wdisconnect_wait);
+
+ atomic_set(&ch->n_on_msg_allocate_wq, 0);
+ init_waitqueue_head(&ch->msg_allocate_wq);
+ init_waitqueue_head(&ch->idle_wq);
+ }
+}
+
+/*
+ * Setup the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+enum xpc_retval
+xpc_setup_infrastructure(struct xpc_partition *part)
+{
+ int ret, cpuid;
+ struct timer_list *timer;
+ partid_t partid = XPC_PARTID(part);
+
+ /*
+ * Zero out MOST of the entry for this partition. Only the fields
+ * starting with `nchannels' will be zeroed. The preceding fields must
+ * remain `viable' across partition ups and downs, since they may be
+ * referenced during this memset() operation.
+ */
+ memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
+ offsetof(struct xpc_partition, nchannels));
+
+ /*
+ * Allocate all of the channel structures as a contiguous chunk of
+ * memory.
+ */
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+ GFP_KERNEL);
+ if (part->channels == NULL) {
+ dev_err(xpc_chan, "can't get memory for channels\n");
+ return xpcNoMemory;
+ }
+
+ part->nchannels = XPC_NCHANNELS;
+
+ /* allocate all the required GET/PUT values */
+
+ part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+ GFP_KERNEL,
+ &part->local_GPs_base);
+ if (part->local_GPs == NULL) {
+ kfree(part->channels);
+ part->channels = NULL;
+ dev_err(xpc_chan, "can't get memory for local get/put "
+ "values\n");
+ return xpcNoMemory;
+ }
+
+ part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+ GFP_KERNEL,
+ &part->
+ remote_GPs_base);
+ if (part->remote_GPs == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote get/put "
+ "values\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+ return xpcNoMemory;
+ }
+
+ /* allocate all the required open and close args */
+
+ part->local_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
+ &part->local_openclose_args_base);
+ if (part->local_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+ return xpcNoMemory;
+ }
+
+ part->remote_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
+ &part->remote_openclose_args_base);
+ if (part->remote_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+ return xpcNoMemory;
+ }
+
+ xpc_initialize_channels(part, partid);
+
+ atomic_set(&part->nchannels_active, 0);
+ atomic_set(&part->nchannels_engaged, 0);
+
+ /* local_IPI_amo were set to 0 by an earlier memset() */
+
+ /* Initialize this partitions AMO_t structure */
+ part->local_IPI_amo_va = xpc_IPI_init(partid);
+
+ spin_lock_init(&part->IPI_lock);
+
+ atomic_set(&part->channel_mgr_requests, 1);
+ init_waitqueue_head(&part->channel_mgr_wq);
+
+ sprintf(part->IPI_owner, "xpc%02d", partid);
+ ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
+ part->IPI_owner, (void *)(u64)partid);
+ if (ret != 0) {
+ dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+ "errno=%d\n", -ret);
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+ return xpcLackOfResources;
+ }
+
+ /* Setup a timer to check for dropped IPIs */
+ timer = &part->dropped_IPI_timer;
+ init_timer(timer);
+ timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
+ timer->data = (unsigned long)part;
+ timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
+ add_timer(timer);
+
+ /*
+ * With the setting of the partition setup_state to XPC_P_SETUP, we're
+ * declaring that this partition is ready to go.
+ */
+ part->setup_state = XPC_P_SETUP;
+
+ /*
+ * Setup the per partition specific variables required by the
+ * remote partition to establish channel connections with us.
+ *
+ * The setting of the magic # indicates that these per partition
+ * specific variables are ready to be used.
+ */
+ xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
+ xpc_vars_part[partid].openclose_args_pa =
+ __pa(part->local_openclose_args);
+ xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
+ cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
+ xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
+ xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
+ xpc_vars_part[partid].nchannels = part->nchannels;
+ xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
+
+ return xpcSuccess;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be an cacheline sized
+ */
+static enum xpc_retval
+xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
+ const void *src, size_t cnt)
+{
+ bte_result_t bte_ret;
+
+ DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
+ DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
+ DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+ if (part->act_state == XPC_P_DEACTIVATING)
+ return part->reason;
+
+ bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt,
+ (BTE_NORMAL | BTE_WACQUIRE), NULL);
+ if (bte_ret == BTE_SUCCESS)
+ return xpcSuccess;
+
+ dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
+ XPC_PARTID(part), bte_ret);
+
+ return xpc_map_bte_errors(bte_ret);
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+enum xpc_retval
+xpc_pull_remote_vars_part(struct xpc_partition *part)
+{
+ u8 buffer[L1_CACHE_BYTES * 2];
+ struct xpc_vars_part *pulled_entry_cacheline =
+ (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer);
+ struct xpc_vars_part *pulled_entry;
+ u64 remote_entry_cacheline_pa, remote_entry_pa;
+ partid_t partid = XPC_PARTID(part);
+ enum xpc_retval ret;
+
+ /* pull the cacheline that contains the variables we're interested in */
+
+ DBUG_ON(part->remote_vars_part_pa !=
+ L1_CACHE_ALIGN(part->remote_vars_part_pa));
+ DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
+
+ remote_entry_pa = part->remote_vars_part_pa +
+ sn_partition_id * sizeof(struct xpc_vars_part);
+
+ remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+ pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline +
+ (remote_entry_pa &
+ (L1_CACHE_BYTES - 1)));
+
+ ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
+ (void *)remote_entry_cacheline_pa,
+ L1_CACHE_BYTES);
+ if (ret != xpcSuccess) {
+ dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+ "partition %d, ret=%d\n", partid, ret);
+ return ret;
+ }
+
+ /* see if they've been set up yet */
+
+ if (pulled_entry->magic != XPC_VP_MAGIC1 &&
+ pulled_entry->magic != XPC_VP_MAGIC2) {
+
+ if (pulled_entry->magic != 0) {
+ dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d has bad magic value (=0x%lx)\n",
+ partid, sn_partition_id, pulled_entry->magic);
+ return xpcBadMagic;
+ }
+
+ /* they've not been initialized yet */
+ return xpcRetry;
+ }
+
+ if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
+
+ /* validate the variables */
+
+ if (pulled_entry->GPs_pa == 0 ||
+ pulled_entry->openclose_args_pa == 0 ||
+ pulled_entry->IPI_amo_pa == 0) {
+
+ dev_err(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d are not valid\n", partid,
+ sn_partition_id);
+ return xpcInvalidAddress;
+ }
+
+ /* the variables we imported look to be valid */
+
+ part->remote_GPs_pa = pulled_entry->GPs_pa;
+ part->remote_openclose_args_pa =
+ pulled_entry->openclose_args_pa;
+ part->remote_IPI_amo_va =
+ (AMO_t *)__va(pulled_entry->IPI_amo_pa);
+ part->remote_IPI_nasid = pulled_entry->IPI_nasid;
+ part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
+
+ if (part->nchannels > pulled_entry->nchannels)
+ part->nchannels = pulled_entry->nchannels;
+
+ /* let the other side know that we've pulled their variables */
+
+ xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
+ }
+
+ if (pulled_entry->magic == XPC_VP_MAGIC1)
+ return xpcRetry;
+
+ return xpcSuccess;
+}
+
+/*
+ * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_IPI_flags(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ u64 IPI_amo;
+ enum xpc_retval ret;
+
+ /*
+ * See if there are any IPI flags to be handled.
+ */
+
+ spin_lock_irqsave(&part->IPI_lock, irq_flags);
+ IPI_amo = part->local_IPI_amo;
+ if (IPI_amo != 0)
+ part->local_IPI_amo = 0;
+
+ spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+
+ if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
+ ret = xpc_pull_remote_cachelines(part,
+ part->remote_openclose_args,
+ (void *)part->
+ remote_openclose_args_pa,
+ XPC_OPENCLOSE_ARGS_SIZE);
+ if (ret != xpcSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull openclose args from "
+ "partition %d, ret=%d\n", XPC_PARTID(part),
+ ret);
+
+ /* don't bother processing IPIs anymore */
+ IPI_amo = 0;
+ }
+ }
+
+ if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
+ ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
+ (void *)part->remote_GPs_pa,
+ XPC_GP_SIZE);
+ if (ret != xpcSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull GPs from partition "
+ "%d, ret=%d\n", XPC_PARTID(part), ret);
+
+ /* don't bother processing IPIs anymore */
+ IPI_amo = 0;
+ }
+ }
+
+ return IPI_amo;
+}
+
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xpc_retval
+xpc_allocate_local_msgqueue(struct xpc_channel *ch)
+{
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->msg_size;
+ ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
+ GFP_KERNEL,
+ &ch->local_msgqueue_base);
+ if (ch->local_msgqueue == NULL)
+ continue;
+
+ nbytes = nentries * sizeof(struct xpc_notify);
+ ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+ if (ch->notify_queue == NULL) {
+ kfree(ch->local_msgqueue_base);
+ ch->local_msgqueue = NULL;
+ continue;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->local_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->local_nentries, ch->partid, ch->number);
+
+ ch->local_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpcSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+ "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpcNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xpc_retval
+xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
+{
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ DBUG_ON(ch->remote_nentries <= 0);
+
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->msg_size;
+ ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
+ GFP_KERNEL,
+ &ch->remote_msgqueue_base);
+ if (ch->remote_msgqueue == NULL)
+ continue;
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->remote_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->remote_nentries, ch->partid, ch->number);
+
+ ch->remote_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpcSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpcNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xpc_retval
+xpc_allocate_msgqueues(struct xpc_channel *ch)
+{
+ unsigned long irq_flags;
+ enum xpc_retval ret;
+
+ DBUG_ON(ch->flags & XPC_C_SETUP);
+
+ ret = xpc_allocate_local_msgqueue(ch);
+ if (ret != xpcSuccess)
+ return ret;
+
+ ret = xpc_allocate_remote_msgqueue(ch);
+ if (ret != xpcSuccess) {
+ kfree(ch->local_msgqueue_base);
+ ch->local_msgqueue = NULL;
+ kfree(ch->notify_queue);
+ ch->notify_queue = NULL;
+ return ret;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_SETUP;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ return xpcSuccess;
+}
+
+/*
+ * Process a connect message from a remote partition.
+ *
+ * Note: xpc_process_connect() is expecting to be called with the
+ * spin_lock_irqsave held and will leave it locked upon return.
+ */
+static void
+xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ enum xpc_retval ret;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (!(ch->flags & XPC_C_OPENREQUEST) ||
+ !(ch->flags & XPC_C_ROPENREQUEST)) {
+ /* nothing more to do for now */
+ return;
+ }
+ DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
+
+ if (!(ch->flags & XPC_C_SETUP)) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ ret = xpc_allocate_msgqueues(ch);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+
+ if (ret != xpcSuccess)
+ XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
+
+ if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
+ return;
+
+ DBUG_ON(!(ch->flags & XPC_C_SETUP));
+ DBUG_ON(ch->local_msgqueue == NULL);
+ DBUG_ON(ch->remote_msgqueue == NULL);
+ }
+
+ if (!(ch->flags & XPC_C_OPENREPLY)) {
+ ch->flags |= XPC_C_OPENREPLY;
+ xpc_IPI_send_openreply(ch, irq_flags);
+ }
+
+ if (!(ch->flags & XPC_C_ROPENREPLY))
+ return;
+
+ DBUG_ON(ch->remote_msgqueue_pa == 0);
+
+ ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
+
+ dev_info(xpc_chan, "channel %d to partition %d connected\n",
+ ch->number, ch->partid);
+
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ xpc_create_kthreads(ch, 1, 0);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
+{
+ struct xpc_notify *notify;
+ u8 notify_type;
+ s64 get = ch->w_remote_GP.get - 1;
+
+ while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+ notify = &ch->notify_queue[get % ch->local_nentries];
+
+ /*
+ * See if the notify entry indicates it was associated with
+ * a message who's sender wants to be notified. It is possible
+ * that it is, but someone else is doing or has done the
+ * notification.
+ */
+ notify_type = notify->type;
+ if (notify_type == 0 ||
+ cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+ continue;
+ }
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ atomic_dec(&ch->n_to_notify);
+
+ if (notify->func != NULL) {
+ dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n",
+ (void *)notify, get, ch->partid, ch->number);
+
+ notify->func(reason, ch->partid, ch->number,
+ notify->key);
+
+ dev_dbg(xpc_chan, "notify->func() returned, "
+ "notify=0x%p, msg_number=%ld, partid=%d, "
+ "channel=%d\n", (void *)notify, get,
+ ch->partid, ch->number);
+ }
+ }
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ *
+ * Note: ch->reason and ch->reason_line are left set for debugging purposes,
+ * they're cleared when XPC_C_DISCONNECTED is cleared.
+ */
+static void
+xpc_free_msgqueues(struct xpc_channel *ch)
+{
+ DBUG_ON(!spin_is_locked(&ch->lock));
+ DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
+ ch->remote_msgqueue_pa = 0;
+ ch->func = NULL;
+ ch->key = NULL;
+ ch->msg_size = 0;
+ ch->local_nentries = 0;
+ ch->remote_nentries = 0;
+ ch->kthreads_assigned_limit = 0;
+ ch->kthreads_idle_limit = 0;
+
+ ch->local_GP->get = 0;
+ ch->local_GP->put = 0;
+ ch->remote_GP.get = 0;
+ ch->remote_GP.put = 0;
+ ch->w_local_GP.get = 0;
+ ch->w_local_GP.put = 0;
+ ch->w_remote_GP.get = 0;
+ ch->w_remote_GP.put = 0;
+ ch->next_msg_to_pull = 0;
+
+ if (ch->flags & XPC_C_SETUP) {
+ ch->flags &= ~XPC_C_SETUP;
+
+ dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+ ch->flags, ch->partid, ch->number);
+
+ kfree(ch->local_msgqueue_base);
+ ch->local_msgqueue = NULL;
+ kfree(ch->remote_msgqueue_base);
+ ch->remote_msgqueue = NULL;
+ kfree(ch->notify_queue);
+ ch->notify_queue = NULL;
+ }
+}
+
+/*
+ * spin_lock_irqsave() is expected to be held on entry.
+ */
+static void
+xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (!(ch->flags & XPC_C_DISCONNECTING))
+ return;
+
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+ /* make sure all activity has settled down first */
+
+ if (atomic_read(&ch->kthreads_assigned) > 0 ||
+ atomic_read(&ch->references) > 0) {
+ return;
+ }
+ DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
+
+ if (part->act_state == XPC_P_DEACTIVATING) {
+ /* can't proceed until the other side disengages from us */
+ if (xpc_partition_engaged(1UL << ch->partid))
+ return;
+
+ } else {
+
+ /* as long as the other side is up do the full protocol */
+
+ if (!(ch->flags & XPC_C_RCLOSEREQUEST))
+ return;
+
+ if (!(ch->flags & XPC_C_CLOSEREPLY)) {
+ ch->flags |= XPC_C_CLOSEREPLY;
+ xpc_IPI_send_closereply(ch, irq_flags);
+ }
+
+ if (!(ch->flags & XPC_C_RCLOSEREPLY))
+ return;
+ }
+
+ /* wake those waiting for notify completion */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /* >>> we do callout while holding ch->lock */
+ xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+ }
+
+ /* both sides are disconnected now */
+
+ if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ xpc_disconnect_callout(ch, xpcDisconnected);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+
+ /* it's now safe to free the channel's message queues */
+ xpc_free_msgqueues(ch);
+
+ /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
+
+ atomic_dec(&part->nchannels_active);
+
+ if (channel_was_connected) {
+ dev_info(xpc_chan, "channel %d to partition %d disconnected, "
+ "reason=%d\n", ch->number, ch->partid, ch->reason);
+ }
+
+ if (ch->flags & XPC_C_WDISCONNECT) {
+ /* we won't lose the CPU since we're holding ch->lock */
+ complete(&ch->wdisconnect_wait);
+ } else if (ch->delayed_IPI_flags) {
+ if (part->act_state != XPC_P_DEACTIVATING) {
+ /* time to take action on any delayed IPI flags */
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
+ ch->delayed_IPI_flags);
+ spin_unlock(&part->IPI_lock);
+ }
+ ch->delayed_IPI_flags = 0;
+ }
+}
+
+/*
+ * Process a change in the channel's remote connection state.
+ */
+static void
+xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
+ u8 IPI_flags)
+{
+ unsigned long irq_flags;
+ struct xpc_openclose_args *args =
+ &part->remote_openclose_args[ch_number];
+ struct xpc_channel *ch = &part->channels[ch_number];
+ enum xpc_retval reason;
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+again:
+
+ if ((ch->flags & XPC_C_DISCONNECTED) &&
+ (ch->flags & XPC_C_WDISCONNECT)) {
+ /*
+ * Delay processing IPI flags until thread waiting disconnect
+ * has had a chance to see that the channel is disconnected.
+ */
+ ch->delayed_IPI_flags |= IPI_flags;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
+
+ dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
+ "from partid=%d, channel=%d\n", args->reason,
+ ch->partid, ch->number);
+
+ /*
+ * If RCLOSEREQUEST is set, we're probably waiting for
+ * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
+ * with this RCLOSEREQUEST in the IPI_flags.
+ */
+
+ if (ch->flags & XPC_C_RCLOSEREQUEST) {
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
+ DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
+
+ DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
+ IPI_flags &= ~XPC_IPI_CLOSEREPLY;
+ ch->flags |= XPC_C_RCLOSEREPLY;
+
+ /* both sides have finished disconnecting */
+ xpc_process_disconnect(ch, &irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ goto again;
+ }
+
+ if (ch->flags & XPC_C_DISCONNECTED) {
+ if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
+ if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number) &
+ XPC_IPI_OPENREQUEST)) {
+
+ DBUG_ON(ch->delayed_IPI_flags != 0);
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number,
+ XPC_IPI_CLOSEREQUEST);
+ spin_unlock(&part->IPI_lock);
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&part->nchannels_active);
+ ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
+ }
+
+ IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
+
+ /*
+ * The meaningful CLOSEREQUEST connection state fields are:
+ * reason = reason connection is to be closed
+ */
+
+ ch->flags |= XPC_C_RCLOSEREQUEST;
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+ reason = args->reason;
+ if (reason <= xpcSuccess || reason > xpcUnknownReason)
+ reason = xpcUnknownReason;
+ else if (reason == xpcUnregistering)
+ reason = xpcOtherUnregistering;
+
+ XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+ DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ xpc_process_disconnect(ch, &irq_flags);
+ }
+
+ if (IPI_flags & XPC_IPI_CLOSEREPLY) {
+
+ dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
+ " channel=%d\n", ch->partid, ch->number);
+
+ if (ch->flags & XPC_C_DISCONNECTED) {
+ DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+ if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+ if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
+ & XPC_IPI_CLOSEREQUEST)) {
+
+ DBUG_ON(ch->delayed_IPI_flags != 0);
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number,
+ XPC_IPI_CLOSEREPLY);
+ spin_unlock(&part->IPI_lock);
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ ch->flags |= XPC_C_RCLOSEREPLY;
+
+ if (ch->flags & XPC_C_CLOSEREPLY) {
+ /* both sides have finished disconnecting */
+ xpc_process_disconnect(ch, &irq_flags);
+ }
+ }
+
+ if (IPI_flags & XPC_IPI_OPENREQUEST) {
+
+ dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
+ "local_nentries=%d) received from partid=%d, "
+ "channel=%d\n", args->msg_size, args->local_nentries,
+ ch->partid, ch->number);
+
+ if (part->act_state == XPC_P_DEACTIVATING ||
+ (ch->flags & XPC_C_ROPENREQUEST)) {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+ ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+ DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
+ XPC_C_OPENREQUEST)));
+ DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+ XPC_C_OPENREPLY | XPC_C_CONNECTED));
+
+ /*
+ * The meaningful OPENREQUEST connection state fields are:
+ * msg_size = size of channel's messages in bytes
+ * local_nentries = remote partition's local_nentries
+ */
+ if (args->msg_size == 0 || args->local_nentries == 0) {
+ /* assume OPENREQUEST was delayed by mistake */
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
+ ch->remote_nentries = args->local_nentries;
+
+ if (ch->flags & XPC_C_OPENREQUEST) {
+ if (args->msg_size != ch->msg_size) {
+ XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+ } else {
+ ch->msg_size = args->msg_size;
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&part->nchannels_active);
+ }
+
+ xpc_process_connect(ch, &irq_flags);
+ }
+
+ if (IPI_flags & XPC_IPI_OPENREPLY) {
+
+ dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
+ "local_nentries=%d, remote_nentries=%d) received from "
+ "partid=%d, channel=%d\n", args->local_msgqueue_pa,
+ args->local_nentries, args->remote_nentries,
+ ch->partid, ch->number);
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+ if (!(ch->flags & XPC_C_OPENREQUEST)) {
+ XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+ DBUG_ON(ch->flags & XPC_C_CONNECTED);
+
+ /*
+ * The meaningful OPENREPLY connection state fields are:
+ * local_msgqueue_pa = physical address of remote
+ * partition's local_msgqueue
+ * local_nentries = remote partition's local_nentries
+ * remote_nentries = remote partition's remote_nentries
+ */
+ DBUG_ON(args->local_msgqueue_pa == 0);
+ DBUG_ON(args->local_nentries == 0);
+ DBUG_ON(args->remote_nentries == 0);
+
+ ch->flags |= XPC_C_ROPENREPLY;
+ ch->remote_msgqueue_pa = args->local_msgqueue_pa;
+
+ if (args->local_nentries < ch->remote_nentries) {
+ dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+ "remote_nentries=%d, old remote_nentries=%d, "
+ "partid=%d, channel=%d\n",
+ args->local_nentries, ch->remote_nentries,
+ ch->partid, ch->number);
+
+ ch->remote_nentries = args->local_nentries;
+ }
+ if (args->remote_nentries < ch->local_nentries) {
+ dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+ "local_nentries=%d, old local_nentries=%d, "
+ "partid=%d, channel=%d\n",
+ args->remote_nentries, ch->local_nentries,
+ ch->partid, ch->number);
+
+ ch->local_nentries = args->remote_nentries;
+ }
+
+ xpc_process_connect(ch, &irq_flags);
+ }
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+}
+
+/*
+ * Attempt to establish a channel connection to a remote partition.
+ */
+static enum xpc_retval
+xpc_connect_channel(struct xpc_channel *ch)
+{
+ unsigned long irq_flags;
+ struct xpc_registration *registration = &xpc_registrations[ch->number];
+
+ if (mutex_trylock(&registration->mutex) == 0)
+ return xpcRetry;
+
+ if (!XPC_CHANNEL_REGISTERED(ch->number)) {
+ mutex_unlock(&registration->mutex);
+ return xpcUnregistered;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ DBUG_ON(ch->flags & XPC_C_CONNECTED);
+ DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ mutex_unlock(&registration->mutex);
+ return ch->reason;
+ }
+
+ /* add info from the channel connect registration to the channel */
+
+ ch->kthreads_assigned_limit = registration->assigned_limit;
+ ch->kthreads_idle_limit = registration->idle_limit;
+ DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+ DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
+ DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
+
+ ch->func = registration->func;
+ DBUG_ON(registration->func == NULL);
+ ch->key = registration->key;
+
+ ch->local_nentries = registration->nentries;
+
+ if (ch->flags & XPC_C_ROPENREQUEST) {
+ if (registration->msg_size != ch->msg_size) {
+ /* the local and remote sides aren't the same */
+
+ /*
+ * Because XPC_DISCONNECT_CHANNEL() can block we're
+ * forced to up the registration sema before we unlock
+ * the channel lock. But that's okay here because we're
+ * done with the part that required the registration
+ * sema. XPC_DISCONNECT_CHANNEL() requires that the
+ * channel lock be locked and will unlock and relock
+ * the channel lock as needed.
+ */
+ mutex_unlock(&registration->mutex);
+ XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpcUnequalMsgSizes;
+ }
+ } else {
+ ch->msg_size = registration->msg_size;
+
+ XPC_SET_REASON(ch, 0, 0);
+ ch->flags &= ~XPC_C_DISCONNECTED;
+
+ atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
+ }
+
+ mutex_unlock(&registration->mutex);
+
+ /* initiate the connection */
+
+ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
+ xpc_IPI_send_openrequest(ch, &irq_flags);
+
+ xpc_process_connect(ch, &irq_flags);
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ return xpcSuccess;
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
+{
+ struct xpc_msg *msg;
+ s64 get;
+
+ get = ch->w_remote_GP.get;
+ do {
+ msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+ (get % ch->local_nentries) *
+ ch->msg_size);
+ msg->flags = 0;
+ } while (++get < ch->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
+{
+ struct xpc_msg *msg;
+ s64 put;
+
+ put = ch->w_remote_GP.put;
+ do {
+ msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+ (put % ch->remote_nentries) *
+ ch->msg_size);
+ msg->flags = 0;
+ } while (++put < ch->remote_GP.put);
+}
+
+static void
+xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
+{
+ struct xpc_channel *ch = &part->channels[ch_number];
+ int nmsgs_sent;
+
+ ch->remote_GP = part->remote_GPs[ch_number];
+
+ /* See what, if anything, has changed for each connected channel */
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch->w_remote_GP.get == ch->remote_GP.get &&
+ ch->w_remote_GP.put == ch->remote_GP.put) {
+ /* nothing changed since GPs were last pulled */
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /*
+ * First check to see if messages recently sent by us have been
+ * received by the other side. (The remote GET value will have
+ * changed since we last looked at it.)
+ */
+
+ if (ch->w_remote_GP.get != ch->remote_GP.get) {
+
+ /*
+ * We need to notify any senders that want to be notified
+ * that their sent messages have been received by their
+ * intended recipients. We need to do this before updating
+ * w_remote_GP.get so that we don't allocate the same message
+ * queue entries prematurely (see xpc_allocate_msg()).
+ */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /*
+ * Notify senders that messages sent have been
+ * received and delivered by the other side.
+ */
+ xpc_notify_senders(ch, xpcMsgDelivered,
+ ch->remote_GP.get);
+ }
+
+ /*
+ * Clear msg->flags in previously sent messages, so that
+ * they're ready for xpc_allocate_msg().
+ */
+ xpc_clear_local_msgqueue_flags(ch);
+
+ ch->w_remote_GP.get = ch->remote_GP.get;
+
+ dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
+ "channel=%d\n", ch->w_remote_GP.get, ch->partid,
+ ch->number);
+
+ /*
+ * If anyone was waiting for message queue entries to become
+ * available, wake them up.
+ */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+ }
+
+ /*
+ * Now check for newly sent messages by the other side. (The remote
+ * PUT value will have changed since we last looked at it.)
+ */
+
+ if (ch->w_remote_GP.put != ch->remote_GP.put) {
+ /*
+ * Clear msg->flags in previously received messages, so that
+ * they're ready for xpc_get_deliverable_msg().
+ */
+ xpc_clear_remote_msgqueue_flags(ch);
+
+ ch->w_remote_GP.put = ch->remote_GP.put;
+
+ dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
+ "channel=%d\n", ch->w_remote_GP.put, ch->partid,
+ ch->number);
+
+ nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
+ if (nmsgs_sent > 0) {
+ dev_dbg(xpc_chan, "msgs waiting to be copied and "
+ "delivered=%d, partid=%d, channel=%d\n",
+ nmsgs_sent, ch->partid, ch->number);
+
+ if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+ xpc_activate_kthreads(ch, nmsgs_sent);
+ }
+ }
+
+ xpc_msgqueue_deref(ch);
+}
+
+void
+xpc_process_channel_activity(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ u64 IPI_amo, IPI_flags;
+ struct xpc_channel *ch;
+ int ch_number;
+ u32 ch_flags;
+
+ IPI_amo = xpc_get_IPI_flags(part);
+
+ /*
+ * Initiate channel connections for registered channels.
+ *
+ * For each connected channel that has pending messages activate idle
+ * kthreads and/or create new kthreads as needed.
+ */
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ /*
+ * Process any open or close related IPI flags, and then deal
+ * with connecting or disconnecting the channel as required.
+ */
+
+ IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
+
+ if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags))
+ xpc_process_openclose_IPI(part, ch_number, IPI_flags);
+
+ ch_flags = ch->flags; /* need an atomic snapshot of flags */
+
+ if (ch_flags & XPC_C_DISCONNECTING) {
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ xpc_process_disconnect(ch, &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ continue;
+ }
+
+ if (part->act_state == XPC_P_DEACTIVATING)
+ continue;
+
+ if (!(ch_flags & XPC_C_CONNECTED)) {
+ if (!(ch_flags & XPC_C_OPENREQUEST)) {
+ DBUG_ON(ch_flags & XPC_C_SETUP);
+ (void)xpc_connect_channel(ch);
+ } else {
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ xpc_process_connect(ch, &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+ continue;
+ }
+
+ /*
+ * Process any message related IPI flags, this may involve the
+ * activation of kthreads to deliver any pending messages sent
+ * from the other partition.
+ */
+
+ if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags))
+ xpc_process_msg_IPI(part, ch_number);
+ }
+}
+
+/*
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down. XPC responds by tearing down the XPartition Communication
+ * infrastructure used for the just downed partition.
+ *
+ * XPC's heartbeat code will never call this function and xpc_partition_up()
+ * at the same time. Nor will it ever make multiple calls to either function
+ * at the same time.
+ */
+void
+xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
+{
+ unsigned long irq_flags;
+ int ch_number;
+ struct xpc_channel *ch;
+
+ dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
+ XPC_PARTID(part), reason);
+
+ if (!xpc_part_ref(part)) {
+ /* infrastructure for this partition isn't currently set up */
+ return;
+ }
+
+ /* disconnect channels associated with the partition going down */
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ xpc_msgqueue_ref(ch);
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ xpc_msgqueue_deref(ch);
+ }
+
+ xpc_wakeup_channel_mgr(part);
+
+ xpc_part_deref(part);
+}
+
+/*
+ * Teardown the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+void
+xpc_teardown_infrastructure(struct xpc_partition *part)
+{
+ partid_t partid = XPC_PARTID(part);
+
+ /*
+ * We start off by making this partition inaccessible to local
+ * processes by marking it as no longer setup. Then we make it
+ * inaccessible to remote processes by clearing the XPC per partition
+ * specific variable's magic # (which indicates that these variables
+ * are no longer valid) and by ignoring all XPC notify IPIs sent to
+ * this partition.
+ */
+
+ DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+ DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+ DBUG_ON(part->setup_state != XPC_P_SETUP);
+ part->setup_state = XPC_P_WTEARDOWN;
+
+ xpc_vars_part[partid].magic = 0;
+
+ free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+ /*
+ * Before proceeding with the teardown we have to wait until all
+ * existing references cease.
+ */
+ wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+ /* now we can begin tearing down the infrastructure */
+
+ part->setup_state = XPC_P_TORNDOWN;
+
+ /* in case we've still got outstanding timers registered... */
+ del_timer_sync(&part->dropped_IPI_timer);
+
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+ part->local_IPI_amo_va = NULL;
+}
+
+/*
+ * Called by XP at the time of channel connection registration to cause
+ * XPC to establish connections to all currently active partitions.
+ */
+void
+xpc_initiate_connect(int ch_number)
+{
+ partid_t partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ ch = &part->channels[ch_number];
+
+ /*
+ * Initiate the establishment of a connection on the
+ * newly registered channel to the remote partition.
+ */
+ xpc_wakeup_channel_mgr(part);
+ xpc_part_deref(part);
+ }
+ }
+}
+
+void
+xpc_connected_callout(struct xpc_channel *ch)
+{
+ /* let the registerer know that a connection has been established */
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+
+ ch->func(xpcConnected, ch->partid, ch->number,
+ (void *)(u64)ch->local_nentries, ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+ }
+}
+
+/*
+ * Called by XP at the time of channel connection unregistration to cause
+ * XPC to teardown all current connections for the specified channel.
+ *
+ * Before returning xpc_initiate_disconnect() will wait until all connections
+ * on the specified channel have been closed/torndown. So the caller can be
+ * assured that they will not be receiving any more callouts from XPC to the
+ * function they registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_initiate_disconnect(int ch_number)
+{
+ unsigned long irq_flags;
+ partid_t partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+
+ /* initiate the channel disconnect for every active partition */
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ ch = &part->channels[ch_number];
+ xpc_msgqueue_ref(ch);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ if (!(ch->flags & XPC_C_DISCONNECTED)) {
+ ch->flags |= XPC_C_WDISCONNECT;
+
+ XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
+ &irq_flags);
+ }
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
+ }
+ }
+
+ xpc_disconnect_wait(ch_number);
+}
+
+/*
+ * To disconnect a channel, and reflect it back to all who may be waiting.
+ *
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
+ *
+ * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
+ */
+void
+xpc_disconnect_channel(const int line, struct xpc_channel *ch,
+ enum xpc_retval reason, unsigned long *irq_flags)
+{
+ u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+ return;
+
+ DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
+
+ dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
+ reason, line, ch->partid, ch->number);
+
+ XPC_SET_REASON(ch, reason, line);
+
+ ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
+ /* some of these may not have been set */
+ ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
+ XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+ XPC_C_CONNECTING | XPC_C_CONNECTED);
+
+ xpc_IPI_send_closerequest(ch, irq_flags);
+
+ if (channel_was_connected)
+ ch->flags |= XPC_C_WASCONNECTED;
+
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+ /* wake all idle kthreads so they can exit */
+ if (atomic_read(&ch->kthreads_idle) > 0) {
+ wake_up_all(&ch->idle_wq);
+
+ } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ /* start a kthread that will do the xpcDisconnecting callout */
+ xpc_create_kthreads(ch, 1, 1);
+ }
+
+ /* wake those waiting to allocate an entry from the local msg queue */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+void
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
+{
+ /*
+ * Let the channel's registerer know that the channel is being
+ * disconnected. We don't want to do this if the registerer was never
+ * informed of a connection being made.
+ */
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
+
+ ch->func(reason, ch->partid, ch->number, NULL, ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
+ }
+}
+
+/*
+ * Wait for a message entry to become available for the specified channel,
+ * but don't wait any longer than 1 jiffy.
+ */
+static enum xpc_retval
+xpc_allocate_msg_wait(struct xpc_channel *ch)
+{
+ enum xpc_retval ret;
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ DBUG_ON(ch->reason == xpcInterrupted);
+ return ch->reason;
+ }
+
+ atomic_inc(&ch->n_on_msg_allocate_wq);
+ ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
+ atomic_dec(&ch->n_on_msg_allocate_wq);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ DBUG_ON(ch->reason == xpcInterrupted);
+ } else if (ret == 0) {
+ ret = xpcTimeout;
+ } else {
+ ret = xpcInterrupted;
+ }
+
+ return ret;
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xpc_retval
+xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
+ struct xpc_msg **address_of_msg)
+{
+ struct xpc_msg *msg;
+ enum xpc_retval ret;
+ s64 put;
+
+ /* this reference will be dropped in xpc_send_msg() */
+ xpc_msgqueue_ref(ch);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ xpc_msgqueue_deref(ch);
+ return ch->reason;
+ }
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return xpcNotConnected;
+ }
+
+ /*
+ * Get the next available message entry from the local message queue.
+ * If none are available, we'll make sure that we grab the latest
+ * GP values.
+ */
+ ret = xpcTimeout;
+
+ while (1) {
+
+ put = ch->w_local_GP.put;
+ rmb(); /* guarantee that .put loads before .get */
+ if (put - ch->w_remote_GP.get < ch->local_nentries) {
+
+ /* There are available message entries. We need to try
+ * to secure one for ourselves. We'll do this by trying
+ * to increment w_local_GP.put as long as someone else
+ * doesn't beat us to it. If they do, we'll have to
+ * try again.
+ */
+ if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
+ /* we got the entry referenced by put */
+ break;
+ }
+ continue; /* try again */
+ }
+
+ /*
+ * There aren't any available msg entries at this time.
+ *
+ * In waiting for a message entry to become available,
+ * we set a timeout in case the other side is not
+ * sending completion IPIs. This lets us fake an IPI
+ * that will cause the IPI handler to fetch the latest
+ * GP values as if an IPI was sent by the other side.
+ */
+ if (ret == xpcTimeout)
+ xpc_IPI_send_local_msgrequest(ch);
+
+ if (flags & XPC_NOWAIT) {
+ xpc_msgqueue_deref(ch);
+ return xpcNoWait;
+ }
+
+ ret = xpc_allocate_msg_wait(ch);
+ if (ret != xpcInterrupted && ret != xpcTimeout) {
+ xpc_msgqueue_deref(ch);
+ return ret;
+ }
+ }
+
+ /* get the message's address and initialize it */
+ msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+ (put % ch->local_nentries) * ch->msg_size);
+
+ DBUG_ON(msg->flags != 0);
+ msg->number = put;
+
+ dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
+ (void *)msg, msg->number, ch->partid, ch->number);
+
+ *address_of_msg = msg;
+
+ return xpcSuccess;
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel. NOTE that this routine can sleep waiting for a message
+ * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel #.
+ * flags - see xpc.h for valid flags.
+ * payload - address of the allocated payload area pointer (filled in on
+ * return) in which the user-defined message is constructed.
+ */
+enum xpc_retval
+xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ enum xpc_retval ret = xpcUnknownReason;
+ struct xpc_msg *msg = NULL;
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+
+ *payload = NULL;
+
+ if (xpc_part_ref(part)) {
+ ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
+ xpc_part_deref(part);
+
+ if (msg != NULL)
+ *payload = &msg->payload;
+ }
+
+ return ret;
+}
+
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send an IPI to the recipient
+ * partition.
+ */
+static void
+xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
+{
+ struct xpc_msg *msg;
+ s64 put = initial_put + 1;
+ int send_IPI = 0;
+
+ while (1) {
+
+ while (1) {
+ if (put == ch->w_local_GP.put)
+ break;
+
+ msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+ (put % ch->local_nentries) *
+ ch->msg_size);
+
+ if (!(msg->flags & XPC_M_READY))
+ break;
+
+ put++;
+ }
+
+ if (put == initial_put) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
+ initial_put) {
+ /* someone else beat us to it */
+ DBUG_ON(ch->local_GP->put < initial_put);
+ break;
+ }
+
+ /* we just set the new value of local_GP->put */
+
+ dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
+ "channel=%d\n", put, ch->partid, ch->number);
+
+ send_IPI = 1;
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->put is not XPC_M_READY or that local_GP->put
+ * equals w_local_GP.put, so we'll go have a look.
+ */
+ initial_put = put;
+ }
+
+ if (send_IPI)
+ xpc_IPI_send_msgrequest(ch);
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends an IPI to the partition the
+ * message is being sent to.
+ */
+static enum xpc_retval
+xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
+ xpc_notify_func func, void *key)
+{
+ enum xpc_retval ret = xpcSuccess;
+ struct xpc_notify *notify = notify;
+ s64 put, msg_number = msg->number;
+
+ DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+ DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
+ msg_number % ch->local_nentries);
+ DBUG_ON(msg->flags & XPC_M_READY);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ /* drop the reference grabbed in xpc_allocate_msg() */
+ xpc_msgqueue_deref(ch);
+ return ch->reason;
+ }
+
+ if (notify_type != 0) {
+ /*
+ * Tell the remote side to send an ACK interrupt when the
+ * message has been delivered.
+ */
+ msg->flags |= XPC_M_INTERRUPT;
+
+ atomic_inc(&ch->n_to_notify);
+
+ notify = &ch->notify_queue[msg_number % ch->local_nentries];
+ notify->func = func;
+ notify->key = key;
+ notify->type = notify_type;
+
+ /* >>> is a mb() needed here? */
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ /*
+ * An error occurred between our last error check and
+ * this one. We will try to clear the type field from
+ * the notify entry. If we succeed then
+ * xpc_disconnect_channel() didn't already process
+ * the notify entry.
+ */
+ if (cmpxchg(&notify->type, notify_type, 0) ==
+ notify_type) {
+ atomic_dec(&ch->n_to_notify);
+ ret = ch->reason;
+ }
+
+ /* drop the reference grabbed in xpc_allocate_msg() */
+ xpc_msgqueue_deref(ch);
+ return ret;
+ }
+ }
+
+ msg->flags |= XPC_M_READY;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of ch->local_GP->put.
+ */
+ mb();
+
+ /* see if the message is next in line to be sent, if so send it */
+
+ put = ch->local_GP->put;
+ if (put == msg_number)
+ xpc_send_msgs(ch, put);
+
+ /* drop the reference grabbed in xpc_allocate_msg() */
+ xpc_msgqueue_deref(ch);
+ return ret;
+}
+
+/*
+ * Send a message previously allocated using xpc_initiate_allocate() on the
+ * specified channel connected to the specified partition.
+ *
+ * This routine will not wait for the message to be received, nor will
+ * notification be given when it does happen. Once this routine has returned
+ * the message entry allocated via xpc_initiate_allocate() is no longer
+ * accessable to the caller.
+ *
+ * This routine, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # to send message on.
+ * payload - pointer to the payload area allocated via
+ * xpc_initiate_allocate().
+ */
+enum xpc_retval
+xpc_initiate_send(partid_t partid, int ch_number, void *payload)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
+ enum xpc_retval ret;
+
+ dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+ partid, ch_number);
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+ DBUG_ON(msg == NULL);
+
+ ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
+
+ return ret;
+}
+
+/*
+ * Send a message previously allocated using xpc_initiate_allocate on the
+ * specified channel connected to the specified partition.
+ *
+ * This routine will not wait for the message to be sent. Once this routine
+ * has returned the message entry allocated via xpc_initiate_allocate() is no
+ * longer accessable to the caller.
+ *
+ * Once the remote end of the channel has received the message, the function
+ * passed as an argument to xpc_initiate_send_notify() will be called. This
+ * allows the sender to free up or re-use any buffers referenced by the
+ * message, but does NOT mean the message has been processed at the remote
+ * end by a receiver.
+ *
+ * If this routine returns an error, the caller's function will NOT be called.
+ *
+ * This routine, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # to send message on.
+ * payload - pointer to the payload area allocated via
+ * xpc_initiate_allocate().
+ * func - function to call with asynchronous notification of message
+ * receipt. THIS FUNCTION MUST BE NON-BLOCKING.
+ * key - user-defined key to be passed to the function when it's called.
+ */
+enum xpc_retval
+xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload,
+ xpc_notify_func func, void *key)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
+ enum xpc_retval ret;
+
+ dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+ partid, ch_number);
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+ DBUG_ON(msg == NULL);
+ DBUG_ON(func == NULL);
+
+ ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
+ func, key);
+ return ret;
+}
+
+static struct xpc_msg *
+xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct xpc_msg *remote_msg, *msg;
+ u32 msg_index, nmsgs;
+ u64 msg_offset;
+ enum xpc_retval ret;
+
+ if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
+ /* we were interrupted by a signal */
+ return NULL;
+ }
+
+ while (get >= ch->next_msg_to_pull) {
+
+ /* pull as many messages as are ready and able to be pulled */
+
+ msg_index = ch->next_msg_to_pull % ch->remote_nentries;
+
+ DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
+ nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
+ if (msg_index + nmsgs > ch->remote_nentries) {
+ /* ignore the ones that wrap the msg queue for now */
+ nmsgs = ch->remote_nentries - msg_index;
+ }
+
+ msg_offset = msg_index * ch->msg_size;
+ msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
+ remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
+ msg_offset);
+
+ ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
+ nmsgs * ch->msg_size);
+ if (ret != xpcSuccess) {
+
+ dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+ " msg %ld from partition %d, channel=%d, "
+ "ret=%d\n", nmsgs, ch->next_msg_to_pull,
+ ch->partid, ch->number, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ mutex_unlock(&ch->msg_to_pull_mutex);
+ return NULL;
+ }
+
+ ch->next_msg_to_pull += nmsgs;
+ }
+
+ mutex_unlock(&ch->msg_to_pull_mutex);
+
+ /* return the message we were looking for */
+ msg_offset = (get % ch->remote_nentries) * ch->msg_size;
+ msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
+
+ return msg;
+}
+
+/*
+ * Get a message to be delivered.
+ */
+static struct xpc_msg *
+xpc_get_deliverable_msg(struct xpc_channel *ch)
+{
+ struct xpc_msg *msg = NULL;
+ s64 get;
+
+ do {
+ if (ch->flags & XPC_C_DISCONNECTING)
+ break;
+
+ get = ch->w_local_GP.get;
+ rmb(); /* guarantee that .get loads before .put */
+ if (get == ch->w_remote_GP.put)
+ break;
+
+ /* There are messages waiting to be pulled and delivered.
+ * We need to try to secure one for ourselves. We'll do this
+ * by trying to increment w_local_GP.get and hope that no one
+ * else beats us to it. If they do, we'll we'll simply have
+ * to try again for the next one.
+ */
+
+ if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
+ /* we got the entry referenced by get */
+
+ dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
+ "partid=%d, channel=%d\n", get + 1,
+ ch->partid, ch->number);
+
+ /* pull the message from the remote partition */
+
+ msg = xpc_pull_remote_msg(ch, get);
+
+ DBUG_ON(msg != NULL && msg->number != get);
+ DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
+ DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
+
+ break;
+ }
+
+ } while (1);
+
+ return msg;
+}
+
+/*
+ * Deliver a message to its intended recipient.
+ */
+void
+xpc_deliver_msg(struct xpc_channel *ch)
+{
+ struct xpc_msg *msg;
+
+ msg = xpc_get_deliverable_msg(ch);
+ if (msg != NULL) {
+
+ /*
+ * This ref is taken to protect the payload itself from being
+ * freed before the user is finished with it, which the user
+ * indicates by calling xpc_initiate_received().
+ */
+ xpc_msgqueue_ref(ch);
+
+ atomic_inc(&ch->kthreads_active);
+
+ if (ch->func != NULL) {
+ dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n",
+ (void *)msg, msg->number, ch->partid,
+ ch->number);
+
+ /* deliver the message to its intended recipient */
+ ch->func(xpcMsgReceived, ch->partid, ch->number,
+ &msg->payload, ch->key);
+
+ dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n",
+ (void *)msg, msg->number, ch->partid,
+ ch->number);
+ }
+
+ atomic_dec(&ch->kthreads_active);
+ }
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send an IPI to the message sender's partition.
+ */
+static void
+xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+ struct xpc_msg *msg;
+ s64 get = initial_get + 1;
+ int send_IPI = 0;
+
+ while (1) {
+
+ while (1) {
+ if (get == ch->w_local_GP.get)
+ break;
+
+ msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+ (get % ch->remote_nentries) *
+ ch->msg_size);
+
+ if (!(msg->flags & XPC_M_DONE))
+ break;
+
+ msg_flags |= msg->flags;
+ get++;
+ }
+
+ if (get == initial_get) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
+ initial_get) {
+ /* someone else beat us to it */
+ DBUG_ON(ch->local_GP->get <= initial_get);
+ break;
+ }
+
+ /* we just set the new value of local_GP->get */
+
+ dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
+ "channel=%d\n", get, ch->partid, ch->number);
+
+ send_IPI = (msg_flags & XPC_M_INTERRUPT);
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->get is not XPC_M_DONE or that local_GP->get
+ * equals w_local_GP.get, so we'll go have a look.
+ */
+ initial_get = get;
+ }
+
+ if (send_IPI)
+ xpc_IPI_send_msgrequest(ch);
+}
+
+/*
+ * Acknowledge receipt of a delivered message.
+ *
+ * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
+ *
+ * This function, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
+ *
+ * Arguments:
+ *
+ * partid - ID of partition to which the channel is connected.
+ * ch_number - channel # message received on.
+ * payload - pointer to the payload area allocated via
+ * xpc_initiate_allocate().
+ */
+void
+xpc_initiate_received(partid_t partid, int ch_number, void *payload)
+{
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_channel *ch;
+ struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
+ s64 get, msg_number = msg->number;
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+
+ ch = &part->channels[ch_number];
+
+ dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
+ (void *)msg, msg_number, ch->partid, ch->number);
+
+ DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
+ msg_number % ch->remote_nentries);
+ DBUG_ON(msg->flags & XPC_M_DONE);
+
+ msg->flags |= XPC_M_DONE;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of ch->local_GP->get.
+ */
+ mb();
+
+ /*
+ * See if this message is next in line to be acknowledged as having
+ * been delivered.
+ */
+ get = ch->local_GP->get;
+ if (get == msg_number)
+ xpc_acknowledge_msgs(ch, get, msg->flags);
+
+ /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */
+ xpc_msgqueue_deref(ch);
+}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
new file mode 100644
index 00000000000..f673ba90eb0
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -0,0 +1,1323 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) support - standard version.
+ *
+ * XPC provides a message passing capability that crosses partition
+ * boundaries. This module is made up of two parts:
+ *
+ * partition This part detects the presence/absence of other
+ * partitions. It provides a heartbeat and monitors
+ * the heartbeats of other partitions.
+ *
+ * channel This part manages the channels and sends/receives
+ * messages across them to/from other partitions.
+ *
+ * There are a couple of additional functions residing in XP, which
+ * provide an interface to XPC for its users.
+ *
+ *
+ * Caveats:
+ *
+ * . We currently have no way to determine which nasid an IPI came
+ * from. Thus, xpc_IPI_send() does a remote AMO write followed by
+ * an IPI. The AMO indicates where data is to be pulled from, so
+ * after the IPI arrives, the remote partition checks the AMO word.
+ * The IPI can actually arrive before the AMO however, so other code
+ * must periodically check for this case. Also, remote AMO operations
+ * do not reliably time out. Thus we do a remote PIO read solely to
+ * know whether the remote partition is down and whether we should
+ * stop sending IPIs to it. This remote PIO read operation is set up
+ * in a special nofault region so SAL knows to ignore (and cleanup)
+ * any errors due to the remote AMO write, PIO read, and/or PIO
+ * write operations.
+ *
+ * If/when new hardware solves this IPI problem, we should abandon
+ * the current approach.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/completion.h>
+#include <linux/kdebug.h>
+#include <linux/kthread.h>
+#include <linux/uaccess.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/* define two XPC debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpc_dbg_name = {
+ .name = "xpc"
+};
+
+struct device xpc_part_dbg_subname = {
+ .bus_id = {0}, /* set to "part" at xpc_init() time */
+ .driver = &xpc_dbg_name
+};
+
+struct device xpc_chan_dbg_subname = {
+ .bus_id = {0}, /* set to "chan" at xpc_init() time */
+ .driver = &xpc_dbg_name
+};
+
+struct device *xpc_part = &xpc_part_dbg_subname;
+struct device *xpc_chan = &xpc_chan_dbg_subname;
+
+static int xpc_kdebug_ignore;
+
+/* systune related variables for /proc/sys directories */
+
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
+
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
+
+int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
+static int xpc_disengage_request_min_timelimit; /* = 0 */
+static int xpc_disengage_request_max_timelimit = 120;
+
+static ctl_table xpc_sys_xpc_hb_dir[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hb_interval",
+ .data = &xpc_hb_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xpc_hb_min_interval,
+ .extra2 = &xpc_hb_max_interval},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hb_check_interval",
+ .data = &xpc_hb_check_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xpc_hb_check_min_interval,
+ .extra2 = &xpc_hb_check_max_interval},
+ {}
+};
+static ctl_table xpc_sys_xpc_dir[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hb",
+ .mode = 0555,
+ .child = xpc_sys_xpc_hb_dir},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "disengage_request_timelimit",
+ .data = &xpc_disengage_request_timelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xpc_disengage_request_min_timelimit,
+ .extra2 = &xpc_disengage_request_max_timelimit},
+ {}
+};
+static ctl_table xpc_sys_dir[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "xpc",
+ .mode = 0555,
+ .child = xpc_sys_xpc_dir},
+ {}
+};
+static struct ctl_table_header *xpc_sysctl;
+
+/* non-zero if any remote partition disengage request was timed out */
+int xpc_disengage_request_timedout;
+
+/* #of IRQs received */
+static atomic_t xpc_act_IRQ_rcvd;
+
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
+
+static unsigned long xpc_hb_check_timeout;
+
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
+
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_COMPLETION(xpc_discovery_exited);
+
+static struct timer_list xpc_hb_timer;
+
+static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
+
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+ .notifier_call = xpc_system_reboot,
+};
+
+static int xpc_system_die(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_die_notifier = {
+ .notifier_call = xpc_system_die,
+};
+
+/*
+ * Timer function to enforce the timelimit on the partition disengage request.
+ */
+static void
+xpc_timeout_partition_disengage_request(unsigned long data)
+{
+ struct xpc_partition *part = (struct xpc_partition *)data;
+
+ DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+
+ (void)xpc_partition_disengaged(part);
+
+ DBUG_ON(part->disengage_request_timeout != 0);
+ DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+}
+
+/*
+ * Notify the heartbeat check thread that an IRQ has been received.
+ */
+static irqreturn_t
+xpc_act_IRQ_handler(int irq, void *dev_id)
+{
+ atomic_inc(&xpc_act_IRQ_rcvd);
+ wake_up_interruptible(&xpc_act_IRQ_wq);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Timer to produce the heartbeat. The timer structures function is
+ * already set when this is initially called. A tunable is used to
+ * specify when the next timeout should occur.
+ */
+static void
+xpc_hb_beater(unsigned long dummy)
+{
+ xpc_vars->heartbeat++;
+
+ if (time_after_eq(jiffies, xpc_hb_check_timeout))
+ wake_up_interruptible(&xpc_act_IRQ_wq);
+
+ xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
+ add_timer(&xpc_hb_timer);
+}
+
+/*
+ * This thread is responsible for nearly all of the partition
+ * activation/deactivation.
+ */
+static int
+xpc_hb_checker(void *ignore)
+{
+ int last_IRQ_count = 0;
+ int new_IRQ_count;
+ int force_IRQ = 0;
+
+ /* this thread was marked active by xpc_hb_init() */
+
+ set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
+
+ /* set our heartbeating to other partitions into motion */
+ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
+ xpc_hb_beater(0);
+
+ while (!xpc_exiting) {
+
+ dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
+ "been received\n",
+ (int)(xpc_hb_check_timeout - jiffies),
+ atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
+
+ /* checking of remote heartbeats is skewed by IRQ handling */
+ if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+ dev_dbg(xpc_part, "checking remote heartbeats\n");
+ xpc_check_remote_hb();
+
+ /*
+ * We need to periodically recheck to ensure no
+ * IPI/AMO pairs have been missed. That check
+ * must always reset xpc_hb_check_timeout.
+ */
+ force_IRQ = 1;
+ }
+
+ /* check for outstanding IRQs */
+ new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
+ if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
+ force_IRQ = 0;
+
+ dev_dbg(xpc_part, "found an IRQ to process; will be "
+ "resetting xpc_hb_check_timeout\n");
+
+ last_IRQ_count += xpc_identify_act_IRQ_sender();
+ if (last_IRQ_count < new_IRQ_count) {
+ /* retry once to help avoid missing AMO */
+ (void)xpc_identify_act_IRQ_sender();
+ }
+ last_IRQ_count = new_IRQ_count;
+
+ xpc_hb_check_timeout = jiffies +
+ (xpc_hb_check_interval * HZ);
+ }
+
+ /* wait for IRQ or timeout */
+ (void)wait_event_interruptible(xpc_act_IRQ_wq,
+ (last_IRQ_count <
+ atomic_read(&xpc_act_IRQ_rcvd)
+ || time_after_eq(jiffies,
+ xpc_hb_check_timeout) ||
+ xpc_exiting));
+ }
+
+ dev_dbg(xpc_part, "heartbeat checker is exiting\n");
+
+ /* mark this thread as having exited */
+ complete(&xpc_hb_checker_exited);
+ return 0;
+}
+
+/*
+ * This thread will attempt to discover other partitions to activate
+ * based on info provided by SAL. This new thread is short lived and
+ * will exit once discovery is complete.
+ */
+static int
+xpc_initiate_discovery(void *ignore)
+{
+ xpc_discovery();
+
+ dev_dbg(xpc_part, "discovery thread is exiting\n");
+
+ /* mark this thread as having exited */
+ complete(&xpc_discovery_exited);
+ return 0;
+}
+
+/*
+ * Establish first contact with the remote partititon. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xpc_retval
+xpc_make_first_contact(struct xpc_partition *part)
+{
+ enum xpc_retval ret;
+
+ while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
+ if (ret != xpcRetry) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return ret;
+ }
+
+ dev_dbg(xpc_chan, "waiting to make first contact with "
+ "partition %d\n", XPC_PARTID(part));
+
+ /* wait a 1/4 of a second or so */
+ (void)msleep_interruptible(250);
+
+ if (part->act_state == XPC_P_DEACTIVATING)
+ return part->reason;
+ }
+
+ return xpc_mark_partition_active(part);
+}
+
+/*
+ * The first kthread assigned to a newly activated partition is the one
+ * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
+ * that kthread until the partition is brought down, at which time that kthread
+ * returns back to XPC HB. (The return of that kthread will signify to XPC HB
+ * that XPC has dismantled all communication infrastructure for the associated
+ * partition.) This kthread becomes the channel manager for that partition.
+ *
+ * Each active partition has a channel manager, who, besides connecting and
+ * disconnecting channels, will ensure that each of the partition's connected
+ * channels has the required number of assigned kthreads to get the work done.
+ */
+static void
+xpc_channel_mgr(struct xpc_partition *part)
+{
+ while (part->act_state != XPC_P_DEACTIVATING ||
+ atomic_read(&part->nchannels_active) > 0 ||
+ !xpc_partition_disengaged(part)) {
+
+ xpc_process_channel_activity(part);
+
+ /*
+ * Wait until we've been requested to activate kthreads or
+ * all of the channel's message queues have been torn down or
+ * a signal is pending.
+ *
+ * The channel_mgr_requests is set to 1 after being awakened,
+ * This is done to prevent the channel mgr from making one pass
+ * through the loop for each request, since he will
+ * be servicing all the requests in one pass. The reason it's
+ * set to 1 instead of 0 is so that other kthreads will know
+ * that the channel mgr is running and won't bother trying to
+ * wake him up.
+ */
+ atomic_dec(&part->channel_mgr_requests);
+ (void)wait_event_interruptible(part->channel_mgr_wq,
+ (atomic_read(&part->channel_mgr_requests) > 0 ||
+ part->local_IPI_amo != 0 ||
+ (part->act_state == XPC_P_DEACTIVATING &&
+ atomic_read(&part->nchannels_active) == 0 &&
+ xpc_partition_disengaged(part))));
+ atomic_set(&part->channel_mgr_requests, 1);
+ }
+}
+
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which setup the XPC
+ * infrastructure will remain assigned to the partition until the partition
+ * goes down. At which time the kthread will teardown the XPC infrastructure
+ * and then exit.
+ *
+ * XPC HB will put the remote partition's XPC per partition specific variables
+ * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
+ * calling xpc_partition_up().
+ */
+static void
+xpc_partition_up(struct xpc_partition *part)
+{
+ DBUG_ON(part->channels != NULL);
+
+ dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
+
+ if (xpc_setup_infrastructure(part) != xpcSuccess)
+ return;
+
+ /*
+ * The kthread that XPC HB called us with will become the
+ * channel manager for this partition. It will not return
+ * back to XPC HB until the partition's XPC infrastructure
+ * has been dismantled.
+ */
+
+ (void)xpc_part_ref(part); /* this will always succeed */
+
+ if (xpc_make_first_contact(part) == xpcSuccess)
+ xpc_channel_mgr(part);
+
+ xpc_part_deref(part);
+
+ xpc_teardown_infrastructure(part);
+}
+
+static int
+xpc_activating(void *__partid)
+{
+ partid_t partid = (u64)__partid;
+ struct xpc_partition *part = &xpc_partitions[partid];
+ unsigned long irq_flags;
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ if (part->act_state == XPC_P_DEACTIVATING) {
+ part->act_state = XPC_P_INACTIVE;
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ part->remote_rp_pa = 0;
+ return 0;
+ }
+
+ /* indicate the thread is activating */
+ DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
+ part->act_state = XPC_P_ACTIVATING;
+
+ XPC_SET_REASON(part, 0, 0);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ dev_dbg(xpc_part, "bringing partition %d up\n", partid);
+
+ /*
+ * Register the remote partition's AMOs with SAL so it can handle
+ * and cleanup errors within that address range should the remote
+ * partition go down. We don't unregister this range because it is
+ * difficult to tell when outstanding writes to the remote partition
+ * are finished and thus when it is safe to unregister. This should
+ * not result in wasted space in the SAL xp_addr_region table because
+ * we should get the same page for remote_amos_page_pa after module
+ * reloads and system reboots.
+ */
+ if (sn_register_xp_addr_region(part->remote_amos_page_pa,
+ PAGE_SIZE, 1) < 0) {
+ dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
+ "xp_addr region\n", partid);
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_INACTIVE;
+ XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ part->remote_rp_pa = 0;
+ return 0;
+ }
+
+ xpc_allow_hb(partid, xpc_vars);
+ xpc_IPI_send_activated(part);
+
+ /*
+ * xpc_partition_up() holds this thread and marks this partition as
+ * XPC_P_ACTIVE by calling xpc_hb_mark_active().
+ */
+ (void)xpc_partition_up(part);
+
+ xpc_disallow_hb(partid, xpc_vars);
+ xpc_mark_partition_inactive(part);
+
+ if (part->reason == xpcReactivating) {
+ /* interrupting ourselves results in activating partition */
+ xpc_IPI_send_reactivate(part);
+ }
+
+ return 0;
+}
+
+void
+xpc_activate_partition(struct xpc_partition *part)
+{
+ partid_t partid = XPC_PARTID(part);
+ unsigned long irq_flags;
+ struct task_struct *kthread;
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ DBUG_ON(part->act_state != XPC_P_INACTIVE);
+
+ part->act_state = XPC_P_ACTIVATION_REQ;
+ XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
+
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
+ partid);
+ if (IS_ERR(kthread)) {
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_INACTIVE;
+ XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ }
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an AMO_t structure per partition to indicate
+ * whether a partition has sent an IPI or not. If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ * irq - Interrupt ReQuest number. NOT USED.
+ *
+ * dev_id - partid of IPI's potential sender.
+ */
+irqreturn_t
+xpc_notify_IRQ_handler(int irq, void *dev_id)
+{
+ partid_t partid = (partid_t) (u64)dev_id;
+ struct xpc_partition *part = &xpc_partitions[partid];
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+
+ if (xpc_part_ref(part)) {
+ xpc_check_for_channel_activity(part);
+
+ xpc_part_deref(part);
+ }
+ return IRQ_HANDLED;
+}
+
+/*
+ * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
+ * because the write to their associated IPI amo completed after the IRQ/IPI
+ * was received.
+ */
+void
+xpc_dropped_IPI_check(struct xpc_partition *part)
+{
+ if (xpc_part_ref(part)) {
+ xpc_check_for_channel_activity(part);
+
+ part->dropped_IPI_timer.expires = jiffies +
+ XPC_P_DROPPED_IPI_WAIT;
+ add_timer(&part->dropped_IPI_timer);
+ xpc_part_deref(part);
+ }
+}
+
+void
+xpc_activate_kthreads(struct xpc_channel *ch, int needed)
+{
+ int idle = atomic_read(&ch->kthreads_idle);
+ int assigned = atomic_read(&ch->kthreads_assigned);
+ int wakeup;
+
+ DBUG_ON(needed <= 0);
+
+ if (idle > 0) {
+ wakeup = (needed > idle) ? idle : needed;
+ needed -= wakeup;
+
+ dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
+ "channel=%d\n", wakeup, ch->partid, ch->number);
+
+ /* only wakeup the requested number of kthreads */
+ wake_up_nr(&ch->idle_wq, wakeup);
+ }
+
+ if (needed <= 0)
+ return;
+
+ if (needed + assigned > ch->kthreads_assigned_limit) {
+ needed = ch->kthreads_assigned_limit - assigned;
+ if (needed <= 0)
+ return;
+ }
+
+ dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
+ needed, ch->partid, ch->number);
+
+ xpc_create_kthreads(ch, needed, 0);
+}
+
+/*
+ * This function is where XPC's kthreads wait for messages to deliver.
+ */
+static void
+xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
+{
+ do {
+ /* deliver messages to their intended recipients */
+
+ while (ch->w_local_GP.get < ch->w_remote_GP.put &&
+ !(ch->flags & XPC_C_DISCONNECTING)) {
+ xpc_deliver_msg(ch);
+ }
+
+ if (atomic_inc_return(&ch->kthreads_idle) >
+ ch->kthreads_idle_limit) {
+ /* too many idle kthreads on this channel */
+ atomic_dec(&ch->kthreads_idle);
+ break;
+ }
+
+ dev_dbg(xpc_chan, "idle kthread calling "
+ "wait_event_interruptible_exclusive()\n");
+
+ (void)wait_event_interruptible_exclusive(ch->idle_wq,
+ (ch->w_local_GP.get < ch->w_remote_GP.put ||
+ (ch->flags & XPC_C_DISCONNECTING)));
+
+ atomic_dec(&ch->kthreads_idle);
+
+ } while (!(ch->flags & XPC_C_DISCONNECTING));
+}
+
+static int
+xpc_kthread_start(void *args)
+{
+ partid_t partid = XPC_UNPACK_ARG1(args);
+ u16 ch_number = XPC_UNPACK_ARG2(args);
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_channel *ch;
+ int n_needed;
+ unsigned long irq_flags;
+
+ dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
+ partid, ch_number);
+
+ ch = &part->channels[ch_number];
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+
+ /* let registerer know that connection has been established */
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+ ch->flags |= XPC_C_CONNECTEDCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_connected_callout(ch);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ /*
+ * It is possible that while the callout was being
+ * made that the remote partition sent some messages.
+ * If that is the case, we may need to activate
+ * additional kthreads to help deliver them. We only
+ * need one less than total #of messages to deliver.
+ */
+ n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
+ if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
+ xpc_activate_kthreads(ch, n_needed);
+
+ } else {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+
+ xpc_kthread_waitmsgs(part, ch);
+ }
+
+ /* let registerer know that connection is disconnecting */
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_disconnect_callout(ch, xpcDisconnecting);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
+ if (atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_mark_partition_disengaged(part);
+ xpc_IPI_send_disengage(part);
+ }
+ }
+
+ xpc_msgqueue_deref(ch);
+
+ dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
+ partid, ch_number);
+
+ xpc_part_deref(part);
+ return 0;
+}
+
+/*
+ * For each partition that XPC has established communications with, there is
+ * a minimum of one kernel thread assigned to perform any operation that
+ * may potentially sleep or block (basically the callouts to the asynchronous
+ * functions registered via xpc_connect()).
+ *
+ * Additional kthreads are created and destroyed by XPC as the workload
+ * demands.
+ *
+ * A kthread is assigned to one of the active channels that exists for a given
+ * partition.
+ */
+void
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+ int ignore_disconnecting)
+{
+ unsigned long irq_flags;
+ u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct task_struct *kthread;
+
+ while (needed-- > 0) {
+
+ /*
+ * The following is done on behalf of the newly created
+ * kthread. That kthread is responsible for doing the
+ * counterpart to the following before it exits.
+ */
+ if (ignore_disconnecting) {
+ if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+ /* kthreads assigned had gone to zero */
+ BUG_ON(!(ch->flags &
+ XPC_C_DISCONNECTINGCALLOUT_MADE));
+ break;
+ }
+
+ } else if (ch->flags & XPC_C_DISCONNECTING) {
+ break;
+
+ } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
+ if (atomic_inc_return(&part->nchannels_engaged) == 1)
+ xpc_mark_partition_engaged(part);
+ }
+ (void)xpc_part_ref(part);
+ xpc_msgqueue_ref(ch);
+
+ kthread = kthread_run(xpc_kthread_start, (void *)args,
+ "xpc%02dc%d", ch->partid, ch->number);
+ if (IS_ERR(kthread)) {
+ /* the fork failed */
+
+ /*
+ * NOTE: if (ignore_disconnecting &&
+ * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+ * then we'll deadlock if all other kthreads assigned
+ * to this channel are blocked in the channel's
+ * registerer, because the only thing that will unblock
+ * them is the xpcDisconnecting callout that this
+ * failed kthread_run() would have made.
+ */
+
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+ atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_mark_partition_disengaged(part);
+ xpc_IPI_send_disengage(part);
+ }
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
+
+ if (atomic_read(&ch->kthreads_assigned) <
+ ch->kthreads_idle_limit) {
+ /*
+ * Flag this as an error only if we have an
+ * insufficient #of kthreads for the channel
+ * to function.
+ */
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ }
+ break;
+ }
+ }
+}
+
+void
+xpc_disconnect_wait(int ch_number)
+{
+ unsigned long irq_flags;
+ partid_t partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+ int wakeup_channel_mgr;
+
+ /* now wait for all callouts to the caller's function to cease */
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (!xpc_part_ref(part))
+ continue;
+
+ ch = &part->channels[ch_number];
+
+ if (!(ch->flags & XPC_C_WDISCONNECT)) {
+ xpc_part_deref(part);
+ continue;
+ }
+
+ wait_for_completion(&ch->wdisconnect_wait);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ wakeup_channel_mgr = 0;
+
+ if (ch->delayed_IPI_flags) {
+ if (part->act_state != XPC_P_DEACTIVATING) {
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch->number,
+ ch->delayed_IPI_flags);
+ spin_unlock(&part->IPI_lock);
+ wakeup_channel_mgr = 1;
+ }
+ ch->delayed_IPI_flags = 0;
+ }
+
+ ch->flags &= ~XPC_C_WDISCONNECT;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ if (wakeup_channel_mgr)
+ xpc_wakeup_channel_mgr(part);
+
+ xpc_part_deref(part);
+ }
+}
+
+static void
+xpc_do_exit(enum xpc_retval reason)
+{
+ partid_t partid;
+ int active_part_count, printed_waiting_msg = 0;
+ struct xpc_partition *part;
+ unsigned long printmsg_time, disengage_request_timeout = 0;
+
+ /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+ DBUG_ON(xpc_exiting == 1);
+
+ /*
+ * Let the heartbeat checker thread and the discovery thread
+ * (if one is running) know that they should exit. Also wake up
+ * the heartbeat checker thread in case it's sleeping.
+ */
+ xpc_exiting = 1;
+ wake_up_interruptible(&xpc_act_IRQ_wq);
+
+ /* ignore all incoming interrupts */
+ free_irq(SGI_XPC_ACTIVATE, NULL);
+
+ /* wait for the discovery thread to exit */
+ wait_for_completion(&xpc_discovery_exited);
+
+ /* wait for the heartbeat checker thread to exit */
+ wait_for_completion(&xpc_hb_checker_exited);
+
+ /* sleep for a 1/3 of a second or so */
+ (void)msleep_interruptible(300);
+
+ /* wait for all partitions to become inactive */
+
+ printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ xpc_disengage_request_timedout = 0;
+
+ do {
+ active_part_count = 0;
+
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_INACTIVE) {
+ continue;
+ }
+
+ active_part_count++;
+
+ XPC_DEACTIVATE_PARTITION(part, reason);
+
+ if (part->disengage_request_timeout >
+ disengage_request_timeout) {
+ disengage_request_timeout =
+ part->disengage_request_timeout;
+ }
+ }
+
+ if (xpc_partition_engaged(-1UL)) {
+ if (time_after(jiffies, printmsg_time)) {
+ dev_info(xpc_part, "waiting for remote "
+ "partitions to disengage, timeout in "
+ "%ld seconds\n",
+ (disengage_request_timeout - jiffies)
+ / HZ);
+ printmsg_time = jiffies +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ printed_waiting_msg = 1;
+ }
+
+ } else if (active_part_count > 0) {
+ if (printed_waiting_msg) {
+ dev_info(xpc_part, "waiting for local partition"
+ " to disengage\n");
+ printed_waiting_msg = 0;
+ }
+
+ } else {
+ if (!xpc_disengage_request_timedout) {
+ dev_info(xpc_part, "all partitions have "
+ "disengaged\n");
+ }
+ break;
+ }
+
+ /* sleep for a 1/3 of a second or so */
+ (void)msleep_interruptible(300);
+
+ } while (1);
+
+ DBUG_ON(xpc_partition_engaged(-1UL));
+
+ /* indicate to others that our reserved page is uninitialized */
+ xpc_rsvd_page->vars_pa = 0;
+
+ /* now it's time to eliminate our heartbeat */
+ del_timer_sync(&xpc_hb_timer);
+ DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
+
+ if (reason == xpcUnloading) {
+ /* take ourselves off of the reboot_notifier_list */
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+
+ /* take ourselves off of the die_notifier list */
+ (void)unregister_die_notifier(&xpc_die_notifier);
+ }
+
+ /* close down protections for IPI operations */
+ xpc_restrict_IPI_ops();
+
+ /* clear the interface to XPC's functions */
+ xpc_clear_interface();
+
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ kfree(xpc_remote_copy_buffer_base);
+}
+
+/*
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+ enum xpc_retval reason;
+
+ switch (event) {
+ case SYS_RESTART:
+ reason = xpcSystemReboot;
+ break;
+ case SYS_HALT:
+ reason = xpcSystemHalt;
+ break;
+ case SYS_POWER_OFF:
+ reason = xpcSystemPoweroff;
+ break;
+ default:
+ reason = xpcSystemGoingDown;
+ }
+
+ xpc_do_exit(reason);
+ return NOTIFY_DONE;
+}
+
+/*
+ * Notify other partitions to disengage from all references to our memory.
+ */
+static void
+xpc_die_disengage(void)
+{
+ struct xpc_partition *part;
+ partid_t partid;
+ unsigned long engaged;
+ long time, printmsg_time, disengage_request_timeout;
+
+ /* keep xpc_hb_checker thread from doing anything (just in case) */
+ xpc_exiting = 1;
+
+ xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */
+
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+ remote_vars_version)) {
+
+ /* just in case it was left set by an earlier XPC */
+ xpc_clear_partition_engaged(1UL << partid);
+ continue;
+ }
+
+ if (xpc_partition_engaged(1UL << partid) ||
+ part->act_state != XPC_P_INACTIVE) {
+ xpc_request_partition_disengage(part);
+ xpc_mark_partition_disengaged(part);
+ xpc_IPI_send_disengage(part);
+ }
+ }
+
+ time = rtc_time();
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+ disengage_request_timeout = time +
+ (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
+
+ /* wait for all other partitions to disengage from us */
+
+ while (1) {
+ engaged = xpc_partition_engaged(-1UL);
+ if (!engaged) {
+ dev_info(xpc_part, "all partitions have disengaged\n");
+ break;
+ }
+
+ time = rtc_time();
+ if (time >= disengage_request_timeout) {
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ if (engaged & (1UL << partid)) {
+ dev_info(xpc_part, "disengage from "
+ "remote partition %d timed "
+ "out\n", partid);
+ }
+ }
+ break;
+ }
+
+ if (time >= printmsg_time) {
+ dev_info(xpc_part, "waiting for remote partitions to "
+ "disengage, timeout in %ld seconds\n",
+ (disengage_request_timeout - time) /
+ sn_rtc_cycles_per_second);
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+ sn_rtc_cycles_per_second);
+ }
+ }
+}
+
+/*
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
+ */
+static int
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
+{
+ switch (event) {
+ case DIE_MACHINE_RESTART:
+ case DIE_MACHINE_HALT:
+ xpc_die_disengage();
+ break;
+
+ case DIE_KDEBUG_ENTER:
+ /* Should lack of heartbeat be ignored by other partitions? */
+ if (!xpc_kdebug_ignore)
+ break;
+
+ /* fall through */
+ case DIE_MCA_MONARCH_ENTER:
+ case DIE_INIT_MONARCH_ENTER:
+ xpc_vars->heartbeat++;
+ xpc_vars->heartbeat_offline = 1;
+ break;
+
+ case DIE_KDEBUG_LEAVE:
+ /* Is lack of heartbeat being ignored by other partitions? */
+ if (!xpc_kdebug_ignore)
+ break;
+
+ /* fall through */
+ case DIE_MCA_MONARCH_LEAVE:
+ case DIE_INIT_MONARCH_LEAVE:
+ xpc_vars->heartbeat++;
+ xpc_vars->heartbeat_offline = 0;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+int __init
+xpc_init(void)
+{
+ int ret;
+ partid_t partid;
+ struct xpc_partition *part;
+ struct task_struct *kthread;
+ size_t buf_size;
+
+ if (!ia64_platform_is("sn2"))
+ return -ENODEV;
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+ xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL,
+ &xpc_remote_copy_buffer_base);
+ if (xpc_remote_copy_buffer == NULL)
+ return -ENOMEM;
+
+ snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
+ snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
+
+ xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
+ /*
+ * The first few fields of each entry of xpc_partitions[] need to
+ * be initialized now so that calls to xpc_connect() and
+ * xpc_disconnect() can be made prior to the activation of any remote
+ * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+ * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+ * PARTITION HAS BEEN ACTIVATED.
+ */
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ part = &xpc_partitions[partid];
+
+ DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+ part->act_IRQ_rcvd = 0;
+ spin_lock_init(&part->act_lock);
+ part->act_state = XPC_P_INACTIVE;
+ XPC_SET_REASON(part, 0, 0);
+
+ init_timer(&part->disengage_request_timer);
+ part->disengage_request_timer.function =
+ xpc_timeout_partition_disengage_request;
+ part->disengage_request_timer.data = (unsigned long)part;
+
+ part->setup_state = XPC_P_UNSET;
+ init_waitqueue_head(&part->teardown_wq);
+ atomic_set(&part->references, 0);
+ }
+
+ /*
+ * Open up protections for IPI operations (and AMO operations on
+ * Shub 1.1 systems).
+ */
+ xpc_allow_IPI_ops();
+
+ /*
+ * Interrupts being processed will increment this atomic variable and
+ * awaken the heartbeat thread which will process the interrupts.
+ */
+ atomic_set(&xpc_act_IRQ_rcvd, 0);
+
+ /*
+ * This is safe to do before the xpc_hb_checker thread has started
+ * because the handler releases a wait queue. If an interrupt is
+ * received before the thread is waiting, it will not go to sleep,
+ * but rather immediately process the interrupt.
+ */
+ ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
+ "xpc hb", NULL);
+ if (ret != 0) {
+ dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+ "errno=%d\n", -ret);
+
+ xpc_restrict_IPI_ops();
+
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ kfree(xpc_remote_copy_buffer_base);
+ return -EBUSY;
+ }
+
+ /*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+ xpc_rsvd_page = xpc_rsvd_page_init();
+ if (xpc_rsvd_page == NULL) {
+ dev_err(xpc_part, "could not setup our reserved page\n");
+
+ free_irq(SGI_XPC_ACTIVATE, NULL);
+ xpc_restrict_IPI_ops();
+
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ kfree(xpc_remote_copy_buffer_base);
+ return -EBUSY;
+ }
+
+ /* add ourselves to the reboot_notifier_list */
+ ret = register_reboot_notifier(&xpc_reboot_notifier);
+ if (ret != 0)
+ dev_warn(xpc_part, "can't register reboot notifier\n");
+
+ /* add ourselves to the die_notifier list */
+ ret = register_die_notifier(&xpc_die_notifier);
+ if (ret != 0)
+ dev_warn(xpc_part, "can't register die notifier\n");
+
+ init_timer(&xpc_hb_timer);
+ xpc_hb_timer.function = xpc_hb_beater;
+
+ /*
+ * The real work-horse behind xpc. This processes incoming
+ * interrupts and monitors remote heartbeats.
+ */
+ kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
+ if (IS_ERR(kthread)) {
+ dev_err(xpc_part, "failed while forking hb check thread\n");
+
+ /* indicate to others that our reserved page is uninitialized */
+ xpc_rsvd_page->vars_pa = 0;
+
+ /* take ourselves off of the reboot_notifier_list */
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+
+ /* take ourselves off of the die_notifier list */
+ (void)unregister_die_notifier(&xpc_die_notifier);
+
+ del_timer_sync(&xpc_hb_timer);
+ free_irq(SGI_XPC_ACTIVATE, NULL);
+ xpc_restrict_IPI_ops();
+
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ kfree(xpc_remote_copy_buffer_base);
+ return -EBUSY;
+ }
+
+ /*
+ * Startup a thread that will attempt to discover other partitions to
+ * activate based on info provided by SAL. This new thread is short
+ * lived and will exit once discovery is complete.
+ */
+ kthread = kthread_run(xpc_initiate_discovery, NULL,
+ XPC_DISCOVERY_THREAD_NAME);
+ if (IS_ERR(kthread)) {
+ dev_err(xpc_part, "failed while forking discovery thread\n");
+
+ /* mark this new thread as a non-starter */
+ complete(&xpc_discovery_exited);
+
+ xpc_do_exit(xpcUnloading);
+ return -EBUSY;
+ }
+
+ /* set the interface to point at XPC's functions */
+ xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
+ xpc_initiate_allocate, xpc_initiate_send,
+ xpc_initiate_send_notify, xpc_initiate_received,
+ xpc_initiate_partid_to_nasids);
+
+ return 0;
+}
+
+module_init(xpc_init);
+
+void __exit
+xpc_exit(void)
+{
+ xpc_do_exit(xpcUnloading);
+}
+
+module_exit(xpc_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
+MODULE_LICENSE("GPL");
+
+module_param(xpc_hb_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
+ "heartbeat increments.");
+
+module_param(xpc_hb_check_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
+ "heartbeat checks.");
+
+module_param(xpc_disengage_request_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
+ "for disengage request to complete.");
+
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+ "other partitions when dropping into kdebug.");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
new file mode 100644
index 00000000000..27e200ec582
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -0,0 +1,1174 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) partition support.
+ *
+ * This is the part of XPC that detects the presence/absence of
+ * other partitions. It provides a heartbeat and monitors the
+ * heartbeats of other partitions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sysctl.h>
+#include <linux/cache.h>
+#include <linux/mmzone.h>
+#include <linux/nodemask.h>
+#include <asm/uncached.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/addrs.h>
+#include "xpc.h"
+
+/* XPC is exiting flag */
+int xpc_exiting;
+
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access;
+static u64 xpc_sh2_IPI_access0;
+static u64 xpc_sh2_IPI_access1;
+static u64 xpc_sh2_IPI_access2;
+static u64 xpc_sh2_IPI_access3;
+
+/* original protection values for each node */
+u64 xpc_prot_vec[MAX_NUMNODES];
+
+/* this partition's reserved page pointers */
+struct xpc_rsvd_page *xpc_rsvd_page;
+static u64 *xpc_part_nasids;
+static u64 *xpc_mach_nasids;
+struct xpc_vars *xpc_vars;
+struct xpc_vars_part *xpc_vars_part;
+
+static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
+static int xp_nasid_mask_words; /* actual size in words of nasid mask */
+
+/*
+ * For performance reasons, each entry of xpc_partitions[] is cacheline
+ * aligned. And xpc_partitions[] is padded with an additional entry at the
+ * end so that the last legitimate entry doesn't share its cacheline with
+ * another variable.
+ */
+struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
+
+/*
+ * Generic buffer used to store a local copy of portions of a remote
+ * partition's reserved page (either its header and part_nasids mask,
+ * or its vars).
+ */
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
+
+/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kmalloc will give us cachline aligned memory by default */
+ *base = kmalloc(size, flags);
+ if (*base == NULL)
+ return NULL;
+
+ if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+ return *base;
+
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kmalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL)
+ return NULL;
+
+ return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Given a nasid, get the physical address of the partition's reserved page
+ * for that nasid. This function returns 0 on any error.
+ */
+static u64
+xpc_get_rsvd_page_pa(int nasid)
+{
+ bte_result_t bte_res;
+ s64 status;
+ u64 cookie = 0;
+ u64 rp_pa = nasid; /* seed with nasid */
+ u64 len = 0;
+ u64 buf = buf;
+ u64 buf_len = 0;
+ void *buf_base = NULL;
+
+ while (1) {
+
+ status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
+ &len);
+
+ dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
+ "0x%016lx, address=0x%016lx, len=0x%016lx\n",
+ status, cookie, rp_pa, len);
+
+ if (status != SALRET_MORE_PASSES)
+ break;
+
+ if (L1_CACHE_ALIGN(len) > buf_len) {
+ kfree(buf_base);
+ buf_len = L1_CACHE_ALIGN(len);
+ buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
+ GFP_KERNEL,
+ &buf_base);
+ if (buf_base == NULL) {
+ dev_err(xpc_part, "unable to kmalloc "
+ "len=0x%016lx\n", buf_len);
+ status = SALRET_ERROR;
+ break;
+ }
+ }
+
+ bte_res = xp_bte_copy(rp_pa, buf, buf_len,
+ (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (bte_res != BTE_SUCCESS) {
+ dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
+ status = SALRET_ERROR;
+ break;
+ }
+ }
+
+ kfree(buf_base);
+
+ if (status != SALRET_OK)
+ rp_pa = 0;
+
+ dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+ return rp_pa;
+}
+
+/*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+struct xpc_rsvd_page *
+xpc_rsvd_page_init(void)
+{
+ struct xpc_rsvd_page *rp;
+ AMO_t *amos_page;
+ u64 rp_pa, nasid_array = 0;
+ int i, ret;
+
+ /* get the local reserved page's address */
+
+ preempt_disable();
+ rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+ preempt_enable();
+ if (rp_pa == 0) {
+ dev_err(xpc_part, "SAL failed to locate the reserved page\n");
+ return NULL;
+ }
+ rp = (struct xpc_rsvd_page *)__va(rp_pa);
+
+ if (rp->partid != sn_partition_id) {
+ dev_err(xpc_part, "the reserved page's partid of %d should be "
+ "%d\n", rp->partid, sn_partition_id);
+ return NULL;
+ }
+
+ rp->version = XPC_RP_VERSION;
+
+ /* establish the actual sizes of the nasid masks */
+ if (rp->SAL_version == 1) {
+ /* SAL_version 1 didn't set the nasids_size field */
+ rp->nasids_size = 128;
+ }
+ xp_nasid_mask_bytes = rp->nasids_size;
+ xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+
+ /* setup the pointers to the various items in the reserved page */
+ xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+ xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+ xpc_vars = XPC_RP_VARS(rp);
+ xpc_vars_part = XPC_RP_VARS_PART(rp);
+
+ /*
+ * Before clearing xpc_vars, see if a page of AMOs had been previously
+ * allocated. If not we'll need to allocate one and set permissions
+ * so that cross-partition AMOs are allowed.
+ *
+ * The allocated AMO page needs MCA reporting to remain disabled after
+ * XPC has unloaded. To make this work, we keep a copy of the pointer
+ * to this page (i.e., amos_page) in the struct xpc_vars structure,
+ * which is pointed to by the reserved page, and re-use that saved copy
+ * on subsequent loads of XPC. This AMO page is never freed, and its
+ * memory protections are never restricted.
+ */
+ amos_page = xpc_vars->amos_page;
+ if (amos_page == NULL) {
+ amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0));
+ if (amos_page == NULL) {
+ dev_err(xpc_part, "can't allocate page of AMOs\n");
+ return NULL;
+ }
+
+ /*
+ * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
+ * when xpc_allow_IPI_ops() is called via xpc_hb_init().
+ */
+ if (!enable_shub_wars_1_1()) {
+ ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
+ PAGE_SIZE,
+ SN_MEMPROT_ACCESS_CLASS_1,
+ &nasid_array);
+ if (ret != 0) {
+ dev_err(xpc_part, "can't change memory "
+ "protections\n");
+ uncached_free_page(__IA64_UNCACHED_OFFSET |
+ TO_PHYS((u64)amos_page));
+ return NULL;
+ }
+ }
+ } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
+ /*
+ * EFI's XPBOOT can also set amos_page in the reserved page,
+ * but it happens to leave it as an uncached physical address
+ * and we need it to be an uncached virtual, so we'll have to
+ * convert it.
+ */
+ if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
+ dev_err(xpc_part, "previously used amos_page address "
+ "is bad = 0x%p\n", (void *)amos_page);
+ return NULL;
+ }
+ amos_page = (AMO_t *)TO_AMO((u64)amos_page);
+ }
+
+ /* clear xpc_vars */
+ memset(xpc_vars, 0, sizeof(struct xpc_vars));
+
+ xpc_vars->version = XPC_V_VERSION;
+ xpc_vars->act_nasid = cpuid_to_nasid(0);
+ xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+ xpc_vars->vars_part_pa = __pa(xpc_vars_part);
+ xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
+ xpc_vars->amos_page = amos_page; /* save for next load of XPC */
+
+ /* clear xpc_vars_part */
+ memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+ XP_MAX_PARTITIONS);
+
+ /* initialize the activate IRQ related AMO variables */
+ for (i = 0; i < xp_nasid_mask_words; i++)
+ (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
+
+ /* initialize the engaged remote partitions related AMO variables */
+ (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+ (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+
+ /* timestamp of when reserved page was setup by XPC */
+ rp->stamp = CURRENT_TIME;
+
+ /*
+ * This signifies to the remote partition that our reserved
+ * page is initialized.
+ */
+ rp->vars_pa = __pa(xpc_vars);
+
+ return rp;
+}
+
+/*
+ * Change protections to allow IPI operations (and AMO operations on
+ * Shub 1.1 systems).
+ */
+void
+xpc_allow_IPI_ops(void)
+{
+ int node;
+ int nasid;
+
+ /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
+
+ if (is_shub2()) {
+ xpc_sh2_IPI_access0 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+ xpc_sh2_IPI_access1 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+ xpc_sh2_IPI_access2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+ xpc_sh2_IPI_access3 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ -1UL);
+ }
+
+ } else {
+ xpc_sh1_IPI_access =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ -1UL);
+
+ /*
+ * Since the BIST collides with memory operations on
+ * SHUB 1.1 sn_change_memprotect() cannot be used.
+ */
+ if (enable_shub_wars_1_1()) {
+ /* open up everything */
+ xpc_prot_vec[node] = (u64)HUB_L((u64 *)
+ GLOBAL_MMR_ADDR
+ (nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+ HUB_S((u64 *)
+ GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+ -1UL);
+ HUB_S((u64 *)
+ GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+ -1UL);
+ }
+ }
+ }
+}
+
+/*
+ * Restrict protections to disallow IPI operations (and AMO operations on
+ * Shub 1.1 systems).
+ */
+void
+xpc_restrict_IPI_ops(void)
+{
+ int node;
+ int nasid;
+
+ /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
+
+ if (is_shub2()) {
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ xpc_sh2_IPI_access0);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ xpc_sh2_IPI_access1);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ xpc_sh2_IPI_access2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ xpc_sh2_IPI_access3);
+ }
+
+ } else {
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ xpc_sh1_IPI_access);
+
+ if (enable_shub_wars_1_1()) {
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+ xpc_prot_vec[node]);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+ xpc_prot_vec[node]);
+ }
+ }
+ }
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active. If not, the partition is deactivated.
+ */
+void
+xpc_check_remote_hb(void)
+{
+ struct xpc_vars *remote_vars;
+ struct xpc_partition *part;
+ partid_t partid;
+ bte_result_t bres;
+
+ remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
+
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+
+ if (xpc_exiting)
+ break;
+
+ if (partid == sn_partition_id)
+ continue;
+
+ part = &xpc_partitions[partid];
+
+ if (part->act_state == XPC_P_INACTIVE ||
+ part->act_state == XPC_P_DEACTIVATING) {
+ continue;
+ }
+
+ /* pull the remote_hb cache line */
+ bres = xp_bte_copy(part->remote_vars_pa,
+ (u64)remote_vars,
+ XPC_RP_VARS_SIZE,
+ (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (bres != BTE_SUCCESS) {
+ XPC_DEACTIVATE_PARTITION(part,
+ xpc_map_bte_errors(bres));
+ continue;
+ }
+
+ dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
+ " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
+ partid, remote_vars->heartbeat, part->last_heartbeat,
+ remote_vars->heartbeat_offline,
+ remote_vars->heartbeating_to_mask);
+
+ if (((remote_vars->heartbeat == part->last_heartbeat) &&
+ (remote_vars->heartbeat_offline == 0)) ||
+ !xpc_hb_allowed(sn_partition_id, remote_vars)) {
+
+ XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
+ continue;
+ }
+
+ part->last_heartbeat = remote_vars->heartbeat;
+ }
+}
+
+/*
+ * Get a copy of a portion of the remote partition's rsvd page.
+ *
+ * remote_rp points to a buffer that is cacheline aligned for BTE copies and
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
+ */
+static enum xpc_retval
+xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
+ struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
+{
+ int bres, i;
+
+ /* get the reserved page's physical address */
+
+ *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+ if (*remote_rp_pa == 0)
+ return xpcNoRsvdPageAddr;
+
+ /* pull over the reserved page header and part_nasids mask */
+ bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
+ XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
+ (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (bres != BTE_SUCCESS)
+ return xpc_map_bte_errors(bres);
+
+ if (discovered_nasids != NULL) {
+ u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
+
+ for (i = 0; i < xp_nasid_mask_words; i++)
+ discovered_nasids[i] |= remote_part_nasids[i];
+ }
+
+ /* check that the partid is for another partition */
+
+ if (remote_rp->partid < 1 ||
+ remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
+ return xpcInvalidPartid;
+ }
+
+ if (remote_rp->partid == sn_partition_id)
+ return xpcLocalPartid;
+
+ if (XPC_VERSION_MAJOR(remote_rp->version) !=
+ XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
+ return xpcBadVersion;
+ }
+
+ return xpcSuccess;
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xpc_retval
+xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
+{
+ int bres;
+
+ if (remote_vars_pa == 0)
+ return xpcVarsNotSet;
+
+ /* pull over the cross partition variables */
+ bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
+ (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (bres != BTE_SUCCESS)
+ return xpc_map_bte_errors(bres);
+
+ if (XPC_VERSION_MAJOR(remote_vars->version) !=
+ XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+ return xpcBadVersion;
+ }
+
+ return xpcSuccess;
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
+ struct timespec *remote_rp_stamp, u64 remote_rp_pa,
+ u64 remote_vars_pa, struct xpc_vars *remote_vars)
+{
+ part->remote_rp_version = remote_rp_version;
+ dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
+ part->remote_rp_version);
+
+ part->remote_rp_stamp = *remote_rp_stamp;
+ dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
+ part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+
+ part->remote_rp_pa = remote_rp_pa;
+ dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+ part->remote_vars_pa = remote_vars_pa;
+ dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
+ part->remote_vars_pa);
+
+ part->last_heartbeat = remote_vars->heartbeat;
+ dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
+ part->last_heartbeat);
+
+ part->remote_vars_part_pa = remote_vars->vars_part_pa;
+ dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
+ part->remote_vars_part_pa);
+
+ part->remote_act_nasid = remote_vars->act_nasid;
+ dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
+ part->remote_act_nasid);
+
+ part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+ dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
+ part->remote_act_phys_cpuid);
+
+ part->remote_amos_page_pa = remote_vars->amos_page_pa;
+ dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
+ part->remote_amos_page_pa);
+
+ part->remote_vars_version = remote_vars->version;
+ dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
+ part->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated an IPI. Inspect
+ * that nasid to determine if its partition needs to be activated or
+ * deactivated.
+ *
+ * A partition is consider "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat. A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_act_IRQ_req(int nasid)
+{
+ struct xpc_rsvd_page *remote_rp;
+ struct xpc_vars *remote_vars;
+ u64 remote_rp_pa;
+ u64 remote_vars_pa;
+ int remote_rp_version;
+ int reactivate = 0;
+ int stamp_diff;
+ struct timespec remote_rp_stamp = { 0, 0 };
+ partid_t partid;
+ struct xpc_partition *part;
+ enum xpc_retval ret;
+
+ /* pull over the reserved page structure */
+
+ remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
+
+ ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+ if (ret != xpcSuccess) {
+ dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+ return;
+ }
+
+ remote_vars_pa = remote_rp->vars_pa;
+ remote_rp_version = remote_rp->version;
+ if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
+ remote_rp_stamp = remote_rp->stamp;
+
+ partid = remote_rp->partid;
+ part = &xpc_partitions[partid];
+
+ /* pull over the cross partition variables */
+
+ remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
+
+ ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
+ if (ret != xpcSuccess) {
+
+ dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return;
+ }
+
+ part->act_IRQ_rcvd++;
+
+ dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+ "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
+ remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
+
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_INACTIVE) {
+
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+ if (xpc_partition_disengage_requested(1UL << partid)) {
+ /*
+ * Other side is waiting on us to disengage,
+ * even though we already have.
+ */
+ return;
+ }
+ } else {
+ /* other side doesn't support disengage requests */
+ xpc_clear_partition_disengage_request(1UL << partid);
+ }
+
+ xpc_activate_partition(part);
+ return;
+ }
+
+ DBUG_ON(part->remote_rp_version == 0);
+ DBUG_ON(part->remote_vars_version == 0);
+
+ if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+ DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+ remote_vars_version));
+
+ if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+ DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+ version));
+ /* see if the other side rebooted */
+ if (part->remote_amos_page_pa ==
+ remote_vars->amos_page_pa &&
+ xpc_hb_allowed(sn_partition_id, remote_vars)) {
+ /* doesn't look that way, so ignore the IPI */
+ return;
+ }
+ }
+
+ /*
+ * Other side rebooted and previous XPC didn't support the
+ * disengage request, so we don't need to do anything special.
+ */
+
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+ part->reactivate_nasid = nasid;
+ XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+ return;
+ }
+
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
+
+ if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+ /*
+ * Other side rebooted and previous XPC did support the
+ * disengage request, but the new one doesn't.
+ */
+
+ xpc_clear_partition_engaged(1UL << partid);
+ xpc_clear_partition_disengage_request(1UL << partid);
+
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp, remote_rp_pa,
+ remote_vars_pa, remote_vars);
+ reactivate = 1;
+
+ } else {
+ DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+ stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+ &remote_rp_stamp);
+ if (stamp_diff != 0) {
+ DBUG_ON(stamp_diff >= 0);
+
+ /*
+ * Other side rebooted and the previous XPC did support
+ * the disengage request, as does the new one.
+ */
+
+ DBUG_ON(xpc_partition_engaged(1UL << partid));
+ DBUG_ON(xpc_partition_disengage_requested(1UL <<
+ partid));
+
+ xpc_update_partition_info(part, remote_rp_version,
+ &remote_rp_stamp,
+ remote_rp_pa, remote_vars_pa,
+ remote_vars);
+ reactivate = 1;
+ }
+ }
+
+ if (part->disengage_request_timeout > 0 &&
+ !xpc_partition_disengaged(part)) {
+ /* still waiting on other side to disengage from us */
+ return;
+ }
+
+ if (reactivate) {
+ part->reactivate_nasid = nasid;
+ XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+
+ } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+ xpc_partition_disengage_requested(1UL << partid)) {
+ XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
+ }
+}
+
+/*
+ * Loop through the activation AMO variables and process any bits
+ * which are set. Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_act_IRQ_sender(void)
+{
+ int word, bit;
+ u64 nasid_mask;
+ u64 nasid; /* remote nasid */
+ int n_IRQs_detected = 0;
+ AMO_t *act_amos;
+
+ act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
+
+ /* scan through act AMO variable looking for non-zero entries */
+ for (word = 0; word < xp_nasid_mask_words; word++) {
+
+ if (xpc_exiting)
+ break;
+
+ nasid_mask = xpc_IPI_receive(&act_amos[word]);
+ if (nasid_mask == 0) {
+ /* no IRQs from nasids in this variable */
+ continue;
+ }
+
+ dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
+ nasid_mask);
+
+ /*
+ * If this nasid has been added to the machine since
+ * our partition was reset, this will retain the
+ * remote nasid in our reserved pages machine mask.
+ * This is used in the event of module reload.
+ */
+ xpc_mach_nasids[word] |= nasid_mask;
+
+ /* locate the nasid(s) which sent interrupts */
+
+ for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
+ if (nasid_mask & (1UL << bit)) {
+ n_IRQs_detected++;
+ nasid = XPC_NASID_FROM_W_B(word, bit);
+ dev_dbg(xpc_part, "interrupt from nasid %ld\n",
+ nasid);
+ xpc_identify_act_IRQ_req(nasid);
+ }
+ }
+ }
+ return n_IRQs_detected;
+}
+
+/*
+ * See if the other side has responded to a partition disengage request
+ * from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+ partid_t partid = XPC_PARTID(part);
+ int disengaged;
+
+ disengaged = (xpc_partition_engaged(1UL << partid) == 0);
+ if (part->disengage_request_timeout) {
+ if (!disengaged) {
+ if (time_before(jiffies,
+ part->disengage_request_timeout)) {
+ /* timelimit hasn't been reached yet */
+ return 0;
+ }
+
+ /*
+ * Other side hasn't responded to our disengage
+ * request in a timely fashion, so assume it's dead.
+ */
+
+ dev_info(xpc_part, "disengage from remote partition %d "
+ "timed out\n", partid);
+ xpc_disengage_request_timedout = 1;
+ xpc_clear_partition_engaged(1UL << partid);
+ disengaged = 1;
+ }
+ part->disengage_request_timeout = 0;
+
+ /* cancel the timer function, provided it's not us */
+ if (!in_interrupt()) {
+ del_singleshot_timer_sync(&part->
+ disengage_request_timer);
+ }
+
+ DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
+ part->act_state != XPC_P_INACTIVE);
+ if (part->act_state != XPC_P_INACTIVE)
+ xpc_wakeup_channel_mgr(part);
+
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
+ xpc_cancel_partition_disengage_request(part);
+ }
+ return disengaged;
+}
+
+/*
+ * Mark specified partition as active.
+ */
+enum xpc_retval
+xpc_mark_partition_active(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ enum xpc_retval ret;
+
+ dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ if (part->act_state == XPC_P_ACTIVATING) {
+ part->act_state = XPC_P_ACTIVE;
+ ret = xpcSuccess;
+ } else {
+ DBUG_ON(part->reason == xpcSuccess);
+ ret = part->reason;
+ }
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ return ret;
+}
+
+/*
+ * Notify XPC that the partition is down.
+ */
+void
+xpc_deactivate_partition(const int line, struct xpc_partition *part,
+ enum xpc_retval reason)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+
+ if (part->act_state == XPC_P_INACTIVE) {
+ XPC_SET_REASON(part, reason, line);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ if (reason == xpcReactivating) {
+ /* we interrupt ourselves to reactivate partition */
+ xpc_IPI_send_reactivate(part);
+ }
+ return;
+ }
+ if (part->act_state == XPC_P_DEACTIVATING) {
+ if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
+ reason == xpcReactivating) {
+ XPC_SET_REASON(part, reason, line);
+ }
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ return;
+ }
+
+ part->act_state = XPC_P_DEACTIVATING;
+ XPC_SET_REASON(part, reason, line);
+
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+ xpc_request_partition_disengage(part);
+ xpc_IPI_send_disengage(part);
+
+ /* set a timelimit on the disengage request */
+ part->disengage_request_timeout = jiffies +
+ (xpc_disengage_request_timelimit * HZ);
+ part->disengage_request_timer.expires =
+ part->disengage_request_timeout;
+ add_timer(&part->disengage_request_timer);
+ }
+
+ dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+ XPC_PARTID(part), reason);
+
+ xpc_partition_going_down(part, reason);
+}
+
+/*
+ * Mark specified partition as inactive.
+ */
+void
+xpc_mark_partition_inactive(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+
+ dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
+ XPC_PARTID(part));
+
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_INACTIVE;
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ part->remote_rp_pa = 0;
+}
+
+/*
+ * SAL has provided a partition and machine mask. The partition mask
+ * contains a bit for each even nasid in our partition. The machine
+ * mask contains a bit for each even nasid in the entire machine.
+ *
+ * Using those two bit arrays, we can determine which nasids are
+ * known in the machine. Each should also have a reserved page
+ * initialized if they are available for partitioning.
+ */
+void
+xpc_discovery(void)
+{
+ void *remote_rp_base;
+ struct xpc_rsvd_page *remote_rp;
+ struct xpc_vars *remote_vars;
+ u64 remote_rp_pa;
+ u64 remote_vars_pa;
+ int region;
+ int region_size;
+ int max_regions;
+ int nasid;
+ struct xpc_rsvd_page *rp;
+ partid_t partid;
+ struct xpc_partition *part;
+ u64 *discovered_nasids;
+ enum xpc_retval ret;
+
+ remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+ xp_nasid_mask_bytes,
+ GFP_KERNEL, &remote_rp_base);
+ if (remote_rp == NULL)
+ return;
+
+ remote_vars = (struct xpc_vars *)remote_rp;
+
+ discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
+ GFP_KERNEL);
+ if (discovered_nasids == NULL) {
+ kfree(remote_rp_base);
+ return;
+ }
+
+ rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
+
+ /*
+ * The term 'region' in this context refers to the minimum number of
+ * nodes that can comprise an access protection grouping. The access
+ * protection is in regards to memory, IOI and IPI.
+ */
+ max_regions = 64;
+ region_size = sn_region_size;
+
+ switch (region_size) {
+ case 128:
+ max_regions *= 2;
+ case 64:
+ max_regions *= 2;
+ case 32:
+ max_regions *= 2;
+ region_size = 16;
+ DBUG_ON(!is_shub2());
+ }
+
+ for (region = 0; region < max_regions; region++) {
+
+ if (xpc_exiting)
+ break;
+
+ dev_dbg(xpc_part, "searching region %d\n", region);
+
+ for (nasid = (region * region_size * 2);
+ nasid < ((region + 1) * region_size * 2); nasid += 2) {
+
+ if (xpc_exiting)
+ break;
+
+ dev_dbg(xpc_part, "checking nasid %d\n", nasid);
+
+ if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
+ dev_dbg(xpc_part, "PROM indicates Nasid %d is "
+ "part of the local partition; skipping "
+ "region\n", nasid);
+ break;
+ }
+
+ if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
+ dev_dbg(xpc_part, "PROM indicates Nasid %d was "
+ "not on Numa-Link network at reset\n",
+ nasid);
+ continue;
+ }
+
+ if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
+ dev_dbg(xpc_part, "Nasid %d is part of a "
+ "partition which was previously "
+ "discovered\n", nasid);
+ continue;
+ }
+
+ /* pull over the reserved page structure */
+
+ ret = xpc_get_remote_rp(nasid, discovered_nasids,
+ remote_rp, &remote_rp_pa);
+ if (ret != xpcSuccess) {
+ dev_dbg(xpc_part, "unable to get reserved page "
+ "from nasid %d, reason=%d\n", nasid,
+ ret);
+
+ if (ret == xpcLocalPartid)
+ break;
+
+ continue;
+ }
+
+ remote_vars_pa = remote_rp->vars_pa;
+
+ partid = remote_rp->partid;
+ part = &xpc_partitions[partid];
+
+ /* pull over the cross partition variables */
+
+ ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
+ if (ret != xpcSuccess) {
+ dev_dbg(xpc_part, "unable to get XPC variables "
+ "from nasid %d, reason=%d\n", nasid,
+ ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ continue;
+ }
+
+ if (part->act_state != XPC_P_INACTIVE) {
+ dev_dbg(xpc_part, "partition %d on nasid %d is "
+ "already activating\n", partid, nasid);
+ break;
+ }
+
+ /*
+ * Register the remote partition's AMOs with SAL so it
+ * can handle and cleanup errors within that address
+ * range should the remote partition go down. We don't
+ * unregister this range because it is difficult to
+ * tell when outstanding writes to the remote partition
+ * are finished and thus when it is thus safe to
+ * unregister. This should not result in wasted space
+ * in the SAL xp_addr_region table because we should
+ * get the same page for remote_act_amos_pa after
+ * module reloads and system reboots.
+ */
+ if (sn_register_xp_addr_region
+ (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
+ dev_dbg(xpc_part,
+ "partition %d failed to "
+ "register xp_addr region 0x%016lx\n",
+ partid, remote_vars->amos_page_pa);
+
+ XPC_SET_REASON(part, xpcPhysAddrRegFailed,
+ __LINE__);
+ break;
+ }
+
+ /*
+ * The remote nasid is valid and available.
+ * Send an interrupt to that nasid to notify
+ * it that we are ready to begin activation.
+ */
+ dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
+ "nasid %d, phys_cpuid 0x%x\n",
+ remote_vars->amos_page_pa,
+ remote_vars->act_nasid,
+ remote_vars->act_phys_cpuid);
+
+ if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+ version)) {
+ part->remote_amos_page_pa =
+ remote_vars->amos_page_pa;
+ xpc_mark_partition_disengaged(part);
+ xpc_cancel_partition_disengage_request(part);
+ }
+ xpc_IPI_send_activate(remote_vars);
+ }
+ }
+
+ kfree(discovered_nasids);
+ kfree(remote_rp_base);
+}
+
+/*
+ * Given a partid, get the nasids owned by that partition from the
+ * remote partition's reserved page.
+ */
+enum xpc_retval
+xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
+{
+ struct xpc_partition *part;
+ u64 part_nasid_pa;
+ int bte_res;
+
+ part = &xpc_partitions[partid];
+ if (part->remote_rp_pa == 0)
+ return xpcPartitionDown;
+
+ memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
+
+ part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+
+ bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
+ xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
+ NULL);
+
+ return xpc_map_bte_errors(bte_res);
+}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 00000000000..a9543c65814
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,677 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ * XPNET provides a virtual network layered on top of the Cross
+ * Partition communication layer.
+ *
+ * XPNET provides direct point-to-point and broadcast-like support
+ * for an ethernet-like device. The ethernet broadcast medium is
+ * replaced with a point-to-point message structure which passes
+ * pointers to a DMA-capable block that a remote partition should
+ * retrieve and pass to the upper level networking layer.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/atomic.h>
+#include "xp.h"
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary. To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the senders skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+ u16 version; /* Version for this message */
+ u16 embedded_bytes; /* #of bytes embedded in XPC message */
+ u32 magic; /* Special number indicating this is xpnet */
+ u64 buf_pa; /* phys address of buffer to retrieve */
+ u32 size; /* #of bytes in buffer */
+ u8 leadin_ignore; /* #of bytes to ignore at the beginning */
+ u8 tailout_ignore; /* #of bytes to ignore at the end */
+ unsigned char data; /* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of message will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
+#define XPNET_MSG_DATA_MAX \
+ (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
+#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE))
+#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
+
+#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different version.
+ */
+#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
+
+#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */
+#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */
+#define XPNET_MAGIC 0x88786984 /* "XNET" */
+
+#define XPNET_VALID_MSG(_m) \
+ ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+ && (msg->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME "xp0"
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie. When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed. The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+ struct list_head free_list;
+ struct sk_buff *skb;
+ atomic_t use_count;
+};
+
+/* driver specific structure pointed to by the device structure */
+struct xpnet_dev_private {
+ struct net_device_stats stats;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static u64 xpnet_broadcast_partitions;
+/* protect above */
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment. The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K. Other protocols May use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+/*
+ * The partition id is encapsulated in the MAC address. The following
+ * define locates the octet the partid is in.
+ */
+#define XPNET_PARTID_OCTET 1
+#define XPNET_LICENSE_OCTET 2
+
+/*
+ * Define the XPNET debug device structure that is to be used with dev_dbg(),
+ * dev_err(), dev_warn(), and dev_info().
+ */
+struct device_driver xpnet_dbg_name = {
+ .name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+ .bus_id = {0}, /* set to "" */
+ .driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was recevied by XPC and forwarded to us.
+ */
+static void
+xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
+{
+ struct sk_buff *skb;
+ bte_result_t bret;
+ struct xpnet_dev_private *priv =
+ (struct xpnet_dev_private *)xpnet_device->priv;
+
+ if (!XPNET_VALID_MSG(msg)) {
+ /*
+ * Packet with a different XPC version. Ignore.
+ */
+ xpc_received(partid, channel, (void *)msg);
+
+ priv->stats.rx_errors++;
+
+ return;
+ }
+ dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+ msg->leadin_ignore, msg->tailout_ignore);
+
+ /* reserve an extra cache line */
+ skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+ if (!skb) {
+ dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+ msg->size + L1_CACHE_BYTES);
+
+ xpc_received(partid, channel, (void *)msg);
+
+ priv->stats.rx_errors++;
+
+ return;
+ }
+
+ /*
+ * The allocated skb has some reserved space.
+ * In order to use bte_copy, we need to get the
+ * skb->data pointer moved forward.
+ */
+ skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+ (L1_CACHE_BYTES - 1)) +
+ msg->leadin_ignore));
+
+ /*
+ * Update the tail pointer to indicate data actually
+ * transferred.
+ */
+ skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+ /*
+ * Move the data over from the other side.
+ */
+ if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+ (msg->embedded_bytes != 0)) {
+ dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+ "%lu)\n", skb->data, &msg->data,
+ (size_t)msg->embedded_bytes);
+
+ skb_copy_to_linear_data(skb, &msg->data,
+ (size_t)msg->embedded_bytes);
+ } else {
+ dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+ "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
+ (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+ msg->size);
+
+ bret = bte_copy(msg->buf_pa,
+ __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+ msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+
+ if (bret != BTE_SUCCESS) {
+ /*
+ * >>> Need better way of cleaning skb. Currently skb
+ * >>> appears in_use and we can't just call
+ * >>> dev_kfree_skb.
+ */
+ dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
+ "error=0x%x\n", (void *)msg->buf_pa,
+ (void *)__pa((u64)skb->data &
+ ~(L1_CACHE_BYTES - 1)),
+ msg->size, bret);
+
+ xpc_received(partid, channel, (void *)msg);
+
+ priv->stats.rx_errors++;
+
+ return;
+ }
+ }
+
+ dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+ (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+ skb->len);
+
+ skb->protocol = eth_type_trans(skb, xpnet_device);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ dev_dbg(xpnet, "passing skb to network layer\n"
+ KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n",
+ (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+ skb_end_pointer(skb), skb->len);
+
+ xpnet_device->last_rx = jiffies;
+ priv->stats.rx_packets++;
+ priv->stats.rx_bytes += skb->len + ETH_HLEN;
+
+ netif_rx_ni(skb);
+ xpc_received(partid, channel, (void *)msg);
+}
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
+ */
+static void
+xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
+ void *data, void *key)
+{
+ long bp;
+
+ DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(channel != XPC_NET_CHANNEL);
+
+ switch (reason) {
+ case xpcMsgReceived: /* message received */
+ DBUG_ON(data == NULL);
+
+ xpnet_receive(partid, channel, (struct xpnet_message *)data);
+ break;
+
+ case xpcConnected: /* connection completed to a partition */
+ spin_lock_bh(&xpnet_broadcast_lock);
+ xpnet_broadcast_partitions |= 1UL << (partid - 1);
+ bp = xpnet_broadcast_partitions;
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ netif_carrier_on(xpnet_device);
+
+ dev_dbg(xpnet, "%s connection created to partition %d; "
+ "xpnet_broadcast_partitions=0x%lx\n",
+ xpnet_device->name, partid, bp);
+ break;
+
+ default:
+ spin_lock_bh(&xpnet_broadcast_lock);
+ xpnet_broadcast_partitions &= ~(1UL << (partid - 1));
+ bp = xpnet_broadcast_partitions;
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ if (bp == 0)
+ netif_carrier_off(xpnet_device);
+
+ dev_dbg(xpnet, "%s disconnected from partition %d; "
+ "xpnet_broadcast_partitions=0x%lx\n",
+ xpnet_device->name, partid, bp);
+ break;
+
+ }
+}
+
+static int
+xpnet_dev_open(struct net_device *dev)
+{
+ enum xpc_retval ret;
+
+ dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
+ "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+ XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
+ XPNET_MAX_IDLE_KTHREADS);
+
+ ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
+ XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
+ XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
+ if (ret != xpcSuccess) {
+ dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
+ "ret=%d\n", dev->name, ret);
+
+ return -ENOMEM;
+ }
+
+ dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
+
+ return 0;
+}
+
+static int
+xpnet_dev_stop(struct net_device *dev)
+{
+ xpc_disconnect(XPC_NET_CHANNEL);
+
+ dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
+
+ return 0;
+}
+
+static int
+xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+ /* 68 comes from min TCP+IP+MAC header */
+ if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
+ dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
+ "between 68 and %ld\n", dev->name, new_mtu,
+ XPNET_MAX_MTU);
+ return -EINVAL;
+ }
+
+ dev->mtu = new_mtu;
+ dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
+ return 0;
+}
+
+/*
+ * Required for the net_device structure.
+ */
+static int
+xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
+{
+ return 0;
+}
+
+/*
+ * Return statistics to the caller.
+ */
+static struct net_device_stats *
+xpnet_dev_get_stats(struct net_device *dev)
+{
+ struct xpnet_dev_private *priv;
+
+ priv = (struct xpnet_dev_private *)dev->priv;
+
+ return &priv->stats;
+}
+
+/*
+ * Notification that the other end has received the message and
+ * DMA'd the skb information. At this point, they are done with
+ * our side. When all recipients are done processing, we
+ * release the skb and then release our pending message structure.
+ */
+static void
+xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
+ void *__qm)
+{
+ struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
+
+ DBUG_ON(queued_msg == NULL);
+
+ dev_dbg(xpnet, "message to %d notified with reason %d\n",
+ partid, reason);
+
+ if (atomic_dec_return(&queued_msg->use_count) == 0) {
+ dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
+ (void *)queued_msg->skb->head);
+
+ dev_kfree_skb_any(queued_msg->skb);
+ kfree(queued_msg);
+ }
+}
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire". Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet.
+ *
+ * MAC-NOTE: For the XPNET driver, the MAC address contains the
+ * destination partition_id. If the destination partition id word
+ * is 0xff, this packet is to broadcast to all partitions.
+ */
+static int
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xpnet_pending_msg *queued_msg;
+ enum xpc_retval ret;
+ struct xpnet_message *msg;
+ u64 start_addr, end_addr;
+ long dp;
+ u8 second_mac_octet;
+ partid_t dest_partid;
+ struct xpnet_dev_private *priv;
+ u16 embedded_bytes;
+
+ priv = (struct xpnet_dev_private *)dev->priv;
+
+ dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+ "skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+ (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+ skb->len);
+
+ /*
+ * The xpnet_pending_msg tracks how many outstanding
+ * xpc_send_notifies are relying on this skb. When none
+ * remain, release the skb.
+ */
+ queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
+ if (queued_msg == NULL) {
+ dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
+ "packet\n", sizeof(struct xpnet_pending_msg));
+
+ priv->stats.tx_errors++;
+
+ return -ENOMEM;
+ }
+
+ /* get the beginning of the first cacheline and end of last */
+ start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+ end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
+
+ /* calculate how many bytes to embed in the XPC message */
+ embedded_bytes = 0;
+ if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
+ /* skb->data does fit so embed */
+ embedded_bytes = skb->len;
+ }
+
+ /*
+ * Since the send occurs asynchronously, we set the count to one
+ * and begin sending. Any sends that happen to complete before
+ * we are done sending will not free the skb. We will be left
+ * with that task during exit. This also handles the case of
+ * a packet destined for a partition which is no longer up.
+ */
+ atomic_set(&queued_msg->use_count, 1);
+ queued_msg->skb = skb;
+
+ second_mac_octet = skb->data[XPNET_PARTID_OCTET];
+ if (second_mac_octet == 0xff) {
+ /* we are being asked to broadcast to all partitions */
+ dp = xpnet_broadcast_partitions;
+ } else if (second_mac_octet != 0) {
+ dp = xpnet_broadcast_partitions &
+ (1UL << (second_mac_octet - 1));
+ } else {
+ /* 0 is an invalid partid. Ignore */
+ dp = 0;
+ }
+ dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
+
+ /*
+ * If we wanted to allow promiscuous mode to work like an
+ * unswitched network, this would be a good point to OR in a
+ * mask of partitions which should be receiving all packets.
+ */
+
+ /*
+ * Main send loop.
+ */
+ for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
+ dest_partid++) {
+
+ if (!(dp & (1UL << (dest_partid - 1)))) {
+ /* not destined for this partition */
+ continue;
+ }
+
+ /* remove this partition from the destinations mask */
+ dp &= ~(1UL << (dest_partid - 1));
+
+ /* found a partition to send to */
+
+ ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
+ XPC_NOWAIT, (void **)&msg);
+ if (unlikely(ret != xpcSuccess))
+ continue;
+
+ msg->embedded_bytes = embedded_bytes;
+ if (unlikely(embedded_bytes != 0)) {
+ msg->version = XPNET_VERSION_EMBED;
+ dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+ &msg->data, skb->data, (size_t)embedded_bytes);
+ skb_copy_from_linear_data(skb, &msg->data,
+ (size_t)embedded_bytes);
+ } else {
+ msg->version = XPNET_VERSION;
+ }
+ msg->magic = XPNET_MAGIC;
+ msg->size = end_addr - start_addr;
+ msg->leadin_ignore = (u64)skb->data - start_addr;
+ msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+ msg->buf_pa = __pa(start_addr);
+
+ dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+ KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
+ "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+ dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+ msg->leadin_ignore, msg->tailout_ignore);
+
+ atomic_inc(&queued_msg->use_count);
+
+ ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
+ xpnet_send_completed, queued_msg);
+ if (unlikely(ret != xpcSuccess)) {
+ atomic_dec(&queued_msg->use_count);
+ continue;
+ }
+ }
+
+ if (atomic_dec_return(&queued_msg->use_count) == 0) {
+ dev_dbg(xpnet, "no partitions to receive packet destined for "
+ "%d\n", dest_partid);
+
+ dev_kfree_skb(skb);
+ kfree(queued_msg);
+ }
+
+ priv->stats.tx_packets++;
+ priv->stats.tx_bytes += skb->len;
+
+ return 0;
+}
+
+/*
+ * Deal with transmit timeouts coming from the network layer.
+ */
+static void
+xpnet_dev_tx_timeout(struct net_device *dev)
+{
+ struct xpnet_dev_private *priv;
+
+ priv = (struct xpnet_dev_private *)dev->priv;
+
+ priv->stats.tx_errors++;
+ return;
+}
+
+static int __init
+xpnet_init(void)
+{
+ int i;
+ u32 license_num;
+ int result = -ENOMEM;
+
+ if (!ia64_platform_is("sn2"))
+ return -ENODEV;
+
+ dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+
+ /*
+ * use ether_setup() to init the majority of our device
+ * structure and then override the necessary pieces.
+ */
+ xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
+ XPNET_DEVICE_NAME, ether_setup);
+ if (xpnet_device == NULL)
+ return -ENOMEM;
+
+ netif_carrier_off(xpnet_device);
+
+ xpnet_device->mtu = XPNET_DEF_MTU;
+ xpnet_device->change_mtu = xpnet_dev_change_mtu;
+ xpnet_device->open = xpnet_dev_open;
+ xpnet_device->get_stats = xpnet_dev_get_stats;
+ xpnet_device->stop = xpnet_dev_stop;
+ xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
+ xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
+ xpnet_device->set_config = xpnet_dev_set_config;
+
+ /*
+ * Multicast assumes the LSB of the first octet is set for multicast
+ * MAC addresses. We chose the first octet of the MAC to be unlikely
+ * to collide with any vendor's officially issued MAC.
+ */
+ xpnet_device->dev_addr[0] = 0xfe;
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
+ license_num = sn_partition_serial_number_val();
+ for (i = 3; i >= 0; i--) {
+ xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
+ license_num & 0xff;
+ license_num = license_num >> 8;
+ }
+
+ /*
+ * ether_setup() sets this to a multicast device. We are
+ * really not supporting multicast at this time.
+ */
+ xpnet_device->flags &= ~IFF_MULTICAST;
+
+ /*
+ * No need to checksum as it is a DMA transfer. The BTE will
+ * report an error if the data is not retrievable and the
+ * packet will be dropped.
+ */
+ xpnet_device->features = NETIF_F_NO_CSUM;
+
+ result = register_netdev(xpnet_device);
+ if (result != 0)
+ free_netdev(xpnet_device);
+
+ return result;
+}
+
+module_init(xpnet_init);
+
+static void __exit
+xpnet_exit(void)
+{
+ dev_info(xpnet, "unregistering network device %s\n",
+ xpnet_device[0].name);
+
+ unregister_netdev(xpnet_device);
+
+ free_netdev(xpnet_device);
+}
+
+module_exit(xpnet_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 6cb781262f9..3f28f6eabdb 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -21,7 +21,7 @@
* 02110-1301, USA.
*/
-#define TPACPI_VERSION "0.19"
+#define TPACPI_VERSION "0.20"
#define TPACPI_SYSFS_VERSION 0x020200
/*
@@ -67,6 +67,7 @@
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/input.h>
+#include <linux/leds.h>
#include <asm/uaccess.h>
#include <linux/dmi.h>
@@ -85,6 +86,8 @@
#define TP_CMOS_VOLUME_MUTE 2
#define TP_CMOS_BRIGHTNESS_UP 4
#define TP_CMOS_BRIGHTNESS_DOWN 5
+#define TP_CMOS_THINKLIGHT_ON 12
+#define TP_CMOS_THINKLIGHT_OFF 13
/* NVRAM Addresses */
enum tp_nvram_addr {
@@ -133,8 +136,12 @@ enum {
#define TPACPI_PROC_DIR "ibm"
#define TPACPI_ACPI_EVENT_PREFIX "ibm"
#define TPACPI_DRVR_NAME TPACPI_FILE
+#define TPACPI_DRVR_SHORTNAME "tpacpi"
#define TPACPI_HWMON_DRVR_NAME TPACPI_NAME "_hwmon"
+#define TPACPI_NVRAM_KTHREAD_NAME "ktpacpi_nvramd"
+#define TPACPI_WORKQUEUE_NAME "ktpacpid"
+
#define TPACPI_MAX_ACPI_ARGS 3
/* Debugging */
@@ -225,6 +232,7 @@ static struct {
u32 light:1;
u32 light_status:1;
u32 bright_16levels:1;
+ u32 bright_acpimode:1;
u32 wan:1;
u32 fan_ctrl_status_undef:1;
u32 input_device_registered:1;
@@ -236,6 +244,11 @@ static struct {
u32 hotkey_poll_active:1;
} tp_features;
+static struct {
+ u16 hotkey_mask_ff:1;
+ u16 bright_cmos_ec_unsync:1;
+} tp_warned;
+
struct thinkpad_id_data {
unsigned int vendor; /* ThinkPad vendor:
* PCI_VENDOR_ID_IBM/PCI_VENDOR_ID_LENOVO */
@@ -246,7 +259,8 @@ struct thinkpad_id_data {
u16 bios_model; /* Big Endian, TP-1Y = 0x5931, 0 = unknown */
u16 ec_model;
- char *model_str;
+ char *model_str; /* ThinkPad T43 */
+ char *nummodel_str; /* 9384A9C for a 9384-A9C model */
};
static struct thinkpad_id_data thinkpad_id;
@@ -259,6 +273,16 @@ static enum {
static int experimental;
static u32 dbg_level;
+static struct workqueue_struct *tpacpi_wq;
+
+/* Special LED class that can defer work */
+struct tpacpi_led_classdev {
+ struct led_classdev led_classdev;
+ struct work_struct work;
+ enum led_brightness new_brightness;
+ unsigned int led;
+};
+
/****************************************************************************
****************************************************************************
*
@@ -807,6 +831,80 @@ static int parse_strtoul(const char *buf,
return 0;
}
+static int __init tpacpi_query_bcl_levels(acpi_handle handle)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ int rc;
+
+ if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
+ obj = (union acpi_object *)buffer.pointer;
+ if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
+ printk(TPACPI_ERR "Unknown _BCL data, "
+ "please report this to %s\n", TPACPI_MAIL);
+ rc = 0;
+ } else {
+ rc = obj->package.count;
+ }
+ } else {
+ return 0;
+ }
+
+ kfree(buffer.pointer);
+ return rc;
+}
+
+static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle,
+ u32 lvl, void *context, void **rv)
+{
+ char name[ACPI_PATH_SEGMENT_LENGTH];
+ struct acpi_buffer buffer = { sizeof(name), &name };
+
+ if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
+ !strncmp("_BCL", name, sizeof(name) - 1)) {
+ BUG_ON(!rv || !*rv);
+ **(int **)rv = tpacpi_query_bcl_levels(handle);
+ return AE_CTRL_TERMINATE;
+ } else {
+ return AE_OK;
+ }
+}
+
+/*
+ * Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map
+ */
+static int __init tpacpi_check_std_acpi_brightness_support(void)
+{
+ int status;
+ int bcl_levels = 0;
+ void *bcl_ptr = &bcl_levels;
+
+ if (!vid_handle) {
+ TPACPI_ACPIHANDLE_INIT(vid);
+ }
+ if (!vid_handle)
+ return 0;
+
+ /*
+ * Search for a _BCL method, and execute it. This is safe on all
+ * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista
+ * BIOS in ACPI backlight control mode. We do NOT have to care
+ * about calling the _BCL method in an enabled video device, any
+ * will do for our purposes.
+ */
+
+ status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
+ tpacpi_acpi_walk_find_bcl, NULL,
+ &bcl_ptr);
+
+ if (ACPI_SUCCESS(status) && bcl_levels > 2) {
+ tp_features.bright_acpimode = 1;
+ return (bcl_levels - 2);
+ }
+
+ return 0;
+}
+
/*************************************************************************
* thinkpad-acpi driver attributes
*/
@@ -909,12 +1007,14 @@ static int __init thinkpad_acpi_driver_init(struct ibm_init_struct *iibm)
thinkpad_id.ec_version_str : "unknown");
if (thinkpad_id.vendor && thinkpad_id.model_str)
- printk(TPACPI_INFO "%s %s\n",
+ printk(TPACPI_INFO "%s %s, model %s\n",
(thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ?
"IBM" : ((thinkpad_id.vendor ==
PCI_VENDOR_ID_LENOVO) ?
"Lenovo" : "Unknown vendor"),
- thinkpad_id.model_str);
+ thinkpad_id.model_str,
+ (thinkpad_id.nummodel_str) ?
+ thinkpad_id.nummodel_str : "unknown");
return 0;
}
@@ -1107,6 +1207,19 @@ static int hotkey_mask_set(u32 mask)
int rc = 0;
if (tp_features.hotkey_mask) {
+ if (!tp_warned.hotkey_mask_ff &&
+ (mask == 0xffff || mask == 0xffffff ||
+ mask == 0xffffffff)) {
+ tp_warned.hotkey_mask_ff = 1;
+ printk(TPACPI_NOTICE
+ "setting the hotkey mask to 0x%08x is likely "
+ "not the best way to go about it\n", mask);
+ printk(TPACPI_NOTICE
+ "please consider using the driver defaults, "
+ "and refer to up-to-date thinkpad-acpi "
+ "documentation\n");
+ }
+
HOTKEY_CONFIG_CRITICAL_START
for (i = 0; i < 32; i++) {
u32 m = 1 << i;
@@ -1427,8 +1540,7 @@ static void hotkey_poll_setup(int may_warn)
(tpacpi_inputdev->users > 0 || hotkey_report_mode < 2)) {
if (!tpacpi_hotkey_task) {
tpacpi_hotkey_task = kthread_run(hotkey_kthread,
- NULL,
- TPACPI_FILE "d");
+ NULL, TPACPI_NVRAM_KTHREAD_NAME);
if (IS_ERR(tpacpi_hotkey_task)) {
tpacpi_hotkey_task = NULL;
printk(TPACPI_ERR
@@ -1887,6 +1999,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
KEY_UNKNOWN, /* 0x0D: FN+INSERT */
KEY_UNKNOWN, /* 0x0E: FN+DELETE */
+ /* These either have to go through ACPI video, or
+ * act like in the IBM ThinkPads, so don't ever
+ * enable them by default */
KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */
KEY_RESERVED, /* 0x10: FN+END (brightness down) */
@@ -2091,6 +2206,32 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
set_bit(SW_TABLET_MODE, tpacpi_inputdev->swbit);
}
+ /* Do not issue duplicate brightness change events to
+ * userspace */
+ if (!tp_features.bright_acpimode)
+ /* update bright_acpimode... */
+ tpacpi_check_std_acpi_brightness_support();
+
+ if (tp_features.bright_acpimode) {
+ printk(TPACPI_INFO
+ "This ThinkPad has standard ACPI backlight "
+ "brightness control, supported by the ACPI "
+ "video driver\n");
+ printk(TPACPI_NOTICE
+ "Disabling thinkpad-acpi brightness events "
+ "by default...\n");
+
+ /* The hotkey_reserved_mask change below is not
+ * necessary while the keys are at KEY_RESERVED in the
+ * default map, but better safe than sorry, leave it
+ * here as a marker of what we have to do, especially
+ * when we finally become able to set this at runtime
+ * on response to X.org requests */
+ hotkey_reserved_mask |=
+ (1 << TP_ACPI_HOTKEYSCAN_FNHOME)
+ | (1 << TP_ACPI_HOTKEYSCAN_FNEND);
+ }
+
dbg_printk(TPACPI_DBG_INIT,
"enabling hot key handling\n");
res = hotkey_status_set(1);
@@ -3110,13 +3251,82 @@ static struct ibm_struct video_driver_data = {
TPACPI_HANDLE(lght, root, "\\LGHT"); /* A21e, A2xm/p, T20-22, X20-21 */
TPACPI_HANDLE(ledb, ec, "LEDB"); /* G4x */
+static int light_get_status(void)
+{
+ int status = 0;
+
+ if (tp_features.light_status) {
+ if (!acpi_evalf(ec_handle, &status, "KBLT", "d"))
+ return -EIO;
+ return (!!status);
+ }
+
+ return -ENXIO;
+}
+
+static int light_set_status(int status)
+{
+ int rc;
+
+ if (tp_features.light) {
+ if (cmos_handle) {
+ rc = acpi_evalf(cmos_handle, NULL, NULL, "vd",
+ (status)?
+ TP_CMOS_THINKLIGHT_ON :
+ TP_CMOS_THINKLIGHT_OFF);
+ } else {
+ rc = acpi_evalf(lght_handle, NULL, NULL, "vd",
+ (status)? 1 : 0);
+ }
+ return (rc)? 0 : -EIO;
+ }
+
+ return -ENXIO;
+}
+
+static void light_set_status_worker(struct work_struct *work)
+{
+ struct tpacpi_led_classdev *data =
+ container_of(work, struct tpacpi_led_classdev, work);
+
+ if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
+ light_set_status((data->new_brightness != LED_OFF));
+}
+
+static void light_sysfs_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+{
+ struct tpacpi_led_classdev *data =
+ container_of(led_cdev,
+ struct tpacpi_led_classdev,
+ led_classdev);
+ data->new_brightness = brightness;
+ queue_work(tpacpi_wq, &data->work);
+}
+
+static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
+{
+ return (light_get_status() == 1)? LED_FULL : LED_OFF;
+}
+
+static struct tpacpi_led_classdev tpacpi_led_thinklight = {
+ .led_classdev = {
+ .name = "tpacpi::thinklight",
+ .brightness_set = &light_sysfs_set,
+ .brightness_get = &light_sysfs_get,
+ }
+};
+
static int __init light_init(struct ibm_init_struct *iibm)
{
+ int rc = 0;
+
vdbg_printk(TPACPI_DBG_INIT, "initializing light subdriver\n");
TPACPI_ACPIHANDLE_INIT(ledb);
TPACPI_ACPIHANDLE_INIT(lght);
TPACPI_ACPIHANDLE_INIT(cmos);
+ INIT_WORK(&tpacpi_led_thinklight.work, light_set_status_worker);
/* light not supported on 570, 600e/x, 770e, 770x, G4x, R30, R31 */
tp_features.light = (cmos_handle || lght_handle) && !ledb_handle;
@@ -3130,13 +3340,31 @@ static int __init light_init(struct ibm_init_struct *iibm)
vdbg_printk(TPACPI_DBG_INIT, "light is %s\n",
str_supported(tp_features.light));
- return (tp_features.light)? 0 : 1;
+ if (tp_features.light) {
+ rc = led_classdev_register(&tpacpi_pdev->dev,
+ &tpacpi_led_thinklight.led_classdev);
+ }
+
+ if (rc < 0) {
+ tp_features.light = 0;
+ tp_features.light_status = 0;
+ } else {
+ rc = (tp_features.light)? 0 : 1;
+ }
+ return rc;
+}
+
+static void light_exit(void)
+{
+ led_classdev_unregister(&tpacpi_led_thinklight.led_classdev);
+ if (work_pending(&tpacpi_led_thinklight.work))
+ flush_workqueue(tpacpi_wq);
}
static int light_read(char *p)
{
int len = 0;
- int status = 0;
+ int status;
if (!tp_features.light) {
len += sprintf(p + len, "status:\t\tnot supported\n");
@@ -3144,8 +3372,9 @@ static int light_read(char *p)
len += sprintf(p + len, "status:\t\tunknown\n");
len += sprintf(p + len, "commands:\ton, off\n");
} else {
- if (!acpi_evalf(ec_handle, &status, "KBLT", "d"))
- return -EIO;
+ status = light_get_status();
+ if (status < 0)
+ return status;
len += sprintf(p + len, "status:\t\t%s\n", onoff(status, 0));
len += sprintf(p + len, "commands:\ton, off\n");
}
@@ -3155,37 +3384,29 @@ static int light_read(char *p)
static int light_write(char *buf)
{
- int cmos_cmd, lght_cmd;
char *cmd;
- int success;
+ int newstatus = 0;
if (!tp_features.light)
return -ENODEV;
while ((cmd = next_cmd(&buf))) {
if (strlencmp(cmd, "on") == 0) {
- cmos_cmd = 0x0c;
- lght_cmd = 1;
+ newstatus = 1;
} else if (strlencmp(cmd, "off") == 0) {
- cmos_cmd = 0x0d;
- lght_cmd = 0;
+ newstatus = 0;
} else
return -EINVAL;
-
- success = cmos_handle ?
- acpi_evalf(cmos_handle, NULL, NULL, "vd", cmos_cmd) :
- acpi_evalf(lght_handle, NULL, NULL, "vd", lght_cmd);
- if (!success)
- return -EIO;
}
- return 0;
+ return light_set_status(newstatus);
}
static struct ibm_struct light_driver_data = {
.name = "light",
.read = light_read,
.write = light_write,
+ .exit = light_exit,
};
/*************************************************************************
@@ -3583,6 +3804,12 @@ enum { /* For TPACPI_LED_OLD */
TPACPI_LED_EC_HLMS = 0x0e, /* EC reg to select led to command */
};
+enum led_status_t {
+ TPACPI_LED_OFF = 0,
+ TPACPI_LED_ON,
+ TPACPI_LED_BLINK,
+};
+
static enum led_access_mode led_supported;
TPACPI_HANDLE(led, ec, "SLED", /* 570 */
@@ -3591,8 +3818,174 @@ TPACPI_HANDLE(led, ec, "SLED", /* 570 */
"LED", /* all others */
); /* R30, R31 */
+#define TPACPI_LED_NUMLEDS 8
+static struct tpacpi_led_classdev *tpacpi_leds;
+static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS];
+static const char const *tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
+ /* there's a limit of 19 chars + NULL before 2.6.26 */
+ "tpacpi::power",
+ "tpacpi:orange:batt",
+ "tpacpi:green:batt",
+ "tpacpi::dock_active",
+ "tpacpi::bay_active",
+ "tpacpi::dock_batt",
+ "tpacpi::unknown_led",
+ "tpacpi::standby",
+};
+
+static int led_get_status(unsigned int led)
+{
+ int status;
+ enum led_status_t led_s;
+
+ switch (led_supported) {
+ case TPACPI_LED_570:
+ if (!acpi_evalf(ec_handle,
+ &status, "GLED", "dd", 1 << led))
+ return -EIO;
+ led_s = (status == 0)?
+ TPACPI_LED_OFF :
+ ((status == 1)?
+ TPACPI_LED_ON :
+ TPACPI_LED_BLINK);
+ tpacpi_led_state_cache[led] = led_s;
+ return led_s;
+ default:
+ return -ENXIO;
+ }
+
+ /* not reached */
+}
+
+static int led_set_status(unsigned int led, enum led_status_t ledstatus)
+{
+ /* off, on, blink. Index is led_status_t */
+ static const int const led_sled_arg1[] = { 0, 1, 3 };
+ static const int const led_exp_hlbl[] = { 0, 0, 1 }; /* led# * */
+ static const int const led_exp_hlcl[] = { 0, 1, 1 }; /* led# * */
+ static const int const led_led_arg1[] = { 0, 0x80, 0xc0 };
+
+ int rc = 0;
+
+ switch (led_supported) {
+ case TPACPI_LED_570:
+ /* 570 */
+ led = 1 << led;
+ if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
+ led, led_sled_arg1[ledstatus]))
+ rc = -EIO;
+ break;
+ case TPACPI_LED_OLD:
+ /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20 */
+ led = 1 << led;
+ rc = ec_write(TPACPI_LED_EC_HLMS, led);
+ if (rc >= 0)
+ rc = ec_write(TPACPI_LED_EC_HLBL,
+ led * led_exp_hlbl[ledstatus]);
+ if (rc >= 0)
+ rc = ec_write(TPACPI_LED_EC_HLCL,
+ led * led_exp_hlcl[ledstatus]);
+ break;
+ case TPACPI_LED_NEW:
+ /* all others */
+ if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
+ led, led_led_arg1[ledstatus]))
+ rc = -EIO;
+ break;
+ default:
+ rc = -ENXIO;
+ }
+
+ if (!rc)
+ tpacpi_led_state_cache[led] = ledstatus;
+
+ return rc;
+}
+
+static void led_sysfs_set_status(unsigned int led,
+ enum led_brightness brightness)
+{
+ led_set_status(led,
+ (brightness == LED_OFF) ?
+ TPACPI_LED_OFF :
+ (tpacpi_led_state_cache[led] == TPACPI_LED_BLINK) ?
+ TPACPI_LED_BLINK : TPACPI_LED_ON);
+}
+
+static void led_set_status_worker(struct work_struct *work)
+{
+ struct tpacpi_led_classdev *data =
+ container_of(work, struct tpacpi_led_classdev, work);
+
+ if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
+ led_sysfs_set_status(data->led, data->new_brightness);
+}
+
+static void led_sysfs_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+{
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ data->new_brightness = brightness;
+ queue_work(tpacpi_wq, &data->work);
+}
+
+static int led_sysfs_blink_set(struct led_classdev *led_cdev,
+ unsigned long *delay_on, unsigned long *delay_off)
+{
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ /* Can we choose the flash rate? */
+ if (*delay_on == 0 && *delay_off == 0) {
+ /* yes. set them to the hardware blink rate (1 Hz) */
+ *delay_on = 500; /* ms */
+ *delay_off = 500; /* ms */
+ } else if ((*delay_on != 500) || (*delay_off != 500))
+ return -EINVAL;
+
+ data->new_brightness = TPACPI_LED_BLINK;
+ queue_work(tpacpi_wq, &data->work);
+
+ return 0;
+}
+
+static enum led_brightness led_sysfs_get(struct led_classdev *led_cdev)
+{
+ int rc;
+
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ rc = led_get_status(data->led);
+
+ if (rc == TPACPI_LED_OFF || rc < 0)
+ rc = LED_OFF; /* no error handling in led class :( */
+ else
+ rc = LED_FULL;
+
+ return rc;
+}
+
+static void led_exit(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
+ if (tpacpi_leds[i].led_classdev.name)
+ led_classdev_unregister(&tpacpi_leds[i].led_classdev);
+ }
+
+ kfree(tpacpi_leds);
+ tpacpi_leds = NULL;
+}
+
static int __init led_init(struct ibm_init_struct *iibm)
{
+ unsigned int i;
+ int rc;
+
vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n");
TPACPI_ACPIHANDLE_INIT(led);
@@ -3613,10 +4006,41 @@ static int __init led_init(struct ibm_init_struct *iibm)
vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
str_supported(led_supported), led_supported);
+ tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
+ GFP_KERNEL);
+ if (!tpacpi_leds) {
+ printk(TPACPI_ERR "Out of memory for LED data\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
+ tpacpi_leds[i].led = i;
+
+ tpacpi_leds[i].led_classdev.brightness_set = &led_sysfs_set;
+ tpacpi_leds[i].led_classdev.blink_set = &led_sysfs_blink_set;
+ if (led_supported == TPACPI_LED_570)
+ tpacpi_leds[i].led_classdev.brightness_get =
+ &led_sysfs_get;
+
+ tpacpi_leds[i].led_classdev.name = tpacpi_led_names[i];
+
+ INIT_WORK(&tpacpi_leds[i].work, led_set_status_worker);
+
+ rc = led_classdev_register(&tpacpi_pdev->dev,
+ &tpacpi_leds[i].led_classdev);
+ if (rc < 0) {
+ tpacpi_leds[i].led_classdev.name = NULL;
+ led_exit();
+ return rc;
+ }
+ }
+
return (led_supported != TPACPI_LED_NONE)? 0 : 1;
}
-#define led_status(s) ((s) == 0 ? "off" : ((s) == 1 ? "on" : "blinking"))
+#define str_led_status(s) \
+ ((s) == TPACPI_LED_OFF ? "off" : \
+ ((s) == TPACPI_LED_ON ? "on" : "blinking"))
static int led_read(char *p)
{
@@ -3632,11 +4056,11 @@ static int led_read(char *p)
/* 570 */
int i, status;
for (i = 0; i < 8; i++) {
- if (!acpi_evalf(ec_handle,
- &status, "GLED", "dd", 1 << i))
+ status = led_get_status(i);
+ if (status < 0)
return -EIO;
len += sprintf(p + len, "%d:\t\t%s\n",
- i, led_status(status));
+ i, str_led_status(status));
}
}
@@ -3646,16 +4070,11 @@ static int led_read(char *p)
return len;
}
-/* off, on, blink */
-static const int led_sled_arg1[] = { 0, 1, 3 };
-static const int led_exp_hlbl[] = { 0, 0, 1 }; /* led# * */
-static const int led_exp_hlcl[] = { 0, 1, 1 }; /* led# * */
-static const int led_led_arg1[] = { 0, 0x80, 0xc0 };
-
static int led_write(char *buf)
{
char *cmd;
- int led, ind, ret;
+ int led, rc;
+ enum led_status_t s;
if (!led_supported)
return -ENODEV;
@@ -3665,38 +4084,18 @@ static int led_write(char *buf)
return -EINVAL;
if (strstr(cmd, "off")) {
- ind = 0;
+ s = TPACPI_LED_OFF;
} else if (strstr(cmd, "on")) {
- ind = 1;
+ s = TPACPI_LED_ON;
} else if (strstr(cmd, "blink")) {
- ind = 2;
- } else
- return -EINVAL;
-
- if (led_supported == TPACPI_LED_570) {
- /* 570 */
- led = 1 << led;
- if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
- led, led_sled_arg1[ind]))
- return -EIO;
- } else if (led_supported == TPACPI_LED_OLD) {
- /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20 */
- led = 1 << led;
- ret = ec_write(TPACPI_LED_EC_HLMS, led);
- if (ret >= 0)
- ret = ec_write(TPACPI_LED_EC_HLBL,
- led * led_exp_hlbl[ind]);
- if (ret >= 0)
- ret = ec_write(TPACPI_LED_EC_HLCL,
- led * led_exp_hlcl[ind]);
- if (ret < 0)
- return ret;
+ s = TPACPI_LED_BLINK;
} else {
- /* all others */
- if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
- led, led_led_arg1[ind]))
- return -EIO;
+ return -EINVAL;
}
+
+ rc = led_set_status(led, s);
+ if (rc < 0)
+ return rc;
}
return 0;
@@ -3706,6 +4105,7 @@ static struct ibm_struct led_driver_data = {
.name = "led",
.read = led_read,
.write = led_write,
+ .exit = led_exit,
};
/*************************************************************************
@@ -4170,8 +4570,16 @@ static struct ibm_struct ecdump_driver_data = {
#define TPACPI_BACKLIGHT_DEV_NAME "thinkpad_screen"
+enum {
+ TP_EC_BACKLIGHT = 0x31,
+
+ /* TP_EC_BACKLIGHT bitmasks */
+ TP_EC_BACKLIGHT_LVLMSK = 0x1F,
+ TP_EC_BACKLIGHT_CMDMSK = 0xE0,
+ TP_EC_BACKLIGHT_MAPSW = 0x20,
+};
+
static struct backlight_device *ibm_backlight_device;
-static int brightness_offset = 0x31;
static int brightness_mode;
static unsigned int brightness_enable = 2; /* 2 = auto, 0 = no, 1 = yes */
@@ -4180,16 +4588,24 @@ static struct mutex brightness_mutex;
/*
* ThinkPads can read brightness from two places: EC 0x31, or
* CMOS NVRAM byte 0x5E, bits 0-3.
+ *
+ * EC 0x31 has the following layout
+ * Bit 7: unknown function
+ * Bit 6: unknown function
+ * Bit 5: Z: honour scale changes, NZ: ignore scale changes
+ * Bit 4: must be set to zero to avoid problems
+ * Bit 3-0: backlight brightness level
+ *
+ * brightness_get_raw returns status data in the EC 0x31 layout
*/
-static int brightness_get(struct backlight_device *bd)
+static int brightness_get_raw(int *status)
{
u8 lec = 0, lcmos = 0, level = 0;
if (brightness_mode & 1) {
- if (!acpi_ec_read(brightness_offset, &lec))
+ if (!acpi_ec_read(TP_EC_BACKLIGHT, &lec))
return -EIO;
- lec &= (tp_features.bright_16levels)? 0x0f : 0x07;
- level = lec;
+ level = lec & TP_EC_BACKLIGHT_LVLMSK;
};
if (brightness_mode & 2) {
lcmos = (nvram_read_byte(TP_NVRAM_ADDR_BRIGHTNESS)
@@ -4199,16 +4615,27 @@ static int brightness_get(struct backlight_device *bd)
level = lcmos;
}
- if (brightness_mode == 3 && lec != lcmos) {
- printk(TPACPI_ERR
- "CMOS NVRAM (%u) and EC (%u) do not agree "
- "on display brightness level\n",
- (unsigned int) lcmos,
- (unsigned int) lec);
- return -EIO;
+ if (brightness_mode == 3) {
+ *status = lec; /* Prefer EC, CMOS is just a backing store */
+ lec &= TP_EC_BACKLIGHT_LVLMSK;
+ if (lec == lcmos)
+ tp_warned.bright_cmos_ec_unsync = 0;
+ else {
+ if (!tp_warned.bright_cmos_ec_unsync) {
+ printk(TPACPI_ERR
+ "CMOS NVRAM (%u) and EC (%u) do not "
+ "agree on display brightness level\n",
+ (unsigned int) lcmos,
+ (unsigned int) lec);
+ tp_warned.bright_cmos_ec_unsync = 1;
+ }
+ return -EIO;
+ }
+ } else {
+ *status = level;
}
- return level;
+ return 0;
}
/* May return EINTR which can always be mapped to ERESTARTSYS */
@@ -4216,19 +4643,22 @@ static int brightness_set(int value)
{
int cmos_cmd, inc, i, res;
int current_value;
+ int command_bits;
- if (value > ((tp_features.bright_16levels)? 15 : 7))
+ if (value > ((tp_features.bright_16levels)? 15 : 7) ||
+ value < 0)
return -EINVAL;
res = mutex_lock_interruptible(&brightness_mutex);
if (res < 0)
return res;
- current_value = brightness_get(NULL);
- if (current_value < 0) {
- res = current_value;
+ res = brightness_get_raw(&current_value);
+ if (res < 0)
goto errout;
- }
+
+ command_bits = current_value & TP_EC_BACKLIGHT_CMDMSK;
+ current_value &= TP_EC_BACKLIGHT_LVLMSK;
cmos_cmd = value > current_value ?
TP_CMOS_BRIGHTNESS_UP :
@@ -4243,7 +4673,8 @@ static int brightness_set(int value)
goto errout;
}
if ((brightness_mode & 1) &&
- !acpi_ec_write(brightness_offset, i + inc)) {
+ !acpi_ec_write(TP_EC_BACKLIGHT,
+ (i + inc) | command_bits)) {
res = -EIO;
goto errout;;
}
@@ -4266,106 +4697,23 @@ static int brightness_update_status(struct backlight_device *bd)
bd->props.brightness : 0);
}
-static struct backlight_ops ibm_backlight_data = {
- .get_brightness = brightness_get,
- .update_status = brightness_update_status,
-};
-
-/* --------------------------------------------------------------------- */
-
-static int __init tpacpi_query_bcll_levels(acpi_handle handle)
-{
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
- int rc;
-
- if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
- obj = (union acpi_object *)buffer.pointer;
- if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
- printk(TPACPI_ERR "Unknown BCLL data, "
- "please report this to %s\n", TPACPI_MAIL);
- rc = 0;
- } else {
- rc = obj->package.count;
- }
- } else {
- return 0;
- }
-
- kfree(buffer.pointer);
- return rc;
-}
-
-static acpi_status __init brightness_find_bcll(acpi_handle handle, u32 lvl,
- void *context, void **rv)
-{
- char name[ACPI_PATH_SEGMENT_LENGTH];
- struct acpi_buffer buffer = { sizeof(name), &name };
-
- if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
- !strncmp("BCLL", name, sizeof(name) - 1)) {
- if (tpacpi_query_bcll_levels(handle) == 16) {
- *rv = handle;
- return AE_CTRL_TERMINATE;
- } else {
- return AE_OK;
- }
- } else {
- return AE_OK;
- }
-}
-
-static int __init brightness_check_levels(void)
+static int brightness_get(struct backlight_device *bd)
{
- int status;
- void *found_node = NULL;
+ int status, res;
- if (!vid_handle) {
- TPACPI_ACPIHANDLE_INIT(vid);
- }
- if (!vid_handle)
- return 0;
-
- /* Search for a BCLL package with 16 levels */
- status = acpi_walk_namespace(ACPI_TYPE_PACKAGE, vid_handle, 3,
- brightness_find_bcll, NULL,
- &found_node);
-
- return (ACPI_SUCCESS(status) && found_node != NULL);
-}
-
-static acpi_status __init brightness_find_bcl(acpi_handle handle, u32 lvl,
- void *context, void **rv)
-{
- char name[ACPI_PATH_SEGMENT_LENGTH];
- struct acpi_buffer buffer = { sizeof(name), &name };
+ res = brightness_get_raw(&status);
+ if (res < 0)
+ return 0; /* FIXME: teach backlight about error handling */
- if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
- !strncmp("_BCL", name, sizeof(name) - 1)) {
- *rv = handle;
- return AE_CTRL_TERMINATE;
- } else {
- return AE_OK;
- }
+ return status & TP_EC_BACKLIGHT_LVLMSK;
}
-static int __init brightness_check_std_acpi_support(void)
-{
- int status;
- void *found_node = NULL;
-
- if (!vid_handle) {
- TPACPI_ACPIHANDLE_INIT(vid);
- }
- if (!vid_handle)
- return 0;
-
- /* Search for a _BCL method, but don't execute it */
- status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
- brightness_find_bcl, NULL, &found_node);
+static struct backlight_ops ibm_backlight_data = {
+ .get_brightness = brightness_get,
+ .update_status = brightness_update_status,
+};
- return (ACPI_SUCCESS(status) && found_node != NULL);
-}
+/* --------------------------------------------------------------------- */
static int __init brightness_init(struct ibm_init_struct *iibm)
{
@@ -4375,13 +4723,19 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
mutex_init(&brightness_mutex);
- if (!brightness_enable) {
- dbg_printk(TPACPI_DBG_INIT,
- "brightness support disabled by "
- "module parameter\n");
- return 1;
- } else if (brightness_enable > 1) {
- if (brightness_check_std_acpi_support()) {
+ /*
+ * We always attempt to detect acpi support, so as to switch
+ * Lenovo Vista BIOS to ACPI brightness mode even if we are not
+ * going to publish a backlight interface
+ */
+ b = tpacpi_check_std_acpi_brightness_support();
+ if (b > 0) {
+ if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) {
+ printk(TPACPI_NOTICE
+ "Lenovo BIOS switched to ACPI backlight "
+ "control mode\n");
+ }
+ if (brightness_enable > 1) {
printk(TPACPI_NOTICE
"standard ACPI backlight interface "
"available, not loading native one...\n");
@@ -4389,6 +4743,22 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
}
}
+ if (!brightness_enable) {
+ dbg_printk(TPACPI_DBG_INIT,
+ "brightness support disabled by "
+ "module parameter\n");
+ return 1;
+ }
+
+ if (b > 16) {
+ printk(TPACPI_ERR
+ "Unsupported brightness interface, "
+ "please contact %s\n", TPACPI_MAIL);
+ return 1;
+ }
+ if (b == 16)
+ tp_features.bright_16levels = 1;
+
if (!brightness_mode) {
if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO)
brightness_mode = 2;
@@ -4402,12 +4772,7 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
if (brightness_mode > 3)
return -EINVAL;
- tp_features.bright_16levels =
- thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO &&
- brightness_check_levels();
-
- b = brightness_get(NULL);
- if (b < 0)
+ if (brightness_get_raw(&b) < 0)
return 1;
if (tp_features.bright_16levels)
@@ -4425,7 +4790,7 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
ibm_backlight_device->props.max_brightness =
(tp_features.bright_16levels)? 15 : 7;
- ibm_backlight_device->props.brightness = b;
+ ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK;
backlight_update_status(ibm_backlight_device);
return 0;
@@ -5046,11 +5411,11 @@ static void fan_watchdog_reset(void)
if (fan_watchdog_maxinterval > 0 &&
tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
fan_watchdog_active = 1;
- if (!schedule_delayed_work(&fan_watchdog_task,
+ if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task,
msecs_to_jiffies(fan_watchdog_maxinterval
* 1000))) {
printk(TPACPI_ERR
- "failed to schedule the fan watchdog, "
+ "failed to queue the fan watchdog, "
"watchdog will not trigger\n");
}
} else
@@ -5420,7 +5785,7 @@ static void fan_exit(void)
&driver_attr_fan_watchdog);
cancel_delayed_work(&fan_watchdog_task);
- flush_scheduled_work();
+ flush_workqueue(tpacpi_wq);
}
static int fan_read(char *p)
@@ -5826,10 +6191,13 @@ static void __init get_thinkpad_model_data(struct thinkpad_id_data *tp)
tp->model_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_VERSION),
GFP_KERNEL);
- if (strnicmp(tp->model_str, "ThinkPad", 8) != 0) {
+ if (tp->model_str && strnicmp(tp->model_str, "ThinkPad", 8) != 0) {
kfree(tp->model_str);
tp->model_str = NULL;
}
+
+ tp->nummodel_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_NAME),
+ GFP_KERNEL);
}
static int __init probe_for_thinkpad(void)
@@ -6071,6 +6439,9 @@ static void thinkpad_acpi_module_exit(void)
if (proc_dir)
remove_proc_entry(TPACPI_PROC_DIR, acpi_root_dir);
+ if (tpacpi_wq)
+ destroy_workqueue(tpacpi_wq);
+
kfree(thinkpad_id.bios_version_str);
kfree(thinkpad_id.ec_version_str);
kfree(thinkpad_id.model_str);
@@ -6101,6 +6472,12 @@ static int __init thinkpad_acpi_module_init(void)
TPACPI_ACPIHANDLE_INIT(ecrd);
TPACPI_ACPIHANDLE_INIT(ecwr);
+ tpacpi_wq = create_singlethread_workqueue(TPACPI_WORKQUEUE_NAME);
+ if (!tpacpi_wq) {
+ thinkpad_acpi_module_exit();
+ return -ENOMEM;
+ }
+
proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir);
if (!proc_dir) {
printk(TPACPI_ERR
@@ -6223,6 +6600,8 @@ static int __init thinkpad_acpi_module_init(void)
/* Please remove this in year 2009 */
MODULE_ALIAS("ibm_acpi");
+MODULE_ALIAS(TPACPI_DRVR_SHORTNAME);
+
/*
* DMI matching for module autoloading
*