From db6ea2c17cef531a58f48c51c3a0892edcaf1380 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sat, 21 Jun 2008 22:30:43 +0800
Subject: drivers/misc/atmel-ssc.c: Removed duplicated include

Removed duplicated include file <linux/list.h> in
drivers/misc/atmel-ssc.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 drivers/misc/atmel-ssc.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index e171650766c..bf5e4d06543 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -13,7 +13,6 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
-#include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atmel-ssc.h>
 
-- 
cgit v1.2.3


From 12d2b8f951063076c7e0acdff7ae1fecd54920a0 Mon Sep 17 00:00:00 2001
From: Heikki Orsila <heikki.orsila@iki.fi>
Date: Sun, 6 Jul 2008 15:48:02 +0300
Subject: kconfig: fix typos: "Suport" -> "Support"

Signed-off-by: Heikki Orsila <heikki.orsila@iki.fi>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/misc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 321eb913463..f5ade1904aa 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -360,7 +360,7 @@ config THINKPAD_ACPI_VIDEO
 	  If you are not sure, say Y here.
 
 config THINKPAD_ACPI_HOTKEY_POLL
-	bool "Suport NVRAM polling for hot keys"
+	bool "Support NVRAM polling for hot keys"
 	depends on THINKPAD_ACPI
 	default y
 	---help---
-- 
cgit v1.2.3


From 0bc3cc03fa6e1c20aecb5a33356bcaae410640b9 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 24 Jul 2008 18:21:31 -0700
Subject: cpumask: change cpumask_of_cpu_ptr to use new cpumask_of_cpu

  * Replace previous instances of the cpumask_of_cpu_ptr* macros
    with the new (lvalue capable) generic cpumask_of_cpu().

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/misc/sgi-xp/xpc_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 579b01ff82d..c3b4227f48a 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -229,11 +229,10 @@ xpc_hb_checker(void *ignore)
 	int last_IRQ_count = 0;
 	int new_IRQ_count;
 	int force_IRQ = 0;
-	cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU);
 
 	/* this thread was marked active by xpc_hb_init() */
 
-	set_cpus_allowed_ptr(current, cpumask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
 
 	/* set our heartbeating to other partitions into motion */
 	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
-- 
cgit v1.2.3


From 34d8a380d784d1fbea941a68beebdd7f9a3bebdf Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:54 -0700
Subject: GRU Driver: hardware data structures

This series of patches adds a driver for the SGI UV GRU.  The driver is
still in development but it currently compiles for both x86_64 & IA64.
All simple regression tests pass on IA64.  Although features remain to be
added, I'd like to start the process of getting the driver into the
kernel.  Additional kernel drivers will depend on services provided by the
GRU driver.

The GRU is a hardware resource located in the system chipset.  The GRU
contains memory that is mmaped into the user address space.  This memory
is used to communicate with the GRU to perform functions such as
load/store, scatter/gather, bcopy, AMOs, etc.  The GRU is directly
accessed by user instructions using user virtual addresses.  GRU
instructions (ex., bcopy) use user virtual addresses for operands.

The GRU contains a large TLB that is functionally very similar to
processor TLBs.  Because the GRU contains a TLB with user virtual
addresses, it requires callouts from the core VM system when certain types
of changes are made to the process page tables.  There are several MMUOPS
patches currently being discussed but none has been accepted into the
kernel.  The GRU driver is built using version V18 from Andrea Arcangeli.

This patch:

Contains the definitions of the hardware GRU data structures that are used
by the driver to manage the GRU.

[akpm@linux-foundation.org: export hpage_shift]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/gruhandles.h | 663 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 663 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/gruhandles.h

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 00000000000..d16031d6267
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,663 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              GRU HANDLE DEFINITION
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUHANDLES_H__
+#define __GRUHANDLES_H__
+#include "gru_instructions.h"
+
+/*
+ * Manifest constants for GRU Memory Map
+ */
+#define GRU_GSEG0_BASE		0
+#define GRU_MCS_BASE		(64 * 1024 * 1024)
+#define GRU_SIZE		(128UL * 1024 * 1024)
+
+/* Handle & resource counts */
+#define GRU_NUM_CB		128
+#define GRU_NUM_DSR_BYTES	(32 * 1024)
+#define GRU_NUM_TFM		16
+#define GRU_NUM_TGH		24
+#define GRU_NUM_CBE		128
+#define GRU_NUM_TFH		128
+#define GRU_NUM_CCH		16
+#define GRU_NUM_GSH		1
+
+/* Maximum resource counts that can be reserved by user programs */
+#define GRU_NUM_USER_CBR	GRU_NUM_CBE
+#define GRU_NUM_USER_DSR_BYTES	GRU_NUM_DSR_BYTES
+
+/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
+ * are the same */
+#define GRU_HANDLE_BYTES	64
+#define GRU_HANDLE_STRIDE	256
+
+/* Base addresses of handles */
+#define GRU_TFM_BASE		(GRU_MCS_BASE + 0x00000)
+#define GRU_TGH_BASE		(GRU_MCS_BASE + 0x08000)
+#define GRU_CBE_BASE		(GRU_MCS_BASE + 0x10000)
+#define GRU_TFH_BASE		(GRU_MCS_BASE + 0x18000)
+#define GRU_CCH_BASE		(GRU_MCS_BASE + 0x20000)
+#define GRU_GSH_BASE		(GRU_MCS_BASE + 0x30000)
+
+/* User gseg constants */
+#define GRU_GSEG_STRIDE		(4 * 1024 * 1024)
+#define GSEG_BASE(a)		((a) & ~(GRU_GSEG_PAGESIZE - 1))
+
+/* Data segment constants */
+#define GRU_DSR_AU_BYTES	1024
+#define GRU_DSR_CL		(GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU_CL		(GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU		(GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
+
+/* Control block constants */
+#define GRU_CBR_AU_SIZE		2
+#define GRU_CBR_AU		(GRU_NUM_CBE / GRU_CBR_AU_SIZE)
+
+/* Convert resource counts to the number of AU */
+#define GRU_DS_BYTES_TO_AU(n)	DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
+#define GRU_CB_COUNT_TO_AU(n)	DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
+
+/* UV limits */
+#define GRU_CHIPLETS_PER_HUB	2
+#define GRU_HUBS_PER_BLADE	1
+#define GRU_CHIPLETS_PER_BLADE	(GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
+
+/* User GRU Gseg offsets */
+#define GRU_CB_BASE		0
+#define GRU_CB_LIMIT		(GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
+#define GRU_DS_BASE		0x20000
+#define GRU_DS_LIMIT		(GRU_DS_BASE + GRU_NUM_DSR_BYTES)
+
+/* Convert a GRU physical address to the chiplet offset */
+#define GSEGPOFF(h) 		((h) & (GRU_SIZE - 1))
+
+/* Convert an arbitrary handle address to the beginning of the GRU segment */
+#ifndef __PLUGIN__
+#define GRUBASE(h)		((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+#else
+extern void *gmu_grubase(void *h);
+#define GRUBASE(h)		gmu_grubase(h)
+#endif
+
+/* General addressing macros. */
+static inline void *get_gseg_base_address(void *base, int ctxnum)
+{
+	return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
+}
+
+static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) +
+			GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
+}
+
+static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
+			GRU_CACHE_LINE_BYTES * line);
+}
+
+static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
+{
+	return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_context_configuration_handle *get_cch(void *base,
+					int ctxnum)
+{
+	return (struct gru_context_configuration_handle *)(base +
+				GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline unsigned long get_cb_number(void *cb)
+{
+	return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
+					GRU_HANDLE_STRIDE;
+}
+
+/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)) */
+static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
+							int chiplet)
+{
+	return paddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
+{
+	return vaddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+
+
+/*
+ * Global TLB Fault Map
+ * 	Bitmap of outstanding TLB misses needing interrupt/polling service.
+ *
+ */
+struct gru_tlb_fault_map {
+	unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill0[2];
+	unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill1[2];
+};
+
+/*
+ * TGH - TLB Global Handle
+ * 	Used for TLB flushing.
+ *
+ */
+struct gru_tlb_global_handle {
+	unsigned int cmd:1;		/* DW 0 */
+	unsigned int delresp:1;
+	unsigned int opc:1;
+	unsigned int fill1:5;
+
+	unsigned int fill2:8;
+
+	unsigned int status:2;
+	unsigned long fill3:2;
+	unsigned int state:3;
+	unsigned long fill4:1;
+
+	unsigned int cause:3;
+	unsigned long fill5:37;
+
+	unsigned long vaddr:64;		/* DW 1 */
+
+	unsigned int asid:24;		/* DW 2 */
+	unsigned int fill6:8;
+
+	unsigned int pagesize:5;
+	unsigned int fill7:11;
+
+	unsigned int global:1;
+	unsigned int fill8:15;
+
+	unsigned long vaddrmask:39;	/* DW 3 */
+	unsigned int fill9:9;
+	unsigned int n:10;
+	unsigned int fill10:6;
+
+	unsigned int ctxbitmap:16;	/* DW4 */
+	unsigned long fill11[3];
+};
+
+enum gru_tgh_cmd {
+	TGHCMD_START
+};
+
+enum gru_tgh_opc {
+	TGHOP_TLBNOP,
+	TGHOP_TLBINV
+};
+
+enum gru_tgh_status {
+	TGHSTATUS_IDLE,
+	TGHSTATUS_EXCEPTION,
+	TGHSTATUS_ACTIVE
+};
+
+enum gru_tgh_state {
+	TGHSTATE_IDLE,
+	TGHSTATE_PE_INVAL,
+	TGHSTATE_INTERRUPT_INVAL,
+	TGHSTATE_WAITDONE,
+	TGHSTATE_RESTART_CTX,
+};
+
+/*
+ * TFH - TLB Fault Handle
+ * 	Used for TLB dropins into the GRU TLB.
+ *
+ */
+struct gru_tlb_fault_handle {
+	unsigned int cmd:1;		/* DW 0 - low 32*/
+	unsigned int delresp:1;
+	unsigned int fill0:2;
+	unsigned int opc:3;
+	unsigned int fill1:9;
+
+	unsigned int status:2;
+	unsigned int fill2:1;
+	unsigned int color:1;
+	unsigned int state:3;
+	unsigned int fill3:1;
+
+	unsigned int cause:7;		/* DW 0 - high 32 */
+	unsigned int fill4:1;
+
+	unsigned int indexway:12;
+	unsigned int fill5:4;
+
+	unsigned int ctxnum:4;
+	unsigned int fill6:12;
+
+	unsigned long missvaddr:64;	/* DW 1 */
+
+	unsigned int missasid:24;	/* DW 2 */
+	unsigned int fill7:8;
+	unsigned int fillasid:24;
+	unsigned int dirty:1;
+	unsigned int gaa:2;
+	unsigned long fill8:5;
+
+	unsigned long pfn:41;		/* DW 3 */
+	unsigned int fill9:7;
+	unsigned int pagesize:5;
+	unsigned int fill10:11;
+
+	unsigned long fillvaddr:64;	/* DW 4 */
+
+	unsigned long fill11[3];
+};
+
+enum gru_tfh_opc {
+	TFHOP_NOOP,
+	TFHOP_RESTART,
+	TFHOP_WRITE_ONLY,
+	TFHOP_WRITE_RESTART,
+	TFHOP_EXCEPTION,
+	TFHOP_USER_POLLING_MODE = 7,
+};
+
+enum tfh_status {
+	TFHSTATUS_IDLE,
+	TFHSTATUS_EXCEPTION,
+	TFHSTATUS_ACTIVE,
+};
+
+enum tfh_state {
+	TFHSTATE_INACTIVE,
+	TFHSTATE_IDLE,
+	TFHSTATE_MISS_UPM,
+	TFHSTATE_MISS_FMM,
+	TFHSTATE_HW_ERR,
+	TFHSTATE_WRITE_TLB,
+	TFHSTATE_RESTART_CBR,
+};
+
+/* TFH cause bits */
+enum tfh_cause {
+	TFHCAUSE_NONE,
+	TFHCAUSE_TLB_MISS,
+	TFHCAUSE_TLB_MOD,
+	TFHCAUSE_HW_ERROR_RR,
+	TFHCAUSE_HW_ERROR_MAIN_ARRAY,
+	TFHCAUSE_HW_ERROR_VALID,
+	TFHCAUSE_HW_ERROR_PAGESIZE,
+	TFHCAUSE_INSTRUCTION_EXCEPTION,
+	TFHCAUSE_UNCORRECTIBLE_ERROR,
+};
+
+/* GAA values */
+#define GAA_RAM				0x0
+#define GAA_NCRAM			0x2
+#define GAA_MMIO			0x1
+#define GAA_REGISTER			0x3
+
+/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
+#define GRU_PADDR_SHIFT			12
+
+/*
+ * Context Configuration handle
+ * 	Used to allocate resources to a GSEG context.
+ *
+ */
+struct gru_context_configuration_handle {
+	unsigned int cmd:1;			/* DW0 */
+	unsigned int delresp:1;
+	unsigned int opc:3;
+	unsigned int unmap_enable:1;
+	unsigned int req_slice_set_enable:1;
+	unsigned int req_slice:2;
+	unsigned int cb_int_enable:1;
+	unsigned int tlb_int_enable:1;
+	unsigned int tfm_fault_bit_enable:1;
+	unsigned int tlb_int_select:4;
+
+	unsigned int status:2;
+	unsigned int state:2;
+	unsigned int reserved2:4;
+
+	unsigned int cause:4;
+	unsigned int tfm_done_bit_enable:1;
+	unsigned int unused:3;
+
+	unsigned int dsr_allocation_map;
+
+	unsigned long cbr_allocation_map;	/* DW1 */
+
+	unsigned int asid[8];			/* DW 2 - 5 */
+	unsigned short sizeavail[8];		/* DW 6 - 7 */
+} __attribute__ ((packed));
+
+enum gru_cch_opc {
+	CCHOP_START = 1,
+	CCHOP_ALLOCATE,
+	CCHOP_INTERRUPT,
+	CCHOP_DEALLOCATE,
+	CCHOP_INTERRUPT_SYNC,
+};
+
+enum gru_cch_status {
+	CCHSTATUS_IDLE,
+	CCHSTATUS_EXCEPTION,
+	CCHSTATUS_ACTIVE,
+};
+
+enum gru_cch_state {
+	CCHSTATE_INACTIVE,
+	CCHSTATE_MAPPED,
+	CCHSTATE_ACTIVE,
+	CCHSTATE_INTERRUPTED,
+};
+
+/* CCH Exception cause */
+enum gru_cch_cause {
+	CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
+	CCHCAUSE_ILLEGAL_OPCODE = 2,
+	CCHCAUSE_INVALID_START_REQUEST = 3,
+	CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
+	CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
+	CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
+	CCHCAUSE_CCH_BUSY = 7,
+	CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
+	CCHCAUSE_BAD_TFM_CONFIG = 9,
+	CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
+	CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
+	CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
+};
+/*
+ * CBE - Control Block Extended
+ * 	Maintains internal GRU state for active CBs.
+ *
+ */
+struct gru_control_block_extended {
+	unsigned int reserved0:1;	/* DW 0  - low */
+	unsigned int imacpy:3;
+	unsigned int reserved1:4;
+	unsigned int xtypecpy:3;
+	unsigned int iaa0cpy:2;
+	unsigned int iaa1cpy:2;
+	unsigned int reserved2:1;
+	unsigned int opccpy:8;
+	unsigned int exopccpy:8;
+
+	unsigned int idef2cpy:22;	/* DW 0  - high */
+	unsigned int reserved3:10;
+
+	unsigned int idef4cpy:22;	/* DW 1 */
+	unsigned int reserved4:10;
+	unsigned int idef4upd:22;
+	unsigned int reserved5:10;
+
+	unsigned long idef1upd:64;	/* DW 2 */
+
+	unsigned long idef5cpy:64;	/* DW 3 */
+
+	unsigned long idef6cpy:64;	/* DW 4 */
+
+	unsigned long idef3upd:64;	/* DW 5 */
+
+	unsigned long idef5upd:64;	/* DW 6 */
+
+	unsigned int idef2upd:22;	/* DW 7 */
+	unsigned int reserved6:10;
+
+	unsigned int ecause:20;
+	unsigned int cbrstate:4;
+	unsigned int cbrexecstatus:8;
+};
+
+enum gru_cbr_state {
+	CBRSTATE_INACTIVE,
+	CBRSTATE_IDLE,
+	CBRSTATE_PE_CHECK,
+	CBRSTATE_QUEUED,
+	CBRSTATE_WAIT_RESPONSE,
+	CBRSTATE_INTERRUPTED,
+	CBRSTATE_INTERRUPTED_MISS_FMM,
+	CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
+	CBRSTATE_INTERRUPTED_MISS_UPM,
+	CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
+	CBRSTATE_REQUEST_ISSUE,
+	CBRSTATE_BUSY_INTERRUPT,
+};
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT			0
+#define CBR_EXS_INT_OCC_BIT			1
+#define CBR_EXS_PENDING_BIT			2
+#define CBR_EXS_QUEUED_BIT			3
+#define CBR_EXS_TLBHW_BIT			4
+#define CBR_EXS_EXCEPTION_BIT			5
+
+#define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLBHW				(1 << CBR_EXS_TLBHW_BIT)
+#define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
+
+/* CBE ecause bits  - defined in gru_instructions.h */
+
+/*
+ * Convert a processor pagesize into the strange encoded pagesize used by the
+ * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
+ * 	pagesize	log pagesize	grupagesize
+ * 	  4k			12	0
+ * 	 16k 			14	1
+ * 	 64k			16	2
+ * 	256k			18	3
+ * 	  1m			20	4
+ * 	  2m			21	5
+ * 	  4m			22	6
+ * 	 16m			24	7
+ * 	 64m			26	8
+ * 	...
+ */
+#define GRU_PAGESIZE(sh)	((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6)
+#define GRU_SIZEAVAIL(sh)	(1UL << GRU_PAGESIZE(sh))
+
+/* minimum TLB purge count to ensure a full purge */
+#define GRUMAXINVAL		1024UL
+
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)
+
+static inline void start_instruction(void *h)	/* h: handle to start */
+{
+	unsigned long *w0 = h;	/* first 64-bit word (DW 0) of the handle */
+
+	wmb();		/* setting CMD bit must be last */
+	*w0 = *w0 | 1;	/* set the low bit of DW 0 (the CMD bit) */
+	gru_flush_cache(h);	/* then flush the handle via __flush_cache() */
+}
+
+static inline int wait_instruction_complete(void *h)	/* spins, no timeout */
+{
+	int status;	/* 2-bit field read from bits 16-17 of DW 0 */
+
+	do {
+		cpu_relax();
+		barrier();	/* issued before every re-read of the handle */
+		status = GET_MSEG_HANDLE_STATUS(h);
+	} while (status == CCHSTATUS_ACTIVE);	/* TGHSTATUS_ACTIVE has the same value */
+	return status;	/* final status; never CCHSTATUS_ACTIVE */
+}
+
+#if defined CONFIG_IA64
+static inline void cch_allocate_set_asids(
+		  struct gru_context_configuration_handle *cch, int asidval)
+{
+	int i;
+
+	for (i = 0; i <= RGN_HPAGE; i++) {  /*  assume HPAGE is last region */
+		cch->asid[i] = (asidval++);
+#if 0
+		/* ZZZ hugepages not supported yet */
+		if (i == RGN_HPAGE)
+			cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
+		else
+#endif
+			cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
+	}
+}
+#elif defined CONFIG_X86_64
+static inline void cch_allocate_set_asids(
+		  struct gru_context_configuration_handle *cch, int asidval)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {	/* 8 == entries in asid[] and sizeavail[] */
+		cch->asid[i] = asidval++;	/* consecutive asids from asidval */
+		cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
+			GRU_SIZEAVAIL(21);	/* 21 == log2(2m), see GRU_PAGESIZE table */
+	}
+}
+#endif
+
+static inline int cch_allocate(struct gru_context_configuration_handle *cch,
+			       int asidval, unsigned long cbrmap,
+			       unsigned long dsrmap)
+{
+	cch_allocate_set_asids(cch, asidval);	/* fills asid[] and sizeavail[] */
+	cch->dsr_allocation_map = dsrmap;	/* field is unsigned int: dsrmap is narrowed */
+	cch->cbr_allocation_map = cbrmap;
+	cch->opc = CCHOP_ALLOCATE;	/* all operands are written before the start */
+	start_instruction(cch);
+	return wait_instruction_complete(cch);	/* 2-bit handle status once not ACTIVE */
+}
+
+static inline int cch_start(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_START;
+	start_instruction(cch);
+	return wait_instruction_complete(cch);
+}
+
+static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_INTERRUPT;
+	start_instruction(cch);
+	return wait_instruction_complete(cch);
+}
+
+static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_DEALLOCATE;
+	start_instruction(cch);
+	return wait_instruction_complete(cch);
+}
+
+static inline int cch_interrupt_sync(struct gru_context_configuration_handle
+				     *cch)
+{
+	cch->opc = CCHOP_INTERRUPT_SYNC;
+	start_instruction(cch);
+	return wait_instruction_complete(cch);
+}
+
+static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+				 unsigned long vaddr, unsigned long vaddrmask,
+				 int asid, int pagesize, int global, int n,
+				 unsigned short ctxbitmap)
+{
+	tgh->vaddr = vaddr;
+	tgh->asid = asid;
+	tgh->pagesize = pagesize;
+	tgh->n = n;
+	tgh->global = global;
+	tgh->vaddrmask = vaddrmask;
+	tgh->ctxbitmap = ctxbitmap;
+	tgh->opc = TGHOP_TLBINV;
+	start_instruction(tgh);
+	return wait_instruction_complete(tgh);
+}
+
+static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
+				  unsigned long pfn, unsigned long vaddr,
+				  int asid, int dirty, int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = pfn;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_ONLY;
+	start_instruction(tfh);
+}
+
+static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+				     unsigned long paddr, int gaa,
+				     unsigned long vaddr, int asid, int dirty,
+				     int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_RESTART;
+	start_instruction(tfh);
+}
+
+static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_RESTART;
+	start_instruction(tfh);
+}
+
+static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_USER_POLLING_MODE;
+	start_instruction(tfh);
+}
+
+static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_EXCEPTION;
+	start_instruction(tfh);
+}
+
+#endif /* __GRUHANDLES_H__ */
-- 
cgit v1.2.3


From 4c921d4d8aa74140597fd8736261837f73ca6e7a Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:54 -0700
Subject: GRU Driver: GRU instructions & macros

This patch contains macros & inline functions used to issue instructions
to the GRU.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/gru_instructions.h | 679 ++++++++++++++++++++++++++++++++
 1 file changed, 679 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/gru_instructions.h

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 00000000000..3159b261c5a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,679 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_INSTRUCTIONS_H__
+#define __GRU_INSTRUCTIONS_H__
+
+#define gru_flush_cache_hook(p)
+#define gru_emulator_wait_hook(p, w)
+
+/*
+ * Architecture dependent functions
+ */
+
+#if defined CONFIG_IA64
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+#define __flush_cache(p)		ia64_fc(p)
+/* Use volatile on IA64 to ensure ordering via st4.rel */
+#define gru_ordered_store_int(p,v)					\
+		do {							\
+			barrier();					\
+			*((volatile int *)(p)) = v; /* force st.rel */	\
+		} while (0)
+#elif defined CONFIG_X86_64
+#define __flush_cache(p)		clflush(p)
+#define gru_ordered_store_int(p,v)					\
+		do {	/* p and v may be arbitrary expressions */	\
+			barrier();					\
+			*(int *)(p) = (v);				\
+		} while (0)
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Control block status and exception codes
+ */
+#define CBS_IDLE			0
+#define CBS_EXCEPTION			1
+#define CBS_ACTIVE			2
+#define CBS_CALL_OS			3
+
+/* CB substatus bitmasks */
+#define CBSS_MSG_QUEUE_MASK		7
+#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK	8
+
+/* CB substatus message queue values (low 3 bits of substatus) */
+#define CBSS_NO_ERROR			0
+#define CBSS_LB_OVERFLOWED		1
+#define CBSS_QLIMIT_REACHED		2
+#define CBSS_PAGE_OVERFLOW		3
+#define CBSS_AMO_NACKED			4
+#define CBSS_PUT_NACKED			5
+
+/*
+ * Structure used to fetch exception detail for CBs that terminate with
+ * CBS_EXCEPTION
+ */
+struct control_block_extended_exc_detail {
+	unsigned long	cb;
+	int		opc;
+	int		ecause;
+	int		exopc;
+	long		exceptdet0;
+	int		exceptdet1;
+};
+
+/*
+ * Instruction formats
+ */
+
+/*
+ * Generic instruction format.
+ * This definition has precise bit field definitions.
+ */
+struct gru_instruction_bits {
+    /* DW 0  - low */
+    unsigned int		icmd:      1;
+    unsigned char		ima:	   3;	/* CB_DelRep, unmapped mode */
+    unsigned char		reserved0: 4;
+    unsigned int		xtype:     3;
+    unsigned int		iaa0:      2;
+    unsigned int		iaa1:      2;
+    unsigned char		reserved1: 1;
+    unsigned char		opc:       8;	/* opcode */
+    unsigned char		exopc:     8;	/* extended opcode */
+    /* DW 0  - high */
+    unsigned int		idef2:    22;	/* TRi0 */
+    unsigned char		reserved2: 2;
+    unsigned char		istatus:   2;
+    unsigned char		isubstatus:4;
+    unsigned char		reserved3: 2;
+    /* DW 1 */
+    unsigned long		idef4;		/* 42 bits: TRi1, BufSize */
+    /* DW 2-6 */
+    unsigned long		idef1;		/* BAddr0 */
+    unsigned long		idef5;		/* Nelem */
+    unsigned long		idef6;		/* Stride, Operand1 */
+    unsigned long		idef3;		/* BAddr1, Value, Operand2 */
+    unsigned long		reserved4;
+    /* DW 7 */
+    unsigned long		avalue;		 /* AValue */
+};
+
+/*
+ * Generic instruction with friendlier names. This format is used
+ * for inline instructions.
+ */
+struct gru_instruction {
+    /* DW 0 */
+    unsigned int		op32;    /* icmd,ima,xtype,iaa0,iaa1,opc,exopc */
+    unsigned int		tri0;
+    unsigned long		tri1_bufsize;		/* DW 1 */
+    unsigned long		baddr0;			/* DW 2 */
+    unsigned long		nelem;			/* DW 3 */
+    unsigned long		op1_stride;		/* DW 4 */
+    unsigned long		op2_value_baddr1;	/* DW 5 */
+    unsigned long		reserved0;		/* DW 6 */
+    unsigned long		avalue;			/* DW 7 */
+};
+
+/* Some shifts and masks for the low 32 bits of a GRU command */
+#define GRU_CB_ICMD_SHFT	0
+#define GRU_CB_ICMD_MASK	0x1
+#define GRU_CB_XTYPE_SHFT	8
+#define GRU_CB_XTYPE_MASK	0x7
+#define GRU_CB_IAA0_SHFT	11
+#define GRU_CB_IAA0_MASK	0x3
+#define GRU_CB_IAA1_SHFT	13
+#define GRU_CB_IAA1_MASK	0x3
+#define GRU_CB_IMA_SHFT		1
+#define GRU_CB_IMA_MASK		0x3	/* NOTE(review): ima is 3 bits, IMA_INTERRUPT is 0x4 - confirm */
+#define GRU_CB_OPC_SHFT		16
+#define GRU_CB_OPC_MASK		0xff
+#define GRU_CB_EXOPC_SHFT	24
+#define GRU_CB_EXOPC_MASK	0xff
+
+/* GRU instruction opcodes (opc field) */
+#define OP_NOP		0x00
+#define OP_BCOPY	0x01
+#define OP_VLOAD	0x02
+#define OP_IVLOAD	0x03
+#define OP_VSTORE	0x04
+#define OP_IVSTORE	0x05
+#define OP_VSET		0x06
+#define OP_IVSET	0x07
+#define OP_MESQ		0x08
+#define OP_GAMXR	0x09
+#define OP_GAMIR	0x0a
+#define OP_GAMIRR	0x0b
+#define OP_GAMER	0x0c
+#define OP_GAMERR	0x0d
+#define OP_BSTORE	0x0e
+#define OP_VFLUSH	0x0f
+
+
+/* Extended opcodes values (exopc field) */
+
+/* GAMIR - AMOs with implicit operands */
+#define EOP_IR_FETCH	0x01 /* Plain fetch of memory */
+#define EOP_IR_CLR	0x02 /* Fetch and clear */
+#define EOP_IR_INC	0x05 /* Fetch and increment */
+#define EOP_IR_DEC	0x07 /* Fetch and decrement */
+#define EOP_IR_QCHK1	0x0d /* Queue check, 64 byte msg */
+#define EOP_IR_QCHK2	0x0e /* Queue check, 128 byte msg */
+
+/* GAMIRR - Registered AMOs with implicit operands */
+#define EOP_IRR_FETCH	0x01 /* Registered fetch of memory */
+#define EOP_IRR_CLR	0x02 /* Registered fetch and clear */
+#define EOP_IRR_INC	0x05 /* Registered fetch and increment */
+#define EOP_IRR_DEC	0x07 /* Registered fetch and decrement */
+#define EOP_IRR_DECZ	0x0f /* Registered fetch and decrement, update on zero*/
+
+/* GAMER - AMOs with explicit operands */
+#define EOP_ER_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ER_OR	0x01 /* Logical OR with memory */
+#define EOP_ER_AND	0x02 /* Logical AND with memory */
+#define EOP_ER_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ER_ADD	0x04 /* Add value to memory */
+#define EOP_ER_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ER_CADD	0x0c /* Queue check, operand1*64 byte msg */
+
+/* GAMERR - Registered AMOs with explicit operands */
+#define EOP_ERR_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ERR_OR	0x01 /* Logical OR with memory */
+#define EOP_ERR_AND	0x02 /* Logical AND with memory */
+#define EOP_ERR_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ERR_ADD	0x04 /* Add value to memory */
+#define EOP_ERR_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ERR_EPOLL	0x09 /* Poll for equality */
+#define EOP_ERR_NPOLL	0x0a /* Poll for inequality */
+
+/* GAMXR - SGI Arithmetic unit */
+#define EOP_XR_CSWAP	0x0b /* Masked compare exchange */
+
+
+/* Transfer types (xtype field) */
+#define XTYPE_B		0x0	/* byte */
+#define XTYPE_S		0x1	/* short (2-byte) */
+#define XTYPE_W		0x2	/* word (4-byte) */
+#define XTYPE_DW	0x3	/* doubleword (8-byte) */
+#define XTYPE_CL	0x6	/* cacheline (64-byte) */
+
+
+/* Instruction access attributes (iaa0, iaa1 fields) */
+#define IAA_RAM		0x0	/* normal cached RAM access */
+#define IAA_NCRAM	0x2	/* noncoherent RAM access */
+#define IAA_MMIO	0x1	/* noncoherent memory-mapped I/O space */
+#define IAA_REGISTER	0x3	/* memory-mapped registers, etc. */
+
+
+/* Instruction mode attributes (ima field) */
+#define IMA_MAPPED	0x0	/* Virtual mode  */
+#define IMA_CB_DELAY	0x1	/* hold read responses until status changes */
+#define IMA_UNMAPPED	0x2	/* bypass the TLBs (OS only) */
+#define IMA_INTERRUPT	0x4	/* Interrupt when instruction completes */
+
+/* CBE ecause bits */
+#define CBE_CAUSE_RI				(1 << 0)
+#define CBE_CAUSE_INVALID_INSTRUCTION		(1 << 1)
+#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN	(1 << 2)
+#define CBE_CAUSE_PE_CHECK_DATA_ERROR		(1 << 3)
+#define CBE_CAUSE_IAA_GAA_MISMATCH		(1 << 4)
+#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION	(1 << 5)
+#define CBE_CAUSE_OS_FATAL_TLB_FAULT		(1 << 6)
+#define CBE_CAUSE_EXECUTION_HW_ERROR		(1 << 7)
+#define CBE_CAUSE_TLBHW_ERROR			(1 << 8)
+#define CBE_CAUSE_RA_REQUEST_TIMEOUT		(1 << 9)
+#define CBE_CAUSE_HA_REQUEST_TIMEOUT		(1 << 10)
+#define CBE_CAUSE_RA_RESPONSE_FATAL		(1 << 11)
+#define CBE_CAUSE_RA_RESPONSE_NON_FATAL		(1 << 12)
+#define CBE_CAUSE_HA_RESPONSE_FATAL		(1 << 13)
+#define CBE_CAUSE_HA_RESPONSE_NON_FATAL		(1 << 14)
+#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR	(1 << 15)
+#define CBE_CAUSE_RESPONSE_DATA_ERROR		(1 << 16)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 17)
+
+/*
+ * Exceptions are retried for the following cases. If any OTHER bits are set
+ * in ecause, the exception is not retryable.
+ */
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR |		\
+			      CBE_CAUSE_RA_REQUEST_TIMEOUT |		\
+			      CBE_CAUSE_TLBHW_ERROR |			\
+			      CBE_CAUSE_HA_REQUEST_TIMEOUT)
+
+/* Message queue head: head/limit pair, also accessible as one value (val) */
+union gru_mesqhead {
+	unsigned long	val;		/* overlays the head/limit pair */
+	struct {
+		unsigned int	head;
+		unsigned int	limit;
+	};
+};
+
+
+/* Build the low word of a GRU instruction from its individual fields */
+static inline unsigned int
+__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+       unsigned char iaa0, unsigned char iaa1,
+       unsigned char ima)
+{
+	return (opcode << GRU_CB_OPC_SHFT) |
+	       (exopc << GRU_CB_EXOPC_SHFT) |
+	       (xtype << GRU_CB_XTYPE_SHFT) |
+	       (iaa0 << GRU_CB_IAA0_SHFT) |
+	       (iaa1 << GRU_CB_IAA1_SHFT) |
+	       (ima << GRU_CB_IMA_SHFT) |
+	       (1 << GRU_CB_ICMD_SHFT);	/* ICMD field is always 1 */
+}
+
+/*
+ * Prefetch a cacheline by reading one byte through a volatile pointer.
+ * Fetch is unconditional. Must page fault if no valid TLB entry is found.
+ * 	??? should I use actual "load" or hardware prefetch???
+ */
+static inline void gru_prefetch(void *p)
+{
+	(void)*(volatile char *)p;
+}
+
+/*
+ * Architecture specific intrinsics (thin wrapper around __flush_cache())
+ */
+static inline void gru_flush_cache(void *p)
+{
+	__flush_cache(p);
+}
+
+/*
+ * Store op32, the lower 32 bits of the command including the "start" bit,
+ * into ins with gru_ordered_store_int(). Then the instruction starts executing.
+ */
+static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
+{
+	gru_ordered_store_int(ins, op32);
+}
+
+
+/* Convert "hints" to IMA: IMA_UNMAPPED is always OR-ed into the hints */
+#define CB_IMA(h)		((h) | IMA_UNMAPPED)
+
+/* Convert data segment cache line index into TRI0 / TRI1 value (in bytes) */
+#define GRU_DINDEX(i)		((i) * GRU_CACHE_LINE_BYTES)
+
+/* Inline functions for GRU instructions.
+ *     Note:
+ *     	- nelem and stride are in elements
+ *     	- tri0/tri1 are byte offsets from the beginning of the data segment.
+ */
+static inline void gru_vload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	ins->op1_stride = stride;	/* in elements */
+	gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_vstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	ins->op1_stride = stride;	/* in elements */
+	gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_ivload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	ins->tri1_bufsize = tri1;	/* data segment offset, in bytes */
+	gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_ivstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1,
+		unsigned char xtype, unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	ins->tri1_bufsize = tri1;	/* data segment offset, in bytes */
+	gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_vset(void *cb, unsigned long mem_addr,
+		unsigned long value, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;	/* field carries the value here */
+	ins->nelem = nelem;		/* in elements */
+	ins->op1_stride = stride;	/* in elements */
+	gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
+					 CB_IMA(hints)));
+}
+
+static inline void gru_ivset(void *cb, unsigned long mem_addr,
+		unsigned int tri1, unsigned long value, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;	/* field carries the value here */
+	ins->nelem = nelem;		/* in elements */
+	ins->tri1_bufsize = tri1;	/* data segment offset, in bytes */
+	gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_vflush(void *cb, unsigned long mem_addr,
+		unsigned long nelem, unsigned char xtype, unsigned long stride,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op1_stride = stride;	/* in elements */
+	ins->nelem = nelem;		/* in elements */
+	gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_nop(void *cb, int hints)
+{
+	struct gru_instruction *ins = (void *)cb;	/* no operands set */
+
+	gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
+}
+
+
+static inline void gru_bcopy(void *cb, const unsigned long src,
+		unsigned long dest,
+		unsigned int tri0, unsigned int xtype, unsigned long nelem,
+		unsigned int bufsize, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;	/* field carries baddr1 here */
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	ins->tri1_bufsize = bufsize;	/* field carries bufsize here */
+	gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
+					IAA_RAM, CB_IMA(hints)));
+}
+
+static inline void gru_bstore(void *cb, const unsigned long src,
+		unsigned long dest, unsigned int tri0, unsigned int xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;	/* field carries baddr1 here */
+	ins->nelem = nelem;		/* in elements */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+					CB_IMA(hints)));
+}
+
+static inline void gru_gamir(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;	/* only operand field that is set */
+	gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;	/* only operand field that is set */
+	gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_gamer(void *cb, int exopc, unsigned long src,
+		unsigned int xtype,
+		unsigned long operand1, unsigned long operand2,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;	/* field carries operand 1 here */
+	ins->op2_value_baddr1 = operand2;	/* field carries operand 2 here */
+	gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long operand1,
+		unsigned long operand2, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;	/* field carries operand 1 here */
+	ins->op2_value_baddr1 = operand2;	/* field carries operand 2 here */
+	gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline void gru_gamxr(void *cb, unsigned long src,
+		unsigned int tri0, unsigned long hints)
+{	/* NOTE(review): tri0 is never stored into the CB - confirm intent */
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->nelem = 4;			/* fixed count; xtype is XTYPE_DW */
+	gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+				 IAA_RAM, 0, CB_IMA(hints)));
+}
+
+static inline void gru_mesq(void *cb, unsigned long queue,
+		unsigned long tri0, unsigned long nelem,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)queue;
+	ins->nelem = nelem;		/* in elements; xtype is XTYPE_CL */
+	ins->tri0 = tri0;		/* data segment offset, in bytes */
+	gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+					CB_IMA(hints)));
+}
+
+static inline unsigned long gru_get_amo_value(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue;		/* the whole avalue field */
+}
+
+static inline int gru_get_amo_value_head(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue & 0xffffffff;	/* low 32 bits of avalue */
+}
+
+static inline int gru_get_amo_value_limit(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue >> 32;	/* avalue bits 32 and above */
+}
+
+static inline union gru_mesqhead  gru_mesq_head(int head, int limit)
+{
+	union gru_mesqhead mqh;		/* val is never written directly */
+
+	mqh.head = head;
+	mqh.limit = limit;
+	return mqh;			/* returned by value */
+}
+
+/*
+ * Get struct control_block_extended_exc_detail for CB.
+ */
+extern int gru_get_cb_exception_detail(void *cb,
+		       struct control_block_extended_exc_detail *excdet);
+
+#define GRU_EXC_STR_SIZE		256
+
+extern int gru_check_status_proc(void *cb);	/* see gru_check_status() */
+extern int gru_wait_proc(void *cb);		/* see gru_wait() */
+extern void gru_wait_abort_proc(void *cb);	/* see gru_wait_abort() */
+
+/*
+ * Control block layout for checking status; unused* members are padding
+ */
+struct gru_control_block_status {
+	unsigned int	icmd		:1;
+	unsigned int	unused1		:31;
+	unsigned int	unused2		:24;
+	unsigned int	istatus		:2;
+	unsigned int	isubstatus	:4;
+	unsigned int	unused3		:2;
+};
+
+/* Return the istatus field of a CB */
+static inline int gru_get_cb_status(void *cb)
+{
+	struct gru_control_block_status *status = (void *)cb;
+
+	return status->istatus;
+}
+
+/* Return the CB substatus bits selected by CBSS_MSG_QUEUE_MASK */
+static inline int gru_get_cb_message_queue_substatus(void *cb)
+{
+	struct gru_control_block_status *status = (void *)cb;
+
+	return CBSS_MSG_QUEUE_MASK & status->isubstatus;
+}
+
+/* Return the complete isubstatus field of a CB */
+static inline int gru_get_cb_substatus(void *cb)
+{
+	struct gru_control_block_status *status = (void *)cb;
+
+	return status->isubstatus;
+}
+
+/* Check the status of a CB. When istatus is CBS_CALL_OS (the CB is in UPM
+ * mode), gru_check_status_proc() is called so the OS can handle it.
+ * Returns the CB status field value (0 for normal completion)
+ */
+static inline int gru_check_status(void *cb)
+{
+	struct gru_control_block_status *status = (void *)cb;
+	int istatus = status->istatus;
+
+	if (istatus != CBS_CALL_OS)
+		return istatus;
+	return gru_check_status_proc(cb);
+}
+
+/* Wait for CB to complete; gru_wait_proc() is called unless already idle.
+ * Returns the CB status field value (0 for normal completion)
+ */
+static inline int gru_wait(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+	int ret = cbs->istatus;
+
+	if (ret != CBS_IDLE)
+		ret = gru_wait_proc(cb);
+	return ret;
+}
+
+/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
+ * mean TLB miss - only fatal errors such as memory parity error or user
+ * bugs will cause termination.)
+ */
+static inline void gru_wait_abort(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	if (cbs->istatus != CBS_IDLE)
+		gru_wait_abort_proc(cb);
+}
+
+
+/*
+ * Return the address of a control block within a GSeg
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired CB
+ */
+static inline void *gru_get_cb_pointer(void *gseg,
+				       int index)
+{
+	return (char *)gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
+}
+
+/*
+ * Return the address of a cacheline in the data segment portion of a GSeg
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired cache line
+ */
+static inline void *gru_get_data_pointer(void *gseg, int index)
+{
+	return (char *)gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
+}
+
+/*
+ * Convert a vaddr into the tri index (byte offset from GRU_DS_BASE) in a GSEG
+ * 	vaddr		- virtual address within the gseg
+ */
+static inline int gru_get_tri(void *vaddr)
+{
+	return ((unsigned long)vaddr % GRU_GSEG_PAGESIZE) - GRU_DS_BASE;
+}
+#endif		/* __GRU_INSTRUCTIONS_H__ */
-- 
cgit v1.2.3


From 13d19498b0446cad2c394f9fbec8149b44a60c6e Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:55 -0700
Subject: GRU Driver: driver internal header files

This patch contains header files internal to the GRU driver.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/gru.h       |  67 +++++
 drivers/misc/sgi-gru/grulib.h    |  97 +++++++
 drivers/misc/sgi-gru/grutables.h | 545 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 709 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/gru.h
 create mode 100644 drivers/misc/sgi-gru/grulib.h
 create mode 100644 drivers/misc/sgi-gru/grutables.h

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 00000000000..40df7cb3f0a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES		64
+#define GRU_HANDLE_STRIDE		256
+#define GRU_CB_BASE			0
+#define GRU_DS_BASE			0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined CONFIG_IA64
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
+#elif defined CONFIG_X86_64
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)		/* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+	int	node;
+	int	chiplet;
+	int	blade;
+	int	total_dsr_bytes;
+	int	total_cbr;
+	int	total_user_dsr_bytes;
+	int	total_user_cbr;
+	int	free_user_dsr_bytes;
+	int	free_user_cbr;
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the following 4 options to specify TLB miss handling */
+#define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
+#define GRU_OPT_MISS_USER_POLL	0x0001	/* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR	0x0002	/* Send interrupt to cpu to
+					   handle fault */
+#define GRU_OPT_MISS_FMM_POLL	0x0003	/* Use system polling thread */
+#define GRU_OPT_MISS_MASK	0x0003	/* Mask for TLB MISS option */
+
+
+
+#endif		/* __GRU_H__ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 00000000000..e56e196a699
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRULIB_H__
+#define __GRULIB_H__
+
+#define GRU_BASENAME		"gru"
+#define GRU_FULLNAME		"/dev/gru"
+#define GRU_IOCTL_NUM 		 'G'
+
+/*
+ * Maximum number of GRU segments that a user can have open
+ * ZZZ temp - set high for testing. Revisit.
+ */
+#define GRU_MAX_OPEN_CONTEXTS		32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/* Register task as using the slice */
+#define GRU_SET_TASK_SLICE		_IOWR(GRU_IOCTL_NUM, 5, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS		_IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For fetching GRU chiplet status */
+#define GRU_GET_CHIPLET_STATUS		_IOWR(GRU_IOCTL_NUM, 10, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
+
+#define CONTEXT_WINDOW_BYTES(th)        (GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th)		((p) + GRU_GSEG_PAGESIZE * (th))
+
+/*
+ * Structure used to pass create context parameters to the driver
+ */
+struct gru_create_context_req {
+	unsigned long		gseg;
+	unsigned int		data_segment_bytes;
+	unsigned int		control_blocks;
+	unsigned int		maximum_thread_count;
+	unsigned int		options;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+	unsigned long	gseg;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+	unsigned long	gseg;
+	unsigned long	vaddr;
+	size_t		len;
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+	int		cpus;
+	int		blades;
+	int		nodes;
+	int		chiplets;
+	int		fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 00000000000..f97d8464012
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,545 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            GRU DRIVER TABLES, MACROS, externs, etc
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * Tables:
+ *
+ * 	VDATA-VMA Data		- Holds a few parameters. Head of linked list of
+ * 				  GTS tables for threads using the GSEG
+ * 	GTS - Gru Thread State  - contains info for managing a GSEG context. A
+ * 				  GTS is allocated for each thread accessing a
+ * 				  GSEG.
+ *     	GTD - GRU Thread Data   - contains shadow copy of GRU data when GSEG is
+ *     				  not loaded into a GRU
+ *	GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ *				  where a GSEG has been loaded. Similar to
+ *				  an mm_struct but for GRU.
+ *
+ *	GS  - GRU State 	- Used to manage the state of a GRU chiplet
+ *	BS  - Blade State	- Used to manage state of all GRU chiplets
+ *				  on a blade
+ *
+ *
+ *  Normal task tables for task using GRU.
+ *  		- 2 threads in process
+ *  		- 2 GSEGs open in process
+ *  		- GSEG1 is being used by both threads
+ *  		- GSEG2 is used only by thread 2
+ *
+ *       task -->|
+ *       task ---+---> mm ->------ (notifier) -------+-> gms
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG1 (thread1)
+ *                     |                  |          |
+ *                     |                  +-> gts--->|		GSEG1 (thread2)
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG2 (thread2)
+ *                     .
+ *                     .
+ *
+ *  GSEGs are marked DONTCOPY on fork
+ *
+ * At open
+ * 	file.private_data -> NULL
+ *
+ * At mmap,
+ * 	vma -> vdata
+ *
+ * After gseg reference
+ * 	vma -> vdata ->gts
+ *
+ * After fork
+ *   parent
+ * 	vma -> vdata -> gts
+ *   child
+ * 	(vma is not copied)
+ *
+ */
+
+#include <linux/rmap.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/mmu_notifier.h>
+#include "gru.h"
+#include "gruhandles.h"
+
+extern struct gru_stats_s gru_stats;
+extern struct gru_blade_state *gru_base[];
+extern unsigned long gru_start_paddr, gru_end_paddr;
+
+#define GRU_MAX_BLADES		MAX_NUMNODES
+#define GRU_MAX_GRUS		(GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
+
+#define GRU_DRIVER_ID_STR	"SGI GRU Device Driver"
+#define GRU_DRIVER_VERSION_STR	"0.80"
+
+/*
+ * GRU statistics.
+ */
+struct gru_stats_s {
+	atomic_long_t vdata_alloc;
+	atomic_long_t vdata_free;
+	atomic_long_t gts_alloc;
+	atomic_long_t gts_free;
+	atomic_long_t vdata_double_alloc;
+	atomic_long_t gts_double_allocate;
+	atomic_long_t assign_context;
+	atomic_long_t assign_context_failed;
+	atomic_long_t free_context;
+	atomic_long_t load_context;
+	atomic_long_t unload_context;
+	atomic_long_t steal_context;
+	atomic_long_t steal_context_failed;
+	atomic_long_t nopfn;
+	atomic_long_t break_cow;
+	atomic_long_t asid_new;
+	atomic_long_t asid_next;
+	atomic_long_t asid_wrap;
+	atomic_long_t asid_reuse;
+	atomic_long_t intr;
+	atomic_long_t call_os;
+	atomic_long_t call_os_check_for_bug;
+	atomic_long_t call_os_wait_queue;
+	atomic_long_t user_flush_tlb;
+	atomic_long_t user_unload_context;
+	atomic_long_t user_exception;
+	atomic_long_t set_task_slice;
+	atomic_long_t migrate_check;
+	atomic_long_t migrated_retarget;
+	atomic_long_t migrated_unload;
+	atomic_long_t migrated_unload_delay;
+	atomic_long_t migrated_nopfn_retarget;
+	atomic_long_t migrated_nopfn_unload;
+	atomic_long_t tlb_dropin;
+	atomic_long_t tlb_dropin_fail_no_asid;
+	atomic_long_t tlb_dropin_fail_upm;
+	atomic_long_t tlb_dropin_fail_invalid;
+	atomic_long_t tlb_dropin_fail_range_active;
+	atomic_long_t tlb_dropin_fail_idle;
+	atomic_long_t tlb_dropin_fail_fmm;
+	atomic_long_t mmu_invalidate_range;
+	atomic_long_t mmu_invalidate_page;
+	atomic_long_t mmu_clear_flush_young;
+	atomic_long_t flush_tlb;
+	atomic_long_t flush_tlb_gru;
+	atomic_long_t flush_tlb_gru_tgh;
+	atomic_long_t flush_tlb_gru_zero_asid;
+
+	atomic_long_t copy_gpa;
+
+	atomic_long_t mesq_receive;
+	atomic_long_t mesq_receive_none;
+	atomic_long_t mesq_send;
+	atomic_long_t mesq_send_failed;
+	atomic_long_t mesq_noop;
+	atomic_long_t mesq_send_unexpected_error;
+	atomic_long_t mesq_send_lb_overflow;
+	atomic_long_t mesq_send_qlimit_reached;
+	atomic_long_t mesq_send_amo_nacked;
+	atomic_long_t mesq_send_put_nacked;
+	atomic_long_t mesq_qf_not_full;
+	atomic_long_t mesq_qf_locked;
+	atomic_long_t mesq_qf_noop_not_full;
+	atomic_long_t mesq_qf_switch_head_failed;
+	atomic_long_t mesq_qf_unexpected_error;
+	atomic_long_t mesq_noop_unexpected_error;
+	atomic_long_t mesq_noop_lb_overflow;
+	atomic_long_t mesq_noop_qlimit_reached;
+	atomic_long_t mesq_noop_amo_nacked;
+	atomic_long_t mesq_noop_put_nacked;
+
+};
+
+#define OPT_DPRINT	1
+#define OPT_STATS	2
+#define GRU_QUICKLOOK	4
+
+
+#define IRQ_GRU			110	/* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY	((HZ * 20) / 1000)
+
+/*
+ * If a process has its context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY		((HZ * 200) / 1000)
+
+#define STAT(id)	do {						\
+				if (options & OPT_STATS)		\
+					atomic_long_inc(&gru_stats.id);	\
+			} while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...)						\
+	do {								\
+		if (options & OPT_DPRINT)				\
+			dev_dbg(dev, "%s: " fmt, __func__, x);		\
+	} while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID	0xfffff0
+#define MIN_ASID	8
+#define ASID_INC	8	/* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#if defined CONFIG_IA64
+#define VADDR_HI_BIT		64
+#define GRUREGION(addr)		(((addr) >> (VADDR_HI_BIT - 3)) & 3)
+#elif defined CONFIG_X86_64
+#define VADDR_HI_BIT		48
+#define GRUREGION(addr)		(0)		/* ZZZ could do better */
+#else
+#error "Unsupported architecture"
+#endif
+#define GRUASID(asid, addr)	((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ *  File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker {
+	unsigned int		mt_asid_gen;	/* ASID wrap count */
+	int			mt_asid;	/* current base ASID for gru */
+	unsigned short		mt_ctxbitmap;	/* bitmap of contexts using
+						   asid */
+};
+
+struct gru_mm_struct {
+	struct mmu_notifier	ms_notifier;
+	atomic_t		ms_refcnt;
+	spinlock_t		ms_asid_lock;	/* protects ASID assignment */
+	atomic_t		ms_range_active;/* num range_invals active */
+	char			ms_released;
+	wait_queue_head_t	ms_wait_queue;
+	DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+	struct gru_mm_tracker	ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+	spinlock_t		vd_lock;	/* Serialize access to vma */
+	struct list_head	vd_head;	/* head of linked list of gts */
+	long			vd_user_options;/* misc user option flags */
+	int			vd_cbr_au_count;
+	int			vd_dsr_au_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structures is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+	struct list_head	ts_next;	/* list - head at vma-private */
+	struct mutex		ts_ctxlock;	/* load/unload CTX lock */
+	struct mm_struct	*ts_mm;		/* mm currently mapped to
+						   context */
+	struct vm_area_struct	*ts_vma;	/* vma of GRU context */
+	struct gru_state	*ts_gru;	/* GRU where the context is
+						   loaded */
+	struct gru_mm_struct	*ts_gms;	/* asid & ioproc struct */
+	unsigned long		ts_cbr_map;	/* map of allocated CBRs */
+	unsigned long		ts_dsr_map;	/* map of allocated DATA
+						   resources */
+	unsigned long		ts_steal_jiffies;/* jiffies when context last
+						    stolen */
+	long			ts_user_options;/* misc user option flags */
+	pid_t			ts_tgid_owner;	/* task that is using the
+						   context - for migration */
+	int			ts_tsid;	/* thread that owns the
+						   structure */
+	int			ts_tlb_int_select;/* target cpu if interrupts
+						     enabled */
+	int			ts_ctxnum;	/* context number where the
+						   context is loaded */
+	atomic_t		ts_refcnt;	/* reference count GTS */
+	unsigned char		ts_dsr_au_count;/* Number of DSR resources
+						   required for context */
+	unsigned char		ts_cbr_au_count;/* Number of CBR resources
+						   required for context */
+	char			ts_force_unload;/* force context to be unloaded
+						   after migration */
+	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+							  allocated CB */
+	unsigned long		ts_gdata[0];	/* save area for GRU data (CB,
+						   DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v)		(((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts)		((gts)->ts_vma->vm_start +		\
+					(gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX			(-1)	/* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ *  GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+	struct gru_blade_state	*gs_blade;		/* GRU state for entire
+							   blade */
+	unsigned long		gs_gru_base_paddr;	/* Physical address of
+							   gru segments (64) */
+	void			*gs_gru_base_vaddr;	/* Virtual address of
+							   gru segments (64) */
+	unsigned char		gs_gid;			/* unique GRU number */
+	unsigned char		gs_tgh_local_shift;	/* used to pick TGH for
+							   local flush */
+	unsigned char		gs_tgh_first_remote;	/* starting TGH# for
+							   remote flush */
+	unsigned short		gs_blade_id;		/* blade of GRU */
+	spinlock_t		gs_asid_lock;		/* lock used for
+							   assigning asids */
+	spinlock_t		gs_lock;		/* lock used for
+							   assigning contexts */
+
+	/* -- the following are protected by the gs_asid_lock spinlock ---- */
+	unsigned int		gs_asid;		/* Next available ASID */
+	unsigned int		gs_asid_limit;		/* Limit of available
+							   ASIDs */
+	unsigned int		gs_asid_gen;		/* asid generation.
+							   Inc on wrap */
+
+	/* --- the following fields are protected by the gs_lock spinlock --- */
+	unsigned long		gs_context_map;		/* bitmap to manage
+							   contexts in use */
+	unsigned long		gs_cbr_map;		/* bitmap to manage CB
+							   resources */
+	unsigned long		gs_dsr_map;		/* bitmap used to manage
+							   DATA resources */
+	unsigned int		gs_reserved_cbrs;	/* Number of kernel-
+							   reserved cbrs */
+	unsigned int		gs_reserved_dsr_bytes;	/* Bytes of kernel-
+							   reserved dsrs */
+	unsigned short		gs_active_contexts;	/* number of contexts
+							   in use */
+	struct gru_thread_state	*gs_gts[GRU_NUM_CCH];	/* GTS currently using
+							   the context */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+	void			*kernel_cb;		/* First kernel
+							   reserved cb */
+	void			*kernel_dsr;		/* First kernel
+							   reserved DSR */
+	/* ---- the following are protected by the bs_lock spinlock ---- */
+	spinlock_t		bs_lock;		/* lock used for
+							   stealing contexts */
+	int			bs_lru_ctxnum;		/* STEAL - last context
+							   stolen */
+	struct gru_state	*bs_lru_gru;		/* STEAL - last gru
+							   stolen */
+
+	struct gru_state	bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c)						\
+	((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i)						\
+	((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i)						\
+	((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i)						\
+	((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+			(i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c)		(&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr)		((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr)		((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+				  * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid)							\
+	(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ?			\
+		(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]->		\
+			bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) :	\
+	 NULL)
+
+/*
+ * Scan all active GRUs in a GRU bitmap.
+ * "gid" is the loop variable and receives the index of each set bit in
+ * "map" (searched up to GRU_MAX_GRUS). After each pass gid is incremented
+ * and the search resumes from that position.
+ */
+#define for_each_gru_in_bitmap(gid, map)				\
+	for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
+		(gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
+
+/*
+ * Scan all GRUs on a specific blade.
+ * Walks every one of the GRU_CHIPLETS_PER_BLADE entries of
+ * gru_base[nid]->bs_grus; no filtering is done by this macro. "gru" is the
+ * cursor and "i" the slot index. gru_base[nid] is dereferenced without a
+ * NULL check.
+ */
+#define for_each_gru_on_blade(gru, nid, i)				\
+	for ((gru) = gru_base[nid]->bs_grus, (i) = 0;			\
+			(i) < GRU_CHIPLETS_PER_BLADE;			\
+			(i)++, (gru)++)
+
+/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
+#define for_each_gts_on_gru(gts, gru, ctxnum)				\
+	for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++)		\
+		if (((gts) = (gru)->gs_gts[ctxnum]))
+
+/*
+ * Scan each CBR whose bit is set in a TFM (or copy of).
+ * "i" receives the index of every set bit of "map" below GRU_NUM_CBE.
+ */
+#define for_each_cbr_in_tfm(i, map)					\
+	for ((i) = find_first_bit((map), GRU_NUM_CBE);			\
+	     (i) < GRU_NUM_CBE;						\
+	     (i)++, (i) = find_next_bit((map), GRU_NUM_CBE, (i)))
+
+/*
+ * Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit.
+ * The outer loop puts the index of each set bit of "map" (one bit per
+ * allocation unit) in "k"; the inner loop runs "i" over the
+ * GRU_CBR_AU_SIZE CBR numbers belonging to that unit.
+ */
+#define for_each_cbr_in_allocation_map(i, map, k)			\
+	for ((k) = find_first_bit((map), GRU_CBR_AU);			\
+	     (k) < GRU_CBR_AU;						\
+	     (k) = find_next_bit((map), GRU_CBR_AU, (k) + 1))		\
+		for ((i) = (k) * GRU_CBR_AU_SIZE;			\
+		     (i) < ((k) + 1) * GRU_CBR_AU_SIZE;			\
+		     (i)++)
+
+/*
+ * Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit.
+ * The outer loop puts the index of each set bit of "map" (one bit per
+ * allocation unit) in "k"; the inner loop runs "i" over the GRU_DSR_AU_CL
+ * entries belonging to that unit.
+ *
+ * "map" is cast to a bitmap pointer; it is parenthesized under the cast so
+ * that an expression argument (e.g. "p + 1") is cast as a whole.
+ */
+#define for_each_dsr_in_allocation_map(i, map, k)			\
+	for ((k) = find_first_bit((const unsigned long *)(map), GRU_DSR_AU);\
+			(k) < GRU_DSR_AU;				\
+			(k) = find_next_bit((const unsigned long *)(map),\
+					  GRU_DSR_AU, (k) + 1))		\
+		for ((i) = (k) * GRU_DSR_AU_CL;				\
+				(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
+
+/*
+ * Physical / virtual address of the GSEG for context "ctxnum" on a GRU:
+ * the GRU base address plus ctxnum * GRU_GSEG_STRIDE.
+ * "ctxnum" is parenthesized so that an expression argument (e.g. "n + 1")
+ * is multiplied as a whole.
+ */
+#define gseg_physical_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_paddr + (ctxnum) * GRU_GSEG_STRIDE)
+#define gseg_virtual_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_vaddr + (ctxnum) * GRU_GSEG_STRIDE)
+
+/*-----------------------------------------------------------------------------
+ * Lock / Unlock GRU handles
+ * 	Use the "delresp" bit in the handle as a "lock" bit.
+ */
+
+/* Lock hierarchy checking enabled only in emulator */
+
+/*
+ * Acquire the lock bit of a handle: busy-wait until this caller is the one
+ * that atomically changes bit 1 of the word at "h" from clear to set.
+ */
+static inline void __lock_handle(void *h)
+{
+	while (test_and_set_bit(1, h))
+		cpu_relax();
+}
+
+/* Release the lock bit of a handle by clearing bit 1 of the word at "h". */
+static inline void __unlock_handle(void *h)
+{
+	clear_bit(1, h);
+}
+
+/* Typed wrapper: take the handle lock of a context configuration handle. */
+static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	__lock_handle(cch);
+}
+
+/* Typed wrapper: drop the handle lock of a context configuration handle. */
+static inline void unlock_cch_handle(struct gru_context_configuration_handle
+				     *cch)
+{
+	__unlock_handle(cch);
+}
+
+/* Typed wrapper: take the handle lock of a TLB global handle. */
+static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__lock_handle(tgh);
+}
+
+/* Typed wrapper: drop the handle lock of a TLB global handle. */
+static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__unlock_handle(tgh);
+}
+
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+/* Forward declaration */
+struct gru_unload_context_req;
+
+/* Globals shared between the driver's source files */
+extern struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+/* Per-vma and per-thread state management */
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+				int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(struct gru_state *gru);
+extern irqreturn_t gru_intr(int irq, void *dev_id);
+/*
+ * Handlers that take the raw ioctl argument. Note that
+ * gru_set_task_slice() takes a signed long while the others take
+ * unsigned long.
+ */
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_task_slice(long address);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern unsigned long reserve_gru_cb_resources(struct gru_state *gru,
+		int cbr_au_count, char *cbmap);
+extern unsigned long reserve_gru_ds_resources(struct gru_state *gru,
+		int dsr_au_count, char *dsmap);
+extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+					unsigned long len);
+
+/* Debug option bits */
+extern unsigned long options;
+
+#endif /* __GRUTABLES_H__ */
-- 
cgit v1.2.3


From b2fb06fcb6d6c9912b43e61394891e3994d4b613 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:56 -0700
Subject: GRU Driver: kernel services header files

This patch contains the header file used to export GRU services to other
kernel drivers such as XPMEM or XPNET.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grukservices.h | 134 ++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grukservices.h

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 00000000000..eb17e0a3ac6
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,134 @@
+
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef __GRU_KSERVICES_H_
+#define __GRU_KSERVICES_H_
+
+
+/*
+ * Message queues using the GRU to send/receive messages.
+ *
+ * These functions allow the user to create a message queue for
+ * sending/receiving 1 or 2 cacheline messages using the GRU.
+ *
+ * Processes SENDING messages will use a kernel CBR/DSR to send
+ * the message. This is transparent to the caller.
+ *
+ * The receiver does not use any GRU resources.
+ *
+ * The functions support:
+ * 	- single receiver
+ * 	- multiple senders
+ *	- cross partition message
+ *
+ * Missing features ZZZ:
+ * 	- user options for dealing with timeouts, queue full, etc.
+ * 	- gru_create_message_queue() needs interrupt vector info
+ */
+
+/*
+ * Initialize a user allocated chunk of memory to be used as
+ * a message queue. The caller must ensure that the queue is
+ * in contiguous physical memory and is cacheline aligned.
+ *
+ * Message queue size is the total number of bytes allocated
+ * to the queue including a 2 cacheline header that is used
+ * to manage the queue.
+ *
+ *  Input:
+ * 	p	pointer to user allocated memory.
+ * 	bytes	size of message queue in bytes
+ *
+ *  Returns:
+ *  	0	OK
+ *  	>0	error
+ */
+extern int gru_create_message_queue(void *p, unsigned int bytes);
+
+/*
+ * Send a message to a message queue.
+ *
+ * Note: The message queue transport mechanism uses the first 32
+ * bits of the message. Users should avoid using these bits.
+ *
+ *
+ *   Input:
+ * 	mq_gpa	message queue - must be a UV global physical address
+ * 	mesg	pointer to message. Must be 64-bit aligned
+ * 	bytes	size of message in bytes
+ *
+ *   Returns:
+ *      0	message sent
+ *     >0	Send failure - one of the MQE_* status values
+ *
+ */
+extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
+						unsigned int bytes);
+
+/* Status values returned by gru_send_message_gpa() */
+#define MQE_OK			0	/* message sent successfully */
+#define MQE_CONGESTION		1	/* temporary congestion, try again */
+#define MQE_QUEUE_FULL		2	/* queue is full */
+#define MQE_UNEXPECTED_CB_ERR	3	/* unexpected CB error */
+#define MQE_PAGE_OVERFLOW	10	/* BUG - queue overflowed a page */
+#define MQE_BUG_NO_RESOURCES	11	/* BUG - could not alloc GRU cb/dsr */
+
+/*
+ * Advance the receive pointer for the message queue to the next message.
+ * Note: current API requires messages to be gotten & freed in order. Future
+ * API extensions may allow for out-of-order freeing.
+ *
+ *   Input:
+ * 	mq	message queue
+ * 	mesq	message being freed (as obtained from gru_get_next_message())
+ */
+extern void gru_free_message(void *mq, void *mesq);
+
+/*
+ * Get next message from message queue. Returns pointer to
+ * message OR NULL if no message present.
+ * User must call gru_free_message() after message is processed
+ * in order to move the queue pointers to next message.
+ *
+ *   Input:
+ * 	mq	message queue
+ *
+ *   Returns:
+ *	p	pointer to message
+ *	NULL	no message available
+ */
+extern void *gru_get_next_message(void *mq);
+
+
+/*
+ * Copy data using the GRU. Source or destination can be located in a remote
+ * partition.
+ *
+ *    Input:
+ *    	dest_gpa	destination global physical address
+ *    	src_gpa		source global physical address
+ *    	bytes		number of bytes to copy
+ *
+ *    Returns:
+ *	0		OK
+ *	>0		error
+ */
+extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+							unsigned int bytes);
+
+#endif 		/* __GRU_KSERVICES_H_ */
-- 
cgit v1.2.3


From 78cf1de49b11c0e2edb35cce91ac6c279cc852b3 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:56 -0700
Subject: GRU Driver: driver initialization, file & vma ops

This file contains the functions for initializing the driver, handling
file & vma operations and for processing IOCTL requests from the user.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grufile.c | 481 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 481 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grufile.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 00000000000..09c9c65ff9d
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,481 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also includes the driver initialization code.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+/*
+ * IS_UV() - nonzero when running on an SGI UV system.
+ * Every expansion must be a single primary expression so that uses such as
+ * "!IS_UV()" negate the whole test; the ia64 variant is therefore wrapped
+ * in parentheses.
+ */
+#if defined CONFIG_X86_64
+#include <asm/genapic.h>
+#include <asm/irq.h>
+#define IS_UV()		is_uv_system()
+#elif defined CONFIG_IA64
+#include <asm/system.h>
+#include <asm/sn/simulator.h>
+/* temp support for running on hardware simulator */
+#define IS_UV()		(IS_MEDUSA() || ia64_platform_is("uv"))
+#else
+#define IS_UV()		0
+#endif
+
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+/* Per-blade GRU state, indexed by blade id; populated by gru_init_tables() */
+struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
+/*
+ * Physical address range of the GRU space, set in gru_init().
+ * Declared separately so that __read_mostly applies to both variables (in a
+ * combined declaration the attribute binds only to the declarator it
+ * follows).
+ */
+unsigned long gru_start_paddr __read_mostly;
+unsigned long gru_end_paddr __read_mostly;
+struct gru_stats_s gru_stats;
+
+/* Guaranteed user available resources on each node */
+static int max_user_cbrs, max_user_dsr_bytes;
+
+/* Tentative definitions; the initializers are at the end of this file */
+static struct file_operations gru_fops;
+static struct miscdevice gru_miscdev;
+
+
+/*
+ * gru_vma_close
+ *
+ * Called when unmapping a device mapping. Frees all gru resources
+ * and tables belonging to the vma.
+ */
+static void gru_vma_close(struct vm_area_struct *vma)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts, *tmp;
+
+	/* Nothing was attached to this vma */
+	if (!vdata)
+		return;
+
+	vma->vm_private_data = NULL;
+	gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
+				vdata);
+
+	/*
+	 * Tear down every thread state hanging off the vma: unlink it,
+	 * unload its context (without saving state) if one is loaded,
+	 * then drop the reference.
+	 */
+	list_for_each_entry_safe(gts, tmp, &vdata->vd_head, ts_next) {
+		list_del(&gts->ts_next);
+		mutex_lock(&gts->ts_ctxlock);
+		if (gts->ts_gru)
+			gru_unload_context(gts, 0);
+		mutex_unlock(&gts->ts_ctxlock);
+		gts_drop(gts);
+	}
+
+	kfree(vdata);
+	STAT(vdata_free);
+}
+
+/*
+ * gru_file_mmap
+ *
+ * Called when mmaping the device.  Initializes the vma with a fault handler
+ * and private data structure necessary to allocate, track, and free the
+ * underlying pages.
+ */
+static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const unsigned long required = VM_SHARED | VM_WRITE;
+	struct gru_vma_data *vdata;
+
+	/* Only mappings that are both shared and writable are accepted */
+	if ((vma->vm_flags & required) != required)
+		return -EPERM;
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND |
+			 VM_PFNMAP | VM_RESERVED;
+	vma->vm_page_prot = PAGE_SHARED;
+	vma->vm_ops = &gru_vm_ops;
+
+	/* Per-vma driver data; released again in gru_vma_close() */
+	vdata = gru_alloc_vma_data(vma, 0);
+	vma->vm_private_data = vdata;
+	if (!vdata)
+		return -ENOMEM;
+
+	gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
+		file, vma->vm_start, vma, vdata);
+	return 0;
+}
+
+/*
+ * Create a new GRU context
+ */
+static int gru_create_new_context(unsigned long arg)
+{
+	struct gru_create_context_req req;
+	struct gru_vma_data *vdata;
+	struct vm_area_struct *vma;
+	int ret;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/* Reject empty requests and requests above the per-node limits */
+	if (req.data_segment_bytes == 0)
+		return -EINVAL;
+	if (req.data_segment_bytes > max_user_dsr_bytes)
+		return -EINVAL;
+	if (!req.control_blocks || !req.maximum_thread_count)
+		return -EINVAL;
+	if (req.control_blocks > max_user_cbrs)
+		return -EINVAL;
+
+	/* Default TLB-miss mode when the caller selected none */
+	if ((req.options & GRU_OPT_MISS_MASK) == 0)
+		req.options |= GRU_OPT_MISS_FMM_INTR;
+
+	down_write(&current->mm->mmap_sem);
+	vma = gru_find_vma(req.gseg);
+	if (!vma) {
+		/* req.gseg does not lie in a GRU mapping of this process */
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* Record the requested resources in the vma's driver data */
+	vdata = vma->vm_private_data;
+	vdata->vd_user_options = req.options;
+	vdata->vd_dsr_au_count = GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
+	vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+	ret = 0;
+
+unlock:
+	up_write(&current->mm->mmap_sem);
+	return ret;
+}
+
+/*
+ * Get GRU configuration info (temp - for emulator testing)
+ */
+/*
+ * Fill a struct gru_config_info with the online cpu/node counts and the
+ * derived blade/chiplet counts, and copy it to the user buffer at "arg".
+ *
+ * Returns 0 on success or -EFAULT if the user buffer cannot be written.
+ */
+static long gru_get_config_info(unsigned long arg)
+{
+	struct gru_config_info info;
+	int nodesperblade;
+
+	/*
+	 * The whole structure is copied to user space, so clear it first:
+	 * any field or padding not assigned below would otherwise expose
+	 * stale kernel stack contents.
+	 */
+	memset(&info, 0, sizeof(info));
+
+	/* Two nodes per blade when nodes 0 and 1 map to the same blade */
+	if (num_online_nodes() > 1 &&
+			(uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
+		nodesperblade = 2;
+	else
+		nodesperblade = 1;
+	info.cpus = num_online_cpus();
+	info.nodes = num_online_nodes();
+	info.blades = info.nodes / nodesperblade;
+	info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
+
+	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Get GRU chiplet status
+ */
+/*
+ * Report resource totals and current free counts for one GRU chiplet.
+ *
+ * The user passes a struct gru_chiplet_info at "arg" with "node" and
+ * "chiplet" filled in (node == -1 selects the caller's current node); the
+ * structure is completed and copied back.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be accessed,
+ * -EINVAL for an out-of-range node/chiplet or a blade with no GRU tables.
+ */
+static long gru_get_chiplet_status(unsigned long arg)
+{
+	struct gru_state *gru;
+	struct gru_chiplet_info info;
+	int bid;
+
+	if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+		return -EFAULT;
+
+	if (info.node == -1)
+		info.node = numa_node_id();
+	/*
+	 * NOTE(review): the chiplet is bounded by GRU_CHIPLETS_PER_HUB while
+	 * bs_grus[] is sized by GRU_CHIPLETS_PER_BLADE - confirm that
+	 * PER_HUB never exceeds PER_BLADE.
+	 */
+	if (info.node >= num_possible_nodes() ||
+			info.chiplet >= GRU_CHIPLETS_PER_HUB ||
+			info.node < 0 || info.chiplet < 0)
+		return -EINVAL;
+
+	/*
+	 * gru_base[] is only populated for blades seen by gru_init_tables().
+	 * A node that is possible but has no tables would make get_gru()
+	 * dereference a NULL blade pointer, so reject it here.
+	 */
+	bid = uv_node_to_blade_id(info.node);
+	if (bid < 0 || bid >= GRU_MAX_BLADES || !gru_base[bid])
+		return -EINVAL;
+
+	info.blade = bid;
+	gru = get_gru(info.blade, info.chiplet);
+
+	info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
+	info.total_cbr = GRU_NUM_CB;
+	info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
+		gru->gs_reserved_dsr_bytes;
+	info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
+	/* Each set bit in the maps is one free allocation unit */
+	info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
+			GRU_DSR_AU_BYTES;
+	info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+
+	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * gru_file_unlocked_ioctl
+ *
+ * Called to update file attributes via IOCTL calls.
+ */
+static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
+				    unsigned long arg)
+{
+	gru_dbg(grudev, "file %p\n", file);
+
+	/* Dispatch to the handler for the request; each gets the raw arg */
+	switch (req) {
+	case GRU_CREATE_CONTEXT:
+		return gru_create_new_context(arg);
+	case GRU_SET_TASK_SLICE:
+		return gru_set_task_slice(arg);
+	case GRU_USER_GET_EXCEPTION_DETAIL:
+		return gru_get_exception_detail(arg);
+	case GRU_USER_UNLOAD_CONTEXT:
+		return gru_user_unload_context(arg);
+	case GRU_GET_CHIPLET_STATUS:
+		return gru_get_chiplet_status(arg);
+	case GRU_USER_FLUSH_TLB:
+		return gru_user_flush_tlb(arg);
+	case GRU_USER_CALL_OS:
+		return gru_handle_user_call_os(arg);
+	case GRU_GET_CONFIG_INFO:
+		return gru_get_config_info(arg);
+	default:
+		/* Unknown request code */
+		return -EBADRQC;
+	}
+}
+
+/*
+ * Initialize the gru_state of a single chiplet. Called at init time while
+ * building tables for all GRUs that are present in the system.
+ *
+ *	gru	state structure to initialize
+ *	paddr	physical base address of the chiplet
+ *	vaddr	kernel virtual base address of the chiplet
+ *	nid	node id (only used in the debug message)
+ *	bid	blade id; indexes gru_base[] and forms the upper part of gs_gid
+ *	grunum	chiplet number within the blade
+ */
+static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
+			     void *vaddr, int nid, int bid, int grunum)
+{
+	spin_lock_init(&gru->gs_lock);
+	spin_lock_init(&gru->gs_asid_lock);
+	gru->gs_gru_base_paddr = paddr;
+	gru->gs_gru_base_vaddr = vaddr;
+	gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
+	gru->gs_blade = gru_base[bid];
+	gru->gs_blade_id = bid;
+	/*
+	 * Start with one bit set per allocation unit. The CBR map special
+	 * cases 64 units because shifting 1UL by 64 is undefined.
+	 * NOTE(review): the DSR map has no such guard - confirm that
+	 * GRU_DSR_AU is always below 64.
+	 */
+	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
+	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+	gru_tgh_flush_init(gru);
+	gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
+		bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
+		gru->gs_gru_base_paddr);
+	/* The return value of gru_kservices_init() is not checked here */
+	gru_kservices_init(gru);
+}
+
+/*
+ * Allocate and initialize the per-blade state (gru_base[]) for every blade
+ * that has an online node, and initialize each chiplet on those blades.
+ *
+ * Also computes max_user_cbrs / max_user_dsr_bytes: for each blade the
+ * largest per-chiplet amount is taken, and the minimum of those values
+ * across all blades is kept.
+ *
+ *	gru_base_paddr	physical address of the start of GRU space
+ *	gru_base_vaddr	kernel virtual address of the start of GRU space
+ *
+ * Returns 0 on success or -ENOMEM if a blade structure cannot be allocated,
+ * in which case every blade structure allocated so far is released.
+ */
+static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
+{
+	int pnode, nid, bid, chip;
+	int cbrs, dsrbytes, n;
+	int order = get_order(sizeof(struct gru_blade_state));
+	struct page *page;
+	struct gru_state *gru;
+	unsigned long paddr;
+	void *vaddr;
+
+	max_user_cbrs = GRU_NUM_CB;
+	max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
+	for_each_online_node(nid) {
+		bid = uv_node_to_blade_id(nid);
+		pnode = uv_node_to_pnode(nid);
+		/* Several nodes may share a blade; set each blade up once */
+		if (gru_base[bid])
+			continue;
+		page = alloc_pages_node(nid, GFP_KERNEL, order);
+		if (!page)
+			goto fail;
+		gru_base[bid] = page_address(page);
+		memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
+		gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
+		spin_lock_init(&gru_base[bid]->bs_lock);
+
+		dsrbytes = 0;
+		cbrs = 0;
+		for (gru = gru_base[bid]->bs_grus, chip = 0;
+				chip < GRU_CHIPLETS_PER_BLADE;
+				chip++, gru++) {
+			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
+			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
+			/* Argument order is (..., nid, bid, grunum) */
+			gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip);
+			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+			cbrs = max(cbrs, n);
+			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+			dsrbytes = max(dsrbytes, n);
+		}
+		max_user_cbrs = min(max_user_cbrs, cbrs);
+		max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
+	}
+
+	return 0;
+
+fail:
+	/*
+	 * gru_base[] is indexed by blade id, not node id, so undo the
+	 * allocations by walking the blade table itself and clear each
+	 * entry so that it cannot be freed a second time.
+	 */
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+		if (!gru_base[bid])
+			continue;
+		free_pages((unsigned long)gru_base[bid], order);
+		gru_base[bid] = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * get_base_irq() returns the first of the consecutive IRQ numbers used by
+ * the GRU chiplets (IRQ_GRU). Note that no variant is provided when neither
+ * CONFIG_IA64 nor CONFIG_X86_64 is defined.
+ */
+#ifdef CONFIG_IA64
+
+static int get_base_irq(void)
+{
+	return IRQ_GRU;
+}
+
+#elif defined CONFIG_X86_64
+
+/* irq_chip callback that intentionally does nothing */
+static void noop(unsigned int irq)
+{
+}
+
+/* irq_chip whose mask/unmask/ack operations are all no-ops */
+static struct irq_chip gru_chip = {
+	.name		= "gru",
+	.mask		= noop,
+	.unmask		= noop,
+	.ack		= noop,
+};
+
+/* Installs gru_chip on IRQ_GRU and IRQ_GRU + 1 before returning the base */
+static int get_base_irq(void)
+{
+	set_irq_chip(IRQ_GRU, &gru_chip);
+	set_irq_chip(IRQ_GRU + 1, &gru_chip);
+	return IRQ_GRU;
+}
+#endif
+
+/*
+ * gru_init
+ *
+ * Called at boot or module load time to initialize the GRUs.
+ */
+/*
+ * Module/boot initialization. On a UV system this determines the GRU
+ * physical address range, requests one interrupt per chiplet, registers
+ * the misc device and the proc entries, and builds the GRU tables.
+ *
+ * Returns 0 on success (and also, without doing anything, when not running
+ * on a UV system) or the error code of the step that failed, after undoing
+ * the steps already completed.
+ */
+static int __init gru_init(void)
+{
+	int ret, irq, chip;
+	void *gru_start_vaddr;
+
+	if (!(IS_UV()))
+		return 0;
+
+#if defined CONFIG_IA64
+	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
+#else
+	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+				0x7fffffffffffUL;
+
+#endif
+	gru_start_vaddr = __va(gru_start_paddr);
+	gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
+	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
+	       gru_start_paddr, gru_end_paddr);
+	irq = get_base_irq();
+	for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
+		/*
+		 * The name passed to request_irq() is kept by the kernel for
+		 * as long as the handler is installed, so it must be a
+		 * string with static lifetime (not a buffer on this stack).
+		 */
+		ret = request_irq(irq + chip, gru_intr, 0, "gru", NULL);
+		if (ret) {
+			printk(KERN_ERR "%s: request_irq failed\n",
+			       GRU_DRIVER_ID_STR);
+			goto exit1;
+		}
+	}
+
+	/*
+	 * NOTE(review): the device is registered before gru_init_tables()
+	 * has run - confirm that user space cannot reach the tables in
+	 * that window.
+	 */
+	ret = misc_register(&gru_miscdev);
+	if (ret) {
+		printk(KERN_ERR "%s: misc_register failed\n",
+		       GRU_DRIVER_ID_STR);
+		goto exit1;
+	}
+
+	ret = gru_proc_init();
+	if (ret) {
+		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
+		goto exit2;
+	}
+
+	ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
+	if (ret) {
+		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
+		goto exit3;
+	}
+
+	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
+	       GRU_DRIVER_VERSION_STR);
+	return 0;
+
+exit3:
+	gru_proc_exit();
+exit2:
+	misc_deregister(&gru_miscdev);
+exit1:
+	/* "chip" is the number of IRQs successfully requested so far */
+	for (--chip; chip >= 0; chip--)
+		free_irq(irq + chip, NULL);
+	return ret;
+
+}
+
+/*
+ * Module unload: release the chiplet interrupts, free the per-blade state
+ * and unregister the misc device and proc entries.
+ */
+static void __exit gru_exit(void)
+{
+	int i, bid;
+	/* Must match the order used for the allocation in gru_init_tables() */
+	int order = get_order(sizeof(struct gru_blade_state));
+
+	/* gru_init() registers nothing when not running on a UV system */
+	if (!(IS_UV()))
+		return;
+
+	for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
+		free_irq(IRQ_GRU + i, NULL);
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+		/* free_pages() ignores an address of 0 (unused blade slot) */
+		free_pages((unsigned long)gru_base[bid], order);
+		gru_base[bid] = NULL;
+	}
+
+	misc_deregister(&gru_miscdev);
+	gru_proc_exit();
+}
+
+/* File operations of the GRU device: ioctl and mmap only */
+static struct file_operations gru_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= gru_file_unlocked_ioctl,
+	.mmap		= gru_file_mmap,
+};
+
+/* Misc device named "gru" with a dynamically assigned minor number */
+static struct miscdevice gru_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "gru",
+	.fops		= &gru_fops,
+};
+
+/*
+ * vma operations installed by gru_file_mmap(). Not static: the address of
+ * this table is also used to recognize GRU vmas (see gru_find_vma()).
+ */
+struct vm_operations_struct gru_vm_ops = {
+	.close		= gru_vma_close,
+	.fault		= gru_fault,
+};
+
+/* Module entry and exit points */
+module_init(gru_init);
+module_exit(gru_exit);
+
+/* "options" is defined outside this file; mode 0644 = root-writable */
+module_param(options, ulong, 0644);
+MODULE_PARM_DESC(options, "Various debug options");
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_LICENSE("GPL");
+/* The two string macros are concatenated with no separator between them */
+MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
+MODULE_VERSION(GRU_DRIVER_VERSION_STR);
+
-- 
cgit v1.2.3


From 142586409c8be7dc071bb94d7cd2d69ccfd99b6b Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:57 -0700
Subject: GRU Driver: page faults & exceptions

This file contains the functions that manage GRU page faults and
exceptions.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grufault.c | 633 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 633 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grufault.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 00000000000..3d33015bbf3
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,633 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+	/* Inside the half-open range [gru_start_paddr, gru_end_paddr) ? */
+	if (paddr < gru_start_paddr)
+		return 0;
+	return paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+	struct vm_area_struct *vma = find_vma(current->mm, vaddr);
+
+	/*
+	 * find_vma() may return a vma that starts above vaddr, so the
+	 * start address has to be checked as well.
+	 */
+	if (!vma || vaddr < vma->vm_start)
+		return NULL;
+
+	/* Only mappings created by this driver qualify */
+	if (vma->vm_ops != &gru_vm_ops)
+		return NULL;
+
+	return vma;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ * 	- *gts with the mmap_sem locked for read and the GTS locked.
+ *	- NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct gru_thread_state *gts;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	gts = vma ? gru_find_thread_state(vma, TSID(vaddr, vma)) : NULL;
+	if (!gts) {
+		/* Nothing found: do not leave mmap_sem held */
+		up_read(&mm->mmap_sem);
+		return NULL;
+	}
+
+	/* Success: return with both mmap_sem (read) and the GTS lock held */
+	mutex_lock(&gts->ts_ctxlock);
+	return gts;
+}
+
+/*
+ * Like gru_find_lock_gts(), but uses gru_alloc_thread_state() to obtain
+ * the GTS and therefore takes mmap_sem for write while doing so.
+ *
+ * Returns:
+ * 	- *gts with the GTS locked and mmap_sem downgraded to a read lock.
+ *	- NULL (mmap_sem released) if no GTS could be obtained.
+ */
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct gru_thread_state *gts;
+	struct vm_area_struct *vma;
+
+	down_write(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	gts = vma ? gru_alloc_thread_state(vma, TSID(vaddr, vma)) : NULL;
+	if (!gts) {
+		up_write(&mm->mmap_sem);
+		return NULL;
+	}
+
+	mutex_lock(&gts->ts_ctxlock);
+	downgrade_write(&mm->mmap_sem);
+	return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts() or
+ * gru_alloc_locked_gts(): releases the GTS context lock and then the read
+ * lock on the current process's mmap_sem.
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+	mutex_unlock(&gts->ts_ctxlock);
+	up_read(&current->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ *
+ * If the cb address is not valid (should not happen, but...), nothing
+ * bad will happen.. The get_user()/put_user() will fail but there
+ * are no bad side-effects.
+ */
+static void gru_cb_set_istatus_active(unsigned long __user *cb)
+{
+	/* View of the first word of the CB, as raw data and as bit fields */
+	union {
+		struct gru_instruction_bits bits;
+		unsigned long dw;
+	} first_dw;
+
+	if (!cb)
+		return;
+
+	/*
+	 * Read-modify-write of the user's CB word. The results of
+	 * get_user()/put_user() are deliberately not checked.
+	 */
+	get_user(first_dw.dw, cb);
+	first_dw.bits.istatus = CBS_ACTIVE;
+	put_user(first_dw.dw, cb);
+}
+
+/*
+ * Convert an interrupt IRQ to a pointer to the state (struct gru_state) of
+ * the GRU that caused the interrupt. Interrupts are always sent to a cpu on
+ * the blade that contains the GRU (except for headless blades which are not
+ * currently supported). A blade has N grus; a block of N consecutive IRQs is
+ * assigned to the GRUs. The IRQ number uniquely identifies the GRU chiplet
+ * on the local blade that caused the interrupt. Always called in interrupt
+ * context.
+ *
+ * The chiplet index is irq - IRQ_GRU and the blade is the one the current
+ * cpu belongs to (uv_numa_blade_id()).
+ */
+static inline struct gru_state *irq_to_gru(int irq)
+{
+	return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu.
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The bits that were cleared are returned in the caller-supplied
+ * "map". Note that since the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+				struct gru_tlb_fault_map *map)
+{
+	unsigned long i, k;
+	struct gru_tlb_fault_map *tfm;
+
+	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+	prefetchw(tfm);		/* Helps on hardware, required for emulator */
+	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+		k = tfm->fault_bits[i];
+		/* Only words with bits set need the atomic exchange */
+		if (k)
+			k = xchg(&tfm->fault_bits[i], 0UL);
+		map->fault_bits[i] = k;
+	}
+
+	/*
+	 * Not functionally required but helps performance. (Required
+	 * on emulator)
+	 */
+	gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * 	returns:
+ * 		  0 - successful
+ * 		< 0 - error code
+ * 		  1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+				 unsigned long vaddr, int write,
+				 unsigned long *paddr, int *pageshift)
+{
+	struct page *pg;
+	int npages;
+
+	/* ZZZ Need to handle HUGE pages */
+	if (is_vm_hugetlb_page(vma))
+		return -EFAULT;
+
+	*pageshift = PAGE_SHIFT;
+
+	/* Look up (faulting in if needed) the single page at vaddr */
+	npages = get_user_pages(current, current->mm, vaddr, 1, write, 0,
+				&pg, NULL);
+	if (npages <= 0)
+		return -EFAULT;
+
+	/* Only the physical address is needed; drop the page reference */
+	*paddr = page_to_phys(pg);
+	put_page(pg);
+	return 0;
+}
+
+/*
+ *
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ *	ZZZ - hugepage support is incomplete
+ */
+/*
+ * Returns 0 with *paddr and *pageshift filled in, or 1 when the translation
+ * cannot be resolved here (a page-table level is empty, the pte is not
+ * present, or a write is requested and the pte is not both writable and
+ * dirty).
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+	int write, unsigned long *paddr, int *pageshift)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pud_t *pudp;
+	pte_t pte;
+
+	WARN_ON(irqs_disabled());		/* ZZZ debug */
+
+	/*
+	 * Interrupts are off for the duration of the page-table walk.
+	 * NOTE(review): presumably this keeps the tables from being torn
+	 * down underneath the walk - confirm.
+	 */
+	local_irq_disable();
+	pgdp = pgd_offset(vma->vm_mm, vaddr);
+	if (unlikely(pgd_none(*pgdp)))
+		goto err;
+
+	pudp = pud_offset(pgdp, vaddr);
+	if (unlikely(pud_none(*pudp)))
+		goto err;
+
+	pmdp = pmd_offset(pudp, vaddr);
+	if (unlikely(pmd_none(*pmdp)))
+		goto err;
+#ifdef CONFIG_X86_64
+	/* A large pmd maps the page itself: read it as if it were a pte */
+	if (unlikely(pmd_large(*pmdp)))
+		pte = *(pte_t *) pmdp;
+	else
+#endif
+		pte = *pte_offset_kernel(pmdp, vaddr);
+
+	local_irq_enable();
+
+	/* The pte was copied above, so it can be examined with irqs on */
+	if (unlikely(!pte_present(pte) ||
+		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
+		return 1;
+
+	*paddr = pte_pfn(pte) << PAGE_SHIFT;
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+	return 0;
+
+err:
+	/* Early exits from the walk still hold interrupts disabled */
+	local_irq_enable();
+	return 1;
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
+ *	Input:
+ *		gts   Thread state of the faulting context
+ *		tfh   TLB fault handle that describes the miss
+ *		cb    Address of user CBR. Null if not running in user context
+ * 	Return (as implemented by the exit paths below):
+ * 		  0 = dropin done, TFH was idle, or TFH in FMM state on a
+ * 		      user call (nothing more to do)
+ * 		  1 = range invalidate active, or the atomic lookup failed
+ * 		      with no cb and the TFH was switched to user polling mode
+ * 		< 0 = error code: -EAGAIN (no asid), -EFAULT (no vma, lookup
+ * 		      failed, or address is a GRU paddr)
+ *
+ */
+static int gru_try_dropin(struct gru_thread_state *gts,
+			  struct gru_tlb_fault_handle *tfh,
+			  unsigned long __user *cb)
+{
+	struct mm_struct *mm = gts->ts_mm;
+	struct vm_area_struct *vma;
+	int pageshift, asid, write, ret;
+	unsigned long paddr, gpa, vaddr;
+
+	/*
+	 * NOTE: The GRU contains magic hardware that eliminates races between
+	 * TLB invalidates and TLB dropins. If an invalidate occurs
+	 * in the window between reading the TFH and the subsequent TLB dropin,
+	 * the dropin is ignored. This eliminates the need for additional locks.
+	 */
+
+	/*
+	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
+	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
+	 * is a transient state.
+	 */
+	if (tfh->state == TFHSTATE_IDLE)
+		goto failidle;
+	if (tfh->state == TFHSTATE_MISS_FMM && cb)
+		goto failfmm;
+
+	/* Pull the description of the miss out of the TFH */
+	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+	vaddr = tfh->missvaddr;
+	asid = tfh->missasid;
+	if (asid == 0)
+		goto failnoasid;
+
+	rmb();	/* TFH must be cache resident before reading ms_range_active */
+
+	/*
+	 * TFH is cache resident - at least briefly. Fail the dropin
+	 * if a range invalidate is active.
+	 */
+	if (atomic_read(&gts->ts_gms->ms_range_active))
+		goto failactive;
+
+	vma = find_vma(mm, vaddr);
+	if (!vma)
+		goto failinval;
+
+	/*
+	 * Atomic lookup is faster & usually works even if called in non-atomic
+	 * context.
+	 */
+	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
+	if (ret) {
+		/* No cb: do not try the non-atomic lookup, switch to UPM */
+		if (!cb)
+			goto failupm;
+		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
+					  &pageshift))
+			goto failinval;
+	}
+	if (is_gru_paddr(paddr))
+		goto failinval;
+
+	/* Round paddr down to the start of its page before the dropin */
+	paddr = paddr & ~((1UL << pageshift) - 1);
+	gpa = uv_soc_phys_ram_to_gpa(paddr);
+	gru_cb_set_istatus_active(cb);
+	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+			  GRU_PAGESIZE(pageshift));
+	STAT(tlb_dropin);
+	/* ret still records which lookup supplied the translation */
+	gru_dbg(grudev,
+		"%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
+		ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
+		pageshift, gpa);
+	return 0;
+
+failnoasid:
+	/* No asid (delayed unload). */
+	STAT(tlb_dropin_fail_no_asid);
+	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	if (!cb)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	return -EAGAIN;
+
+failupm:
+	/* Atomic failure switch CBR to UPM */
+	tfh_user_polling_mode(tfh);
+	STAT(tlb_dropin_fail_upm);
+	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return 1;
+
+failfmm:
+	/* FMM state on UPM call */
+	STAT(tlb_dropin_fail_fmm);
+	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failidle:
+	/* TFH was idle  - no miss pending */
+	gru_flush_cache(tfh);
+	if (cb)
+		gru_flush_cache(cb);
+	STAT(tlb_dropin_fail_idle);
+	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failinval:
+	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+	tfh_exception(tfh);
+	STAT(tlb_dropin_fail_invalid);
+	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return -EFAULT;
+
+failactive:
+	/* Range invalidate active. Switch to UPM iff atomic */
+	if (!cb)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	STAT(tlb_dropin_fail_range_active);
+	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+		tfh, vaddr);
+	return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with linux
+ * interrupt handlers.
+ *	Input:
+ *		irq	interrupt number; used to locate the GRU
+ *		dev_id	not used
+ *	Return:
+ *		IRQ_NONE if irq does not map to a GRU, else IRQ_HANDLED
+ */
+irqreturn_t gru_intr(int irq, void *dev_id)
+{
+	struct gru_state *gru;
+	struct gru_tlb_fault_map map;
+	struct gru_thread_state *gts;
+	struct gru_tlb_fault_handle *tfh = NULL;
+	int cbrnum, ctxnum;
+
+	STAT(intr);
+
+	gru = irq_to_gru(irq);
+	if (!gru) {
+		dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
+			raw_smp_processor_id(), irq);
+		return IRQ_NONE;
+	}
+	get_clear_fault_map(gru, &map);
+	gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
+		map.fault_bits[0]);
+
+	for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+		tfh = get_tfh_by_index(gru, cbrnum);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+
+		/*
+		 * When hardware sets a bit in the faultmap, it implicitly
+		 * locks the GRU context so that it cannot be unloaded.
+		 * The gts cannot change until a TFH start/writestart command
+		 * is issued.
+		 */
+		ctxnum = tfh->ctxnum;
+		gts = gru->gs_gts[ctxnum];
+
+		/*
+		 * A fault bit for a context slot with no thread state (e.g.
+		 * a spurious interrupt) cannot be resolved here. Skip it
+		 * instead of dereferencing a NULL gts in interrupt context.
+		 */
+		if (!gts)
+			continue;
+
+		/*
+		 * This is running in interrupt context. Trylock the mmap_sem.
+		 * If it fails, retry the fault in user context.
+		 */
+		if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+			gru_try_dropin(gts, tfh, NULL);
+			up_read(&gts->ts_mm->mmap_sem);
+		} else {
+			tfh_user_polling_mode(tfh);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * User-context dropin. Waits until no range invalidate is active, then
+ * tries the dropin; repeats for as long as gru_try_dropin() returns > 0.
+ * Returns the first result that is <= 0.
+ */
+static int gru_user_dropin(struct gru_thread_state *gts,
+			   struct gru_tlb_fault_handle *tfh,
+			   unsigned long __user *cb)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	int status;
+
+	for (;;) {
+		wait_event(gms->ms_wait_queue,
+			   atomic_read(&gms->ms_range_active) == 0);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+		status = gru_try_dropin(gts, tfh, cb);
+		if (status > 0) {
+			/* Not done yet: count the retry and wait again */
+			STAT(call_os_wait_queue);
+			continue;
+		}
+		return status;
+	}
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. Normally means that a TLB fault has occurred.
+ * 	cb - user virtual address of the CB
+ *
+ * Returns -EINVAL for a bad CB address or unknown GSEG, -EAGAIN if the
+ * context was force-unloaded or is not loaded, else the result of
+ * gru_user_dropin().
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+	struct gru_tlb_fault_handle *tfh;
+	struct gru_thread_state *gts;
+	unsigned long __user *cbp = (unsigned long *)cb;
+	int ucbnum, cbrnum, ret;
+
+	STAT(call_os);
+	gru_dbg(grudev, "address 0x%lx\n", cb);
+
+	/* sanity check the cb pointer: handle aligned & a valid CB index */
+	ucbnum = get_cb_number((void *)cb);
+	if (ucbnum >= GRU_NUM_CB || (cb & (GRU_HANDLE_STRIDE - 1)))
+		return -EINVAL;
+
+	gts = gru_find_lock_gts(cb);
+	if (!gts)
+		return -EINVAL;
+
+	/* The CB must be one of those allocated to this context */
+	ret = -EINVAL;
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
+		goto exit;
+
+	/*
+	 * If force_unload is set, the UPM TLB fault is phony. The task
+	 * has migrated to another node and the GSEG must be moved. Just
+	 * unload the context. The task will page fault and assign a new
+	 * context.
+	 */
+	ret = -EAGAIN;
+	cbrnum = thread_cbr_number(gts, ucbnum);
+	if (gts->ts_force_unload) {
+		gru_unload_context(gts, 1);
+		goto exit;
+	}
+	if (gts->ts_gru) {
+		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+		ret = gru_user_dropin(gts, tfh, cbp);
+	}
+exit:
+	gru_unlock_gts(gts);
+	return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ *	arg - user address of a struct control_block_extended_exc_detail.
+ *	      The cb field is read from it; the detail fields are written
+ *	      back on success.
+ *	Return:
+ *		0	detail copied back to the user
+ *		-EFAULT	user structure could not be read or written
+ *		-EINVAL	no GSEG found for the CB, or the CB number is outside
+ *			the CBRs allocated to the context
+ *		-EAGAIN	context is not currently loaded in a GRU
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+	struct control_block_extended_exc_detail excdet;
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *gts;
+	int ucbnum, cbrnum, ret;
+
+	STAT(user_exception);
+	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
+	gts = gru_find_lock_gts(excdet.cb);
+	if (!gts)
+		return -EINVAL;
+
+	/*
+	 * excdet.cb comes from the user. Apply the same range check as
+	 * gru_handle_user_call_os() so that a CB number beyond the CBRs
+	 * allocated to this context can never index a foreign CBE.
+	 */
+	ucbnum = get_cb_number((void *)excdet.cb);
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+	} else if (gts->ts_gru) {
+		cbrnum = thread_cbr_number(gts, ucbnum);
+		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		excdet.opc = cbe->opccpy;
+		excdet.exopc = cbe->exopccpy;
+		excdet.ecause = cbe->ecause;
+		excdet.exceptdet0 = cbe->idef1upd;
+		excdet.exceptdet1 = cbe->idef3upd;
+		ret = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	gru_unlock_gts(gts);
+
+	gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
+		excdet.ecause);
+	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+		ret = -EFAULT;
+	return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ *	arg - user address of a struct gru_unload_context_req
+ * Returns 0, -EFAULT (request unreadable) or -EINVAL (gseg not found).
+ */
+int gru_user_unload_context(unsigned long arg)
+{
+	struct gru_unload_context_req req;
+	struct gru_thread_state *gts;
+
+	STAT(user_unload_context);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)) != 0)
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (gts == NULL)
+		return -EINVAL;
+
+	/* Only unload when the context has a GRU assigned (ts_gru set) */
+	if (gts->ts_gru != NULL)
+		gru_unload_context(gts, 1);
+
+	gru_unlock_gts(gts);
+	return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ *	arg - user address of a struct gru_flush_tlb_req (gseg, vaddr, len)
+ * Returns 0, -EFAULT (request unreadable) or -EINVAL (gseg not found).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+	struct gru_flush_tlb_req req;
+	struct gru_thread_state *gts;
+
+	STAT(user_flush_tlb);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)) != 0)
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+		req.vaddr, req.len);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (gts == NULL)
+		return -EINVAL;
+
+	/* Flush [vaddr, vaddr + len) for the mm this context belongs to */
+	gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
+
+	gru_unlock_gts(gts);
+	return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ *	address - user address used to look up (or allocate) the thread state
+ * Returns 0, or -EINVAL if no thread state could be obtained.
+ */
+int gru_set_task_slice(long address)
+{
+	struct gru_thread_state *gts;
+
+	STAT(set_task_slice);
+	gru_dbg(grudev, "address 0x%lx\n", address);
+
+	gts = gru_alloc_locked_gts(address);
+	if (gts == NULL)
+		return -EINVAL;
+
+	/* Record the caller's thread group as owner, then drop the lock */
+	gts->ts_tgid_owner = current->tgid;
+	gru_unlock_gts(gts);
+	return 0;
+}
-- 
cgit v1.2.3


From 28bffaf094a6d0992c85e1b01f04c9b0f56c9d62 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:57 -0700
Subject: GRU Driver: kernel services provided by driver

This file contains functions for handling services provided to other
kernel modules that use the GRU.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grukservices.c | 679 ++++++++++++++++++++++++++++++++++++
 1 file changed, 679 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grukservices.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 00000000000..234d165fb11
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,679 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              KERNEL SERVICES THAT USE THE GRU
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#include "grukservices.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Kernel GRU Usage
+ *
+ * The following is an interim algorithm for management of kernel GRU
+ * resources. This will likely be replaced when we better understand the
+ * kernel/user requirements.
+ *
+ * At boot time, the kernel permanently reserves a fixed number of
+ * CBRs/DSRs for each cpu to use. The resources are all taken from
+ * the GRU chiplet 1 on the blade. This leaves the full set of resources
+ * of chiplet 0 available to be allocated to a single user.
+ */
+
+/* Blade percpu resources PERMANENTLY reserved for kernel use */
+#define GRU_NUM_KERNEL_CBR      1
+#define GRU_NUM_KERNEL_DSR_BYTES 256
+#define KERNEL_CTXNUM           15
+
+/* GRU instruction attributes for all instructions */
+#define IMA			IMA_CB_DELAY
+
+/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
+#define __gru_cacheline_aligned__                               \
+	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
+
+#define MAGIC	0x1234567887654321UL
+
+/* Default retry count for GRU errors on kernel instructions */
+#define EXCEPTION_RETRY_LIMIT	3
+
+/* Status of message queue sections */
+#define MQS_EMPTY		0
+#define MQS_FULL		1
+#define MQS_NOOP		2
+
+/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
+/* optimized for x86_64 */
+/*
+ * Header at the start of every message queue, followed by the message data.
+ * Field use, as set up by gru_create_message_queue() and updated by
+ * gru_free_message():
+ *	head	 queue head word; initialized with gru_mesq_head() and
+ *		 swapped by senders when a queue half fills
+ *	qlines	 queue size in cache lines, excluding the 2-line header
+ *	hstatus	 one flag per queue half; senders clear/increment it through
+ *		 the GRU as a lock when switching halves, the receiver sets it
+ *		 to 1 when it has consumed a half
+ *	next	 next message to be returned to the receiver
+ *	limit	 address at which "next" wraps back to "start"
+ *	start	 first message slot (== &data)
+ *	start2	 boundary between the first and second half of the queue
+ *	data	 first byte of the message area
+ */
+struct message_queue {
+	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
+	int			qlines;				/* DW 1 */
+	long 			hstatus[2];
+	void 			*next __gru_cacheline_aligned__;/* CL 1 */
+	void 			*limit;
+	void 			*start;
+	void 			*start2;
+	char			data ____cacheline_aligned;	/* CL 2 */
+};
+
+/*
+ * First word in every message - used by mesq interface
+ *	present	  MQS_EMPTY, MQS_FULL or MQS_NOOP for this message slot
+ *	present2  for 2-line messages: holds the byte of the second cache
+ *		  line that is temporarily overwritten by that line's own
+ *		  "present" flag (see get_present2/restore_present2)
+ *	lines	  message length in cache lines (1 or 2)
+ *	fill	  not referenced in this file; presumably padding to a full
+ *		  word - TODO confirm
+ */
+struct message_header {
+	char	present;
+	char	present2;
+	char 	lines;
+	char	fill;
+};
+
+/*
+ * Address of the qlines / hstatus[h] field of the message queue header.
+ * mq is the queue address as an integer (it is added to, not dereferenced).
+ */
+#define QLINES(mq)	((mq) + offsetof(struct message_queue, qlines))
+#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
+
+/*
+ * Hand out the kernel CB and DSR space reserved for the current cpu.
+ * Preemption is disabled on return and stays disabled until the caller
+ * invokes gru_free_cpu_resources(). Always returns 0.
+ *	dsr_bytes - DSR space needed; must not exceed GRU_NUM_KERNEL_DSR_BYTES
+ *	cb, dsr   - receive the addresses of the CB and of the DSR area
+ */
+static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
+{
+	struct gru_blade_state *blade;
+	int cpu;
+
+	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
+
+	/* Stay on this cpu while its resources are in use */
+	preempt_disable();
+	blade = gru_base[uv_numa_blade_id()];
+	cpu = uv_blade_processor_id();
+
+	/* Each cpu of the blade owns one slot of the blade's kernel areas */
+	*cb = blade->kernel_cb + cpu * GRU_HANDLE_STRIDE;
+	*dsr = blade->kernel_dsr + cpu * GRU_NUM_KERNEL_DSR_BYTES;
+	return 0;
+}
+
+/*
+ * Counterpart of gru_get_cpu_resources(): re-enables the preemption that
+ * was disabled there. cb and dsr are not used.
+ */
+static void gru_free_cpu_resources(void *cb, void *dsr)
+{
+	preempt_enable();
+}
+
+/*
+ * Copy the exception detail of a kernel CB out of its extended control
+ * block (CBE) into *excdet. Always returns 0.
+ */
+int gru_get_cb_exception_detail(void *cb,
+		struct control_block_extended_exc_detail *excdet)
+{
+	struct gru_control_block_extended *ext;
+
+	/* The CBE is located from the GRU base and the CB's number */
+	ext = get_cbe(GRUBASE(cb), get_cb_number(cb));
+
+	excdet->opc = ext->opccpy;
+	excdet->exopc = ext->exopccpy;
+	excdet->ecause = ext->ecause;
+	excdet->exceptdet0 = ext->idef1upd;
+	excdet->exceptdet1 = ext->idef3upd;
+
+	return 0;
+}
+
+/*
+ * Format a human readable description of a CB exception into buf.
+ *	ret  - status returned by the wait on the CB
+ *	cb   - control block to describe
+ *	buf  - output buffer
+ *	size - size of buf in bytes
+ * Writes "No exception" unless ret > 0 and the CB is in CBS_EXCEPTION
+ * state. Returns buf.
+ */
+char *gru_get_cb_exception_detail_str(int ret, void *cb,
+				      char *buf, int size)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+
+	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
+		gru_get_cb_exception_detail(cb, &excdet);
+		/* Note the trailing space: the two literals are concatenated */
+		snprintf(buf, size,
+			"GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
+			"excdet0 0x%lx, excdet1 0x%x",
+			gen, excdet.opc, excdet.exopc, excdet.ecause,
+			excdet.exceptdet0, excdet.exceptdet1);
+	} else {
+		snprintf(buf, size, "No exception");
+	}
+	return buf;
+}
+
+/*
+ * Spin until the CB status drops below CBS_ACTIVE, then return the
+ * status that is read after the loop.
+ */
+static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
+{
+	for (;;) {
+		if (gen->istatus < CBS_ACTIVE)
+			break;
+		cpu_relax();
+		barrier();
+	}
+	return gen->istatus;
+}
+
+/*
+ * Re-issue an instruction that ended in an exception, as long as the
+ * exception cause consists only of EXCEPTION_RETRY_BITS and the retry
+ * budget (EXCEPTION_RETRY_LIMIT) is not used up.
+ * Returns CBS_IDLE once the instruction completes, else CBS_EXCEPTION.
+ */
+static int gru_retry_exception(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+	int retries_left = EXCEPTION_RETRY_LIMIT;
+
+	for (;;) {
+		/* A message queue substatus is never retried here */
+		if (gru_get_cb_message_queue_substatus(cb))
+			return CBS_EXCEPTION;
+		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
+			return CBS_IDLE;
+
+		gru_get_cb_exception_detail(cb, &excdet);
+		if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+			return CBS_EXCEPTION;
+		if (retries_left == 0)
+			return CBS_EXCEPTION;
+		retries_left--;
+
+		/* Restart the instruction */
+		gen->icmd = 1;
+		gru_flush_cache(gen);
+	}
+}
+
+/*
+ * Non-blocking status check of a CB. An exception status is passed to
+ * gru_retry_exception(); any other status is returned unchanged.
+ */
+int gru_check_status_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int status = gen->istatus;
+
+	if (status == CBS_EXCEPTION)
+		return gru_retry_exception(cb);
+	return status;
+}
+
+/*
+ * Wait for a CB to leave the active state. An exception status is passed
+ * to gru_retry_exception(). Returns the final status.
+ */
+int gru_wait_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int status = gru_wait_idle_or_exception(gen);
+
+	if (status != CBS_EXCEPTION)
+		return status;
+	return gru_retry_exception(cb);
+}
+
+/*
+ * Panic with a message built from str and the exception detail of cb.
+ */
+void gru_abort(int ret, void *cb, char *str)
+{
+	char detail[GRU_EXC_STR_SIZE];
+	char *why;
+
+	why = gru_get_cb_exception_detail_str(ret, cb, detail, sizeof(detail));
+	panic("GRU FATAL ERROR: %s - %s\n", str, why);
+}
+
+/*
+ * Wait for a CB to complete; any non-zero final status is fatal.
+ */
+void gru_wait_abort_proc(void *cb)
+{
+	int status = gru_wait_proc(cb);
+
+	if (status == 0)
+		return;
+	gru_abort(status, cb, "gru_wait_abort");
+}
+
+
+/*------------------------------ MESSAGE QUEUES -----------------------------*/
+
+/* Internal status. These are NOT returned to the user. */
+#define MQIE_AGAIN		-1	/* try again */
+
+
+/*
+ * Save/restore the "present" flag that is in the second line of 2-line
+ * messages
+ *
+ * get_present2() reads the first byte of the cache line that follows p.
+ */
+static inline int get_present2(void *p)
+{
+	struct message_header *line2 = p + GRU_CACHE_LINE_BYTES;
+
+	return line2->present;
+}
+
+/* Store val into the first byte of the cache line that follows p. */
+static inline void restore_present2(void *p, int val)
+{
+	struct message_header *line2 = p + GRU_CACHE_LINE_BYTES;
+
+	line2->present = val;
+}
+
+/*
+ * Create a message queue.
+ * 	p	- memory for the queue. The struct message_queue header is
+ * 		  placed at its start and the whole area is zeroed.
+ * 	bytes	- size of that memory in bytes. Includes 2-line header.
+ *
+ * The qlines value stored in the header is the size in cache lines that
+ * remains after the 2 header lines are subtracted.
+ * Always returns 0.
+ * NOTE(review): bytes is not validated. A size of less than 2 cache lines
+ * makes the unsigned qlines computation wrap - callers must pass a sane
+ * size.
+ */
+int gru_create_message_queue(void *p, unsigned int bytes)
+{
+	struct message_queue *mq = p;
+	unsigned int qlines;
+
+	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
+	memset(mq, 0, bytes);
+	mq->start = &mq->data;
+	/* start2 marks where the second half of the queue begins */
+	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
+	mq->next = &mq->data;
+	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
+	mq->qlines = qlines;
+	mq->hstatus[0] = 0;
+	mq->hstatus[1] = 1;
+	/* Same head value senders install when switching to the first half */
+	mq->head = gru_mesq_head(2, qlines / 2 + 1);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gru_create_message_queue);
+
+/*
+ * Send a NOOP message to a message queue
+ * 	Input:
+ * 		cb   - control block used to issue the GRU instructions
+ * 		mq   - message queue address
+ * 		mesg - message buffer; its header is borrowed for the NOOP
+ * 		       and restored before returning
+ * 	Returns:
+ * 		 0 - if queue is full after the send. This is the normal case
+ * 		     but various races can change this.
+ *		-1 - if mesq sent successfully but queue not full
+ *		>0 - unexpected error. MQE_xxx returned
+ */
+static int send_noop_message(void *cb,
+				unsigned long mq, void *mesg)
+{
+	const struct message_header noop_header = {
+					.present = MQS_NOOP, .lines = 1};
+	unsigned long m;
+	int substatus, ret;
+	struct message_header save_mhdr, *mhdr = mesg;
+
+	STAT(mesq_noop);
+	/* Temporarily turn the caller's message into a 1-line NOOP */
+	save_mhdr = *mhdr;
+	*mhdr = noop_header;
+	gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
+	ret = gru_wait(cb);
+
+	/* Non-zero wait status: decode the message queue substatus */
+	if (ret) {
+		substatus = gru_get_cb_message_queue_substatus(cb);
+		switch (substatus) {
+		case CBSS_NO_ERROR:
+			STAT(mesq_noop_unexpected_error);
+			ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_LB_OVERFLOWED:
+			STAT(mesq_noop_lb_overflow);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_QLIMIT_REACHED:
+			STAT(mesq_noop_qlimit_reached);
+			ret = 0;
+			break;
+		case CBSS_AMO_NACKED:
+			STAT(mesq_noop_amo_nacked);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_PUT_NACKED:
+			STAT(mesq_noop_put_nacked);
+			/* Store the line directly at the slot named by head */
+			m = mq + (gru_get_amo_value_head(cb) << 6);
+			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+						IMA);
+			if (gru_wait(cb) == CBS_IDLE)
+				ret = MQIE_AGAIN;
+			else
+				ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_PAGE_OVERFLOW:
+		default:
+			BUG();
+		}
+	}
+	/* Give the caller its original message header back */
+	*mhdr = save_mhdr;
+	return ret;
+}
+
+/*
+ * Handle a gru_mesq full.
+ * 	Input:
+ * 		cb    - control block of the failed gru_mesq; its AMO value
+ * 		        holds the queue head that was seen
+ * 		mq    - message queue address
+ * 		mesg  - message buffer (first word is borrowed as scratch
+ * 		        space and restored)
+ * 		lines - not used by this function
+ * 	Returns:
+ * 		MQE_QUEUE_FULL        - the other half is not free yet
+ * 		MQIE_AGAIN            - caller should retry the send
+ * 		MQE_UNEXPECTED_CB_ERR - a GRU instruction failed
+ */
+static int send_message_queue_full(void *cb,
+			   unsigned long mq, void *mesg, int lines)
+{
+	union gru_mesqhead mqh;
+	unsigned int limit, head;
+	unsigned long avalue;
+	int half, qlines, save;
+
+	/* Determine if switching to first/second half of q */
+	avalue = gru_get_amo_value(cb);
+	head = gru_get_amo_value_head(cb);
+	limit = gru_get_amo_value_limit(cb);
+
+	/*
+	 * Fetch "qlines" from the queue header. Since the queue may be
+	 * in memory that can't be accessed using socket addresses, use
+	 * the GRU to access the data. Use DSR space from the message.
+	 */
+	save = *(int *)mesg;
+	gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+	qlines = *(int *)mesg;
+	*(int *)mesg = save;
+	/* limit == qlines: currently in the 2nd half, so switch to half 0 */
+	half = (limit != qlines);
+
+	if (half)
+		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+	else
+		mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+	/* Try to get lock for switching head pointer */
+	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+	/* Old value 0: the receiver has not released that half yet */
+	if (!gru_get_amo_value(cb)) {
+		STAT(mesq_qf_locked);
+		return MQE_QUEUE_FULL;
+	}
+
+	/* Got the lock. Send optional NOP if queue not full, */
+	if (head != limit) {
+		if (send_noop_message(cb, mq, mesg)) {
+			/* NOOP not queued: hand the lock back and retry */
+			gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
+					XTYPE_DW, IMA);
+			if (gru_wait(cb) != CBS_IDLE)
+				goto cberr;
+			STAT(mesq_qf_noop_not_full);
+			return MQIE_AGAIN;
+		}
+		/* The NOOP advanced the head the CSWAP must match */
+		avalue++;
+	}
+
+	/* Then flip queuehead to other half of queue. */
+	gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+
+	/* If not successfully in swapping queue head, clear the hstatus lock */
+	if (gru_get_amo_value(cb) != avalue) {
+		STAT(mesq_qf_switch_head_failed);
+		gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			goto cberr;
+	}
+	return MQIE_AGAIN;
+cberr:
+	STAT(mesq_qf_unexpected_error);
+	return MQE_UNEXPECTED_CB_ERR;
+}
+
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ *
+ * Dispatches on the message queue substatus of cb and returns MQE_OK,
+ * an MQE_xxx error, or MQIE_AGAIN (via send_message_queue_full) when the
+ * caller should retry the send.
+ */
+static int send_message_failure(void *cb,
+				unsigned long mq,
+				void *mesg,
+				int lines)
+{
+	unsigned long slot;
+
+	switch (gru_get_cb_message_queue_substatus(cb)) {
+	case CBSS_NO_ERROR:
+		STAT(mesq_send_unexpected_error);
+		return MQE_UNEXPECTED_CB_ERR;
+	case CBSS_LB_OVERFLOWED:
+		STAT(mesq_send_lb_overflow);
+		return MQE_CONGESTION;
+	case CBSS_QLIMIT_REACHED:
+		STAT(mesq_send_qlimit_reached);
+		return send_message_queue_full(cb, mq, mesg, lines);
+	case CBSS_AMO_NACKED:
+		STAT(mesq_send_amo_nacked);
+		return MQE_CONGESTION;
+	case CBSS_PUT_NACKED:
+		STAT(mesq_send_put_nacked);
+		/* Store the message directly at the slot named by head */
+		slot = mq + (gru_get_amo_value_head(cb) << 6);
+		gru_vstore(cb, slot, gru_get_tri(mesg), XTYPE_CL, lines, 1,
+			   IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			return MQE_UNEXPECTED_CB_ERR;
+		return MQE_OK;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/*
+ * Send a message to a message queue
+ * 	mq	message queue
+ * 	mesg	message. Must be vaddr within a GSEG
+ * 		NOTE(review): the code copies mesg into this cpu's kernel
+ * 		DSR space with memcpy() before sending - confirm whether
+ * 		the GSEG restriction still applies.
+ * 	bytes	message size (>= sizeof(int), <= 2 CL)
+ *
+ * The GRU control block is not a parameter; it is obtained (together with
+ * the DSR space) from gru_get_cpu_resources().
+ * The first word of the message is overwritten by the message_header.
+ * Returns MQE_OK (0) on success, else an MQE_xxx error code.
+ */
+int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
+{
+	struct message_header *mhdr;
+	void *cb;
+	void *dsr;
+	int istatus, clines, ret;
+
+	STAT(mesq_send);
+	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+	/* Round the size up to whole cache lines (1 or 2) */
+	clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES;
+	if (gru_get_cpu_resources(bytes, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	memcpy(dsr, mesg, bytes);
+	mhdr = dsr;
+	mhdr->present = MQS_FULL;
+	mhdr->lines = clines;
+	if (clines == 2) {
+		/*
+		 * The second line gets its own "present" flag; the byte it
+		 * replaces is kept in present2 for the receiver to restore.
+		 */
+		mhdr->present2 = get_present2(mhdr);
+		restore_present2(mhdr, MQS_FULL);
+	}
+
+	/* Resend for as long as the failure handler asks for a retry */
+	do {
+		ret = MQE_OK;
+		gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
+		istatus = gru_wait(cb);
+		if (istatus != CBS_IDLE)
+			ret = send_message_failure(cb, mq, dsr, clines);
+	} while (ret == MQIE_AGAIN);
+	gru_free_cpu_resources(cb, dsr);
+
+	if (ret)
+		STAT(mesq_send_failed);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
+ * 	rmq	message queue
+ * 	mesg	not used; the message at mq->next is the one released
+ *
+ * When the release completes a half of the queue, that half's hstatus
+ * flag is set to 1.
+ */
+void gru_free_message(void *rmq, void *mesg)
+{
+	struct message_queue *mq = rmq;
+	struct message_header *mhdr = mq->next;
+	int lines = mhdr->lines;
+	void *cur, *nxt;
+
+	/* Mark every line of the message empty */
+	if (lines == 2)
+		restore_present2(mhdr, MQS_EMPTY);
+	mhdr->present = MQS_EMPTY;
+
+	cur = mq->next;
+	nxt = cur + GRU_CACHE_LINE_BYTES * lines;
+	if (nxt == mq->limit) {
+		/* End of the second half: wrap and release half 1 */
+		nxt = mq->start;
+		mq->hstatus[1] = 1;
+	} else if (cur < mq->start2 && nxt >= mq->start2) {
+		/* Crossed into the second half: release half 0 */
+		mq->hstatus[0] = 1;
+	}
+	mq->next = nxt;
+}
+EXPORT_SYMBOL_GPL(gru_free_message);
+
+/*
+ * Get next message from message queue. Return NULL if no message
+ * present. User must call gru_free_message() to move to next message.
+ * 	rmq	message queue
+ */
+void *gru_get_next_message(void *rmq)
+{
+	struct message_queue *mq = rmq;
+	struct message_header *hdr;
+	int state;
+
+	STAT(mesq_receive);
+
+	/* skip NOOP messages */
+	for (hdr = mq->next; (state = hdr->present) == MQS_NOOP;
+	     hdr = mq->next)
+		gru_free_message(rmq, hdr);
+
+	/* Wait for both halves of 2 line messages */
+	if (state == MQS_FULL && hdr->lines == 2 &&
+	    get_present2(hdr) == MQS_EMPTY)
+		state = MQS_EMPTY;
+
+	if (state == MQS_EMPTY) {
+		STAT(mesq_receive_none);
+		return NULL;
+	}
+
+	/* Put back the data byte that the second "present" flag replaced */
+	if (hdr->lines == 2)
+		restore_present2(hdr, hdr->present2);
+
+	return hdr;
+}
+EXPORT_SYMBOL_GPL(gru_get_next_message);
+
+/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
+
+/*
+ * Copy a block of data using the GRU resources
+ * 	dest_gpa, src_gpa	destination / source addresses
+ * 	bytes			number of bytes to copy
+ * Returns the final status of the bcopy instruction (from gru_wait), or
+ * MQE_BUG_NO_RESOURCES if no cpu resources could be obtained.
+ */
+int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+				unsigned int bytes)
+{
+	void *cb, *dsr;
+	int status;
+
+	STAT(copy_gpa);
+	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr) != 0)
+		return MQE_BUG_NO_RESOURCES;
+
+	/* The whole per-cpu DSR area serves as the bcopy buffer */
+	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
+		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
+	status = gru_wait(cb);
+
+	gru_free_cpu_resources(cb, dsr);
+	return status;
+}
+EXPORT_SYMBOL_GPL(gru_copy_gpa);
+
+/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
+/* 	Temp - will delete after we gain confidence in the GRU		*/
+static __cacheline_aligned unsigned long word0;
+static __cacheline_aligned unsigned long word1;
+
+/*
+ * Sanity test of the kernel context of one GRU: load MAGIC from word0
+ * into the data segment with the GRU, store it from there to word1, and
+ * check that it arrived. Any failure is fatal (BUG). Returns 0.
+ */
+static int quicktest(struct gru_state *gru)
+{
+	void *cb;
+	void *ds;
+
+	cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
+	ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
+	word0 = MAGIC;
+
+	/* memory (word0) -> data segment */
+	gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		BUG();
+
+	if (*(unsigned long *)ds != MAGIC)
+		BUG();
+
+	/* data segment -> memory (word1) */
+	gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		BUG();
+
+	if (word0 != word1 || word0 != MAGIC) {
+		printk(KERN_ERR
+		       "GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
+		       gru->gs_gid, word1, MAGIC);
+		BUG();		/* ZZZ should not be fatal */
+	}
+
+	return 0;
+}
+
+
+/*
+ * Reserve and start the kernel context (KERNEL_CTXNUM) of a GRU.
+ * Only the second chiplet of a blade (bs_grus[1]) is set up; for any other
+ * GRU this is a no-op. CBRs and DSR bytes are reserved for every possible
+ * cpu of the blade. Failure to allocate or start the context is fatal
+ * (BUG). Returns 0.
+ */
+int gru_kservices_init(struct gru_state *gru)
+{
+	struct gru_blade_state *bs;
+	struct gru_context_configuration_handle *cch;
+	unsigned long cbr_map, dsr_map;
+	int err, num, cpus_possible;
+
+	/*
+	 * Currently, resources are reserved ONLY on the second chiplet
+	 * on each blade. This leaves ALL resources on chiplet 0 available
+	 * for user code.
+	 */
+	bs = gru->gs_blade;
+	if (gru != &bs->bs_grus[1])
+		return 0;
+
+	cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+	num = GRU_NUM_KERNEL_CBR * cpus_possible;
+	cbr_map = reserve_gru_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
+	gru->gs_reserved_cbrs += num;
+
+	num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
+	dsr_map = reserve_gru_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
+	gru->gs_reserved_dsr_bytes += num;
+
+	/* Account for the kernel context in the GRU's bookkeeping */
+	gru->gs_active_contexts++;
+	__set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
+	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
+
+	bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
+					KERNEL_CTXNUM, 0);
+	bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
+					KERNEL_CTXNUM, 0);
+
+	lock_cch_handle(cch);
+	cch->tfm_fault_bit_enable = 0;
+	cch->tlb_int_enable = 0;
+	cch->tfm_done_bit_enable = 0;
+	cch->unmap_enable = 1;
+	err = cch_allocate(cch, 0, cbr_map, dsr_map);
+	if (err) {
+		gru_dbg(grudev,
+			"Unable to allocate kernel CCH: gru %d, err %d\n",
+			gru->gs_gid, err);
+		BUG();
+	}
+	/* Keep the result so the message reports the real failure code */
+	err = cch_start(cch);
+	if (err) {
+		gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
+			gru->gs_gid, err);
+		BUG();
+	}
+	unlock_cch_handle(cch);
+
+	if (options & GRU_QUICKLOOK)
+		quicktest(gru);
+	return 0;
+}
-- 
cgit v1.2.3


From 9a0deecc90de62c91d7107611446c0c950091851 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:58 -0700
Subject: GRU Driver: resource management

This file contains functions related to managing GRU resources provided to
the user.  Examples include GRU context assignment, load, unload,
migration, etc.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grumain.c | 798 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 798 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grumain.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 00000000000..aef6822cb80
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,798 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+
+unsigned long options __read_mostly;
+
+static struct device_driver gru_driver = {
+	.name = "gru"
+};
+
+static struct device gru_device = {
+	.bus_id = {0},
+	.driver = &gru_driver,
+};
+
+struct device *grudev = &gru_device;
+
+/*
+ * Select a gru fault map to be used by the current cpu. Note that
+ * multiple cpus may be using the same map.
+ *	ZZZ should "shift" be used?? Depends on HT cpu numbering
+ *	ZZZ should be inline but did not work on emulator
+ */
+int gru_cpu_fault_map_id(void)
+{
+	return uv_blade_processor_id() % GRU_NUM_TFM;
+}
+
+/*--------- ASID Management -------------------------------------------
+ *
+ *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
+ *  Once MAX is reached, flush the TLB & start over. However,
+ *  some asids may still be in use. There won't be many (percentage wise) still
+ *  in use. Search active contexts & determine the value of the first
+ *  asid in use ("x"s below). Set "limit" to this value.
+ *  This defines a block of assignable asids.
+ *
+ *  When "limit" is reached, search forward from limit+1 and determine the
+ *  next block of assignable asids.
+ *
+ *  Repeat until MAX_ASID is reached, then start over again.
+ *
+ *  Each time MAX_ASID is reached, increment the asid generation. Since
+ *  the search for in-use asids only checks contexts with GRUs currently
+ *  assigned, asids in some contexts will be missed. Prior to loading
+ *  a context, the asid generation of the GTS asid is rechecked. If it
+ *  doesn't match the current generation, a new asid will be assigned.
+ *
+ *   	0---------------x------------x---------------------x----|
+ *	  ^-next	^-limit	   				^-MAX_ASID
+ *
+ * All asid manipulation & context loading/unloading is protected by the
+ * gs_lock.
+ */
+
+/* Hit the asid limit. Start over */
+static int gru_wrap_asid(struct gru_state *gru)
+{
+	gru_dbg(grudev, "gru %p\n", gru);
+	STAT(asid_wrap);
+	gru->gs_asid_gen++;
+	gru_flush_all_tlb(gru);
+	return MIN_ASID;
+}
+
+/* Find the next chunk of unused asids */
+static int gru_reset_asid_limit(struct gru_state *gru, int asid)
+{
+	int i, gid, inuse_asid, limit;
+
+	gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+	STAT(asid_next);
+	limit = MAX_ASID;
+	if (asid >= limit)
+		asid = gru_wrap_asid(gru);
+	gid = gru->gs_gid;
+again:
+	for (i = 0; i < GRU_NUM_CCH; i++) {
+		if (!gru->gs_gts[i])
+			continue;
+		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
+		gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n",
+			gru, inuse_asid, i, gru->gs_gts[i]);
+		if (inuse_asid == asid) {
+			asid += ASID_INC;
+			if (asid >= limit) {
+				/*
+				 * empty range: reset the range limit and
+				 * start over
+				 */
+				limit = MAX_ASID;
+				if (asid >= MAX_ASID)
+					asid = gru_wrap_asid(gru);
+				goto again;
+			}
+		}
+
+		if ((inuse_asid > asid) && (inuse_asid < limit))
+			limit = inuse_asid;
+	}
+	gru->gs_asid_limit = limit;
+	gru->gs_asid = asid;
+	gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
+		limit);
+	return asid;
+}
+
+/* Assign a new ASID to a thread context.  */
+static int gru_assign_asid(struct gru_state *gru)
+{
+	int asid;
+
+	spin_lock(&gru->gs_asid_lock);
+	gru->gs_asid += ASID_INC;
+	asid = gru->gs_asid;
+	if (asid >= gru->gs_asid_limit)
+		asid = gru_reset_asid_limit(gru, asid);
+	spin_unlock(&gru->gs_asid_lock);
+
+	gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+	return asid;
+}
+
+/*
+ * Clear the n lowest set bits of *p, looking only at its first mmax bits, and
+ * return a word with exactly those bits set. If idx is non-NULL, the bit
+ * numbers taken are stored there, one per char. n <= 0 takes nothing.
+ */
+static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
+				       char *idx)
+{
+	unsigned long bits = 0;
+	int i;
+
+	while (n-- > 0) {
+		i = find_first_bit(p, mmax);
+		BUG_ON(i == mmax);	/* caller asked for more than is free */
+		__clear_bit(i, p);
+		__set_bit(i, &bits);
+		if (idx)
+			*idx++ = i;
+	}
+	return bits;
+}
+
+unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count,
+				       char *cbmap)
+{
+	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
+				 cbmap);
+}
+
+unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count,
+				       char *dsmap)
+{
+	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
+				 dsmap);
+}
+
+static void reserve_gru_resources(struct gru_state *gru,
+				  struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts++;
+	gts->ts_cbr_map =
+	    reserve_gru_cb_resources(gru, gts->ts_cbr_au_count,
+				     gts->ts_cbr_idx);
+	gts->ts_dsr_map =
+	    reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+}
+
+static void free_gru_resources(struct gru_state *gru,
+			       struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts--;
+	gru->gs_cbr_map |= gts->ts_cbr_map;
+	gru->gs_dsr_map |= gts->ts_dsr_map;
+}
+
+/*
+ * Check if a GRU has sufficient free resources to satisfy an allocation
+ * request. Note: GRU locks may or may not be held when this is called. If
+ * not held, recheck after acquiring the appropriate locks.
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+			       int dsr_au_count, int max_active_contexts)
+{
+	return hweight64(gru->gs_cbr_map) >= cbr_au_count
+		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
+		&& gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB management requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
+			       int ctxnum)
+{
+	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+	unsigned short ctxbitmap = (1 << ctxnum);
+	int asid;
+
+	spin_lock(&gms->ms_asid_lock);
+	asid = asids->mt_asid;
+
+	if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
+		asid = gru_assign_asid(gru);
+		asids->mt_asid = asid;
+		asids->mt_asid_gen = gru->gs_asid_gen;
+		STAT(asid_new);
+	} else {
+		STAT(asid_reuse);
+	}
+
+	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+	asids->mt_ctxbitmap |= ctxbitmap;
+	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+		__set_bit(gru->gs_gid, gms->ms_asidmap);
+	spin_unlock(&gms->ms_asid_lock);
+
+	gru_dbg(grudev,
+		"gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n",
+		gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
+	return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+				  struct gru_mm_struct *gms, int ctxnum)
+{
+	struct gru_mm_tracker *asids;
+	unsigned short ctxbitmap;
+
+	asids = &gms->ms_asids[gru->gs_gid];
+	ctxbitmap = (1 << ctxnum);
+	spin_lock(&gms->ms_asid_lock);
+	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+	asids->mt_ctxbitmap ^= ctxbitmap;
+	gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+		gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+		gru_drop_mmu_notifier(gts->ts_gms);
+		kfree(gts);
+		STAT(gts_free);
+	}
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+			    *vdata, int tsid)
+{
+	struct gru_thread_state *gts;
+
+	list_for_each_entry(gts, &vdata->vd_head, ts_next)
+	    if (gts->ts_tsid == tsid)
+		return gts;
+	return NULL;
+}
+
+/*
+ * Allocate and initialize a thread state (GTS) for tsid. NULL on failure.
+ */
+static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+					      struct gru_vma_data *vdata,
+					      int tsid)
+{
+	struct gru_thread_state *gts;
+	int bytes;
+
+	bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
+				CBR_BYTES(vdata->vd_cbr_au_count);
+	bytes += sizeof(struct gru_thread_state);
+	gts = kzalloc(bytes, GFP_KERNEL);	/* struct + DSR/CBR bytes */
+	if (!gts)
+		return NULL;
+
+	STAT(gts_alloc);
+	atomic_set(&gts->ts_refcnt, 1);
+	mutex_init(&gts->ts_ctxlock);
+	gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
+	gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
+	gts->ts_user_options = vdata->vd_user_options;
+	gts->ts_tsid = tsid;
+	gts->ts_ctxnum = NULLCTX;	/* no GRU context assigned yet */
+	gts->ts_mm = current->mm;
+	gts->ts_vma = vma;
+	gts->ts_tlb_int_select = -1;
+	gts->ts_gms = gru_register_mmu_notifier();
+	if (!gts->ts_gms)
+		goto err;
+
+	gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+	return gts;
+
+err:
+	/* NOTE(review): ts_gms is NULL here; confirm the drop path copes */
+	gts_drop(gts);
+	return NULL;
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+	struct gru_vma_data *vdata = NULL;
+
+	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+	if (!vdata)
+		return NULL;
+
+	INIT_LIST_HEAD(&vdata->vd_head);
+	spin_lock_init(&vdata->vd_lock);
+	gru_dbg(grudev, "alloc vdata %p\n", vdata);
+	return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts;
+
+	spin_lock(&vdata->vd_lock);
+	gts = gru_find_current_gts_nolock(vdata, tsid);
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Another thread may race to create
+ * the gts for the same tsid; if it got there first, its gts is returned.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts, *ngts;
+
+	gts = gru_alloc_gts(vma, vdata, tsid);
+	if (!gts)
+		return NULL;
+
+	spin_lock(&vdata->vd_lock);
+	ngts = gru_find_current_gts_nolock(vdata, tsid);
+	if (ngts) {
+		gts_drop(gts);
+		gts = ngts;
+		STAT(gts_double_allocate);
+	} else {
+		list_add(&gts->ts_next, &vdata->vd_head);
+	}
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	gru = gts->ts_gru;
+	gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
+
+	spin_lock(&gru->gs_lock);
+	gru->gs_gts[gts->ts_ctxnum] = NULL;
+	free_gru_resources(gru, gts);
+	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_gru = NULL;
+	spin_unlock(&gru->gs_lock);
+
+	gts_drop(gts);
+	STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines helps hardware performance.
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+	while (num-- > 0) {
+		prefetchw(p);
+		p += stride;
+	}
+}
+
+static inline long gru_copy_handle(void *d, void *s)
+{
+	memcpy(d, s, GRU_HANDLE_BYTES);
+	return GRU_HANDLE_BYTES;
+}
+
+/* rewrite in assembly & use lots of prefetch */
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+				  unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+		      GRU_CACHE_LINE_BYTES);
+
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+			      GRU_CACHE_LINE_BYTES);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	cb = gseg + GRU_CB_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(cb, save);
+		save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	memcpy(gseg + GRU_DS_BASE, save, length);
+}
+
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+				    unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(save, cb);
+		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int ctxnum = gts->ts_ctxnum;
+
+	zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	if (cch_interrupt_sync(cch))
+		BUG();
+	gru_dbg(grudev, "gts %p\n", gts);
+
+	gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+	if (savestate)
+		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
+					ctxnum, gts->ts_cbr_map,
+					gts->ts_dsr_map);
+
+	if (cch_deallocate(cch))
+		BUG();
+	gts->ts_force_unload = 0;	/* ts_force_unload locked by CCH lock */
+	unlock_cch_handle(cch);
+
+	gru_free_gru_context(gts);
+	STAT(unload_context);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ */
+static void gru_load_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int err, asid, ctxnum = gts->ts_ctxnum;
+
+	gru_dbg(grudev, "gts %p\n", gts);
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+	cch->tfm_fault_bit_enable =
+	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	if (cch->tlb_int_enable) {
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gts->ts_tlb_int_select;
+	}
+	cch->tfm_done_bit_enable = 0;
+	err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
+	if (err) {
+		gru_dbg(grudev,
+			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+		BUG();
+	}
+
+	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+			      gts->ts_cbr_map, gts->ts_dsr_map);
+
+	if (cch_start(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	STAT(load_context);
+}
+
+/*
+ * Update fields in an active CCH:
+ * 	- retarget interrupts on local blade
+ * 	- force a delayed context unload by clearing the CCH asids. This
+ * 	  forces TLB misses for new GRU instructions. The context is unloaded
+ * 	  when the next TLB miss occurs.
+ */
+static int gru_update_cch(struct gru_thread_state *gts, int int_select)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_state *gru = gts->ts_gru;
+	int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	if (cch->state == CCHSTATE_ACTIVE) {
+		if (gru->gs_gts[gts->ts_ctxnum] != gts)
+			goto exit;
+		if (cch_interrupt(cch))
+			BUG();
+		if (int_select >= 0) {
+			gts->ts_tlb_int_select = int_select;
+			cch->tlb_int_select = int_select;
+		} else {
+			for (i = 0; i < 8; i++)
+				cch->asid[i] = 0;
+			cch->tfm_fault_bit_enable = 0;
+			cch->tlb_int_enable = 0;
+			gts->ts_force_unload = 1;
+		}
+		if (cch_start(cch))
+			BUG();
+		ret = 1;
+	}
+exit:
+	unlock_cch_handle(cch);
+	return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * 	- task's GRU context is loaded into a GRU
+ * 	- task is using interrupt notification for TLB faults
+ * 	- task has migrated to a different cpu on the same blade where
+ * 	  it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+	if (gts->ts_tlb_int_select < 0
+	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+		return 0;
+
+	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+		gru_cpu_fault_map_id());
+	return gru_update_cch(gts, gru_cpu_fault_map_id());
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
+				 ((g)+1) : &(b)->bs_grus[0])
+
+static void gru_steal_context(struct gru_thread_state *gts)
+{
+	struct gru_blade_state *blade;
+	struct gru_state *gru, *gru0;
+	struct gru_thread_state *ngts = NULL;
+	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+
+	cbr = gts->ts_cbr_au_count;
+	dsr = gts->ts_dsr_au_count;
+
+	preempt_disable();
+	blade = gru_base[uv_numa_blade_id()];
+	spin_lock(&blade->bs_lock);
+
+	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);	/* resume after last scan */
+	gru = blade->bs_lru_gru;
+	if (ctxnum == 0)
+		gru = next_gru(blade, gru);
+	ctxnum0 = ctxnum;	/* (gru0, ctxnum0) marks where this scan began */
+	gru0 = gru;
+	while (1) {
+		if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+			break;	/* this gru already has room: steal nothing */
+		spin_lock(&gru->gs_lock);
+		for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+			if (flag && gru == gru0 && ctxnum == ctxnum0)
+				break;	/* wrapped around to the start */
+			ngts = gru->gs_gts[ctxnum];
+			/*
+			 * We are grabbing locks out of order, so trylock is
+			 * needed. GTSs are usually not locked, so the odds of
+			 * success are high. If trylock fails, try to steal a
+			 * different GSEG.
+			 */
+			if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+				break;	/* victim found; its ts_ctxlock is held */
+			ngts = NULL;
+			flag = 1;	/* at least one slot has been examined */
+		}
+		spin_unlock(&gru->gs_lock);
+		if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+			break;
+		ctxnum = 0;
+		gru = next_gru(blade, gru);
+	}
+	blade->bs_lru_gru = gru;	/* remember where the next scan resumes */
+	blade->bs_lru_ctxnum = ctxnum;
+	spin_unlock(&blade->bs_lock);
+	preempt_enable();
+
+	if (ngts) {
+		STAT(steal_context);
+		ngts->ts_steal_jiffies = jiffies;
+		gru_unload_context(ngts, 1);	/* savestate=1: keep victim's data */
+		mutex_unlock(&ngts->ts_ctxlock);
+	} else {
+		STAT(steal_context_failed);
+	}
+	gru_dbg(grudev,
+		"stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		" avail cb %ld, ds %ld\n",
+		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+		hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru, *grux;
+	int i, max_active_contexts;
+
+	preempt_disable();
+
+again:
+	gru = NULL;
+	max_active_contexts = GRU_NUM_CCH;
+	for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+		if (check_gru_resources(grux, gts->ts_cbr_au_count,
+					gts->ts_dsr_au_count,
+					max_active_contexts)) {
+			gru = grux;
+			max_active_contexts = grux->gs_active_contexts;
+			if (max_active_contexts == 0)
+				break;
+		}
+	}
+
+	if (gru) {
+		spin_lock(&gru->gs_lock);
+		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+			spin_unlock(&gru->gs_lock);
+			goto again;
+		}
+		reserve_gru_resources(gru, gts);
+		gts->ts_gru = gru;
+		gts->ts_ctxnum =
+		    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+		BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
+		atomic_inc(&gts->ts_refcnt);
+		gru->gs_gts[gts->ts_ctxnum] = gts;
+		__set_bit(gts->ts_ctxnum, &gru->gs_context_map);
+		spin_unlock(&gru->gs_lock);
+
+		STAT(assign_context);
+		gru_dbg(grudev,
+			"gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
+			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+			gts->ts_gru->gs_gid, gts->ts_ctxnum,
+			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+	} else {
+		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+		STAT(assign_context_failed);
+	}
+
+	preempt_enable();
+	return gru;
+}
+
+/*
+ * gru_fault - fault handler for a user GSEG. Assigns and loads a GRU context
+ * for the faulting thread if none is loaded, then maps the GSEG for the user.
+ */
+int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct gru_thread_state *gts;
+	unsigned long paddr, vaddr;
+
+	vaddr = (unsigned long)vmf->virtual_address;
+	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+		vma, vaddr, GSEG_BASE(vaddr));
+	STAT(nopfn);
+
+	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (!gts)
+		return VM_FAULT_SIGBUS;
+
+again:
+	mutex_lock(&gts->ts_ctxlock);	/* can sleep, so lock first */
+	preempt_disable();
+	if (gts->ts_gru) {
+		if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
+			STAT(migrated_nopfn_unload);
+			gru_unload_context(gts, 1);
+		} else if (gru_retarget_intr(gts)) {
+			STAT(migrated_nopfn_retarget);
+		}
+	}
+
+	if (!gts->ts_gru) {
+		if (!gru_assign_gru_context(gts)) {
+			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			/* true hack ZZZ: really sleep, then retry */
+			schedule_timeout_interruptible(GRU_ASSIGN_DELAY);
+			if (time_after(jiffies,
+				       gts->ts_steal_jiffies + GRU_STEAL_DELAY))
+				gru_steal_context(gts);
+			goto again;
+		}
+		gru_load_context(gts);
+		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+				vma->vm_page_prot);
+	}
+
+	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
+
+	return VM_FAULT_NOPAGE;
+}
+
-- 
cgit v1.2.3


From 1d09d737ab017ff7a9745962e19909713ac89b37 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:59 -0700
Subject: GRU Driver: /proc interfaces

This file externalizes some GRU state & statistics to the user using the
/proc file system.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/gruprocfs.c | 336 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/gruprocfs.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 00000000000..bdb1ad83bbf
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,336 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#define printstat(s, f)		printstat_val(s, &gru_stats.f, #f)
+
+static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
+{
+	unsigned long val = atomic_long_read(v);
+
+	if (val)
+		seq_printf(s, "%16lu %s\n", val, id);
+}
+
+static int statistics_show(struct seq_file *s, void *p)
+{
+	printstat(s, vdata_alloc);
+	printstat(s, vdata_free);
+	printstat(s, gts_alloc);
+	printstat(s, gts_free);
+	printstat(s, vdata_double_alloc);
+	printstat(s, gts_double_allocate);
+	printstat(s, assign_context);
+	printstat(s, assign_context_failed);
+	printstat(s, free_context);
+	printstat(s, load_context);
+	printstat(s, unload_context);
+	printstat(s, steal_context);
+	printstat(s, steal_context_failed);
+	printstat(s, nopfn);
+	printstat(s, break_cow);
+	printstat(s, asid_new);
+	printstat(s, asid_next);
+	printstat(s, asid_wrap);
+	printstat(s, asid_reuse);
+	printstat(s, intr);
+	printstat(s, call_os);
+	printstat(s, call_os_check_for_bug);
+	printstat(s, call_os_wait_queue);
+	printstat(s, user_flush_tlb);
+	printstat(s, user_unload_context);
+	printstat(s, user_exception);
+	printstat(s, set_task_slice);
+	printstat(s, migrate_check);
+	printstat(s, migrated_retarget);
+	printstat(s, migrated_unload);
+	printstat(s, migrated_unload_delay);
+	printstat(s, migrated_nopfn_retarget);
+	printstat(s, migrated_nopfn_unload);
+	printstat(s, tlb_dropin);
+	printstat(s, tlb_dropin_fail_no_asid);
+	printstat(s, tlb_dropin_fail_upm);
+	printstat(s, tlb_dropin_fail_invalid);
+	printstat(s, tlb_dropin_fail_range_active);
+	printstat(s, tlb_dropin_fail_idle);
+	printstat(s, tlb_dropin_fail_fmm);
+	printstat(s, mmu_invalidate_range);
+	printstat(s, mmu_invalidate_page);
+	printstat(s, mmu_clear_flush_young);
+	printstat(s, flush_tlb);
+	printstat(s, flush_tlb_gru);
+	printstat(s, flush_tlb_gru_tgh);
+	printstat(s, flush_tlb_gru_zero_asid);
+	printstat(s, copy_gpa);
+	printstat(s, mesq_receive);
+	printstat(s, mesq_receive_none);
+	printstat(s, mesq_send);
+	printstat(s, mesq_send_failed);
+	printstat(s, mesq_noop);
+	printstat(s, mesq_send_unexpected_error);
+	printstat(s, mesq_send_lb_overflow);
+	printstat(s, mesq_send_qlimit_reached);
+	printstat(s, mesq_send_amo_nacked);
+	printstat(s, mesq_send_put_nacked);
+	printstat(s, mesq_qf_not_full);
+	printstat(s, mesq_qf_locked);
+	printstat(s, mesq_qf_noop_not_full);
+	printstat(s, mesq_qf_switch_head_failed);
+	printstat(s, mesq_qf_unexpected_error);
+	printstat(s, mesq_noop_unexpected_error);
+	printstat(s, mesq_noop_lb_overflow);
+	printstat(s, mesq_noop_qlimit_reached);
+	printstat(s, mesq_noop_amo_nacked);
+	printstat(s, mesq_noop_put_nacked);
+	return 0;
+}
+
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+				size_t count, loff_t *data)
+{
+	memset(&gru_stats, 0, sizeof(gru_stats));
+	return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "0x%lx\n", options);
+	return 0;
+}
+
+/* Parse a user-written decimal string into the global "options" word. */
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+			     size_t count, loff_t *data)
+{
+	unsigned long val;
+	char buf[80] = "";	/* zero-filled, so the copy stays NUL-terminated */
+	size_t len = count < sizeof(buf) ? count : sizeof(buf) - 1;
+
+	if (copy_from_user(buf, userbuf, len))
+		return -EFAULT;
+	if (!strict_strtoul(buf, 10, &val))
+		options = val;
+	return count;	/* input is consumed even when it does not parse */
+}
+
+/* seq_file show: list the contexts loaded on GRU "gid", one line per CCH. */
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data;
+	int i;
+	struct gru_state *gru = GID_TO_GRU(gid);
+	struct gru_thread_state *ts;
+	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+	if (gid == 0)	/* column headings go out once, ahead of the first GRU */
+		seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
+			   "ctx#", "pid", "cbrs", "dsbytes", "mode");
+	if (gru)
+		for (i = 0; i < GRU_NUM_CCH; i++) {
+			ts = gru->gs_gts[i];
+			if (!ts)
+				continue;
+			seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
+				   gru->gs_gid, gru->gs_blade_id, i,
+				   ts->ts_tgid_owner,
+				   ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+				   ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
+				   mode[ts->ts_user_options & GRU_OPT_MISS_MASK]);
+		}
+
+	return 0;
+}
+
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+	struct gru_state *gru = GID_TO_GRU(gid);
+
+	if (gid == 0) {
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+			   "busy", "busy", "free", "free", "free");
+	}
+	if (gru) {
+		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+		cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+		dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+		seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+			   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+			   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+			   ctxfree, cbrfree, dsrfree);
+	}
+
+	return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+	if (*gid < GRU_MAX_GRUS)
+		return gid;
+	return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+	(*gid)++;
+	if (*gid < GRU_MAX_GRUS)
+		return gid;
+	return NULL;
+}
+
+static const struct seq_operations cch_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= cch_seq_show
+};
+
+static const struct seq_operations gru_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, options_show, NULL);
+}
+
+static int cch_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &cch_seq_ops);
+}
+
+static int gru_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &gru_seq_ops);
+}
+
+/* *INDENT-OFF* */
+static const struct file_operations statistics_fops = {
+	.open 		= statistics_open,
+	.read 		= seq_read,
+	.write 		= statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations options_fops = {
+	.open 		= options_open,
+	.read 		= seq_read,
+	.write 		= options_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations cch_fops = {
+	.open 		= cch_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+static const struct file_operations gru_fops = {
+	.open 		= gru_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+
+static struct proc_entry {
+	char *name;
+	int mode;
+	const struct file_operations *fops;
+	struct proc_dir_entry *entry;
+} proc_files[] = {
+	{"statistics", 0644, &statistics_fops},
+	{"debug_options", 0644, &options_fops},
+	{"cch_status", 0444, &cch_fops},
+	{"gru_status", 0444, &gru_fops},
+	{NULL}
+};
+/* *INDENT-ON* */
+
+static struct proc_dir_entry *proc_gru __read_mostly;
+
+/* Create one proc_files[] entry under proc_gru; 0 on success, -1 if not. */
+static int create_proc_file(struct proc_entry *p)
+{
+	/* proc_create() attaches the fops before the entry becomes visible */
+	p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
+
+	return p->entry ? 0 : -1;
+}
+
+static void delete_proc_files(void)
+{
+	struct proc_entry *p;
+
+	if (!proc_gru)		/* no gru directory to clean up */
+		return;
+	for (p = proc_files; p->name; p++)
+		if (p->entry)	/* only entries that were actually created */
+			remove_proc_entry(p->name, proc_gru);
+	remove_proc_entry("sgi_uv/gru", NULL);	/* same path proc_mkdir() got */
+}
+
+int gru_proc_init(void)
+{
+	struct proc_entry *p;
+
+	proc_mkdir("sgi_uv", NULL);
+	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
+
+	for (p = proc_files; p->name; p++)
+		if (create_proc_file(p))
+			goto err;
+	return 0;
+
+err:
+	delete_proc_files();
+	return -1;
+}
+
+void gru_proc_exit(void)
+{
+	delete_proc_files();
+}
-- 
cgit v1.2.3


From ee5b8feca3af01400e26637209a72fbf137c82ff Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:59 -0700
Subject: GRU Driver: TLB flushing, MMUOPS callouts

This file contains the functions for handling GRU TLB flushing.  This
includes functions to handle the MMUOPS callouts.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grutlbpurge.c | 370 +++++++++++++++++++++++++++++++++++++
 1 file changed, 370 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/grutlbpurge.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 00000000000..bb6b0e64e10
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,370 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * 		MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles mmu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random()	get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+	int n;
+
+	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+	n = gru_random() % n;
+	n += gru->gs_tgh_first_remote;
+	return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+							 *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+	int n;
+
+	preempt_disable();
+	if (uv_numa_blade_id() == gru->gs_blade_id)
+		n = get_on_blade_tgh(gru);
+	else
+		n = get_off_blade_tgh(gru);
+	tgh = get_tgh_by_index(gru, n);
+	lock_tgh_handle(tgh);
+
+	return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	unlock_tgh_handle(tgh);
+	preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * 	- GRU contexts are not loaded into a GRU unless a reference is made to
+ * 	  the data segment or control block (this is true, not an assumption).
+ * 	  If a DS/CB is referenced, the user will also issue instructions that
+ * 	  cause TLBmisses. It is not necessary to optimize for the case where
+ * 	  contexts are loaded but no instructions cause TLB misses. (I know
+ * 	  this will happen but I'm not optimizing for it).
+ * 	- GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * 	  a few usec but in unusual cases, it could be longer. Avoid if
+ * 	  possible.
+ * 	- intrablade process migration between cpus is not frequent but is
+ * 	  common.
+ * 	- a GRU context is not typically migrated to a different GRU on the
+ * 	  blade because of intrablade migration
+ *	- interblade migration is rare. Processes migrate their GRU context to
+ *	  the new blade.
+ *	- if interblade migration occurs, migration back to the original blade
+ *	  is very very rare (ie., no optimization for this case)
+ *	- most GRU instructions operate on a subset of the user REGIONS. Code
+ *	  & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * 	- asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * 	  loaded into the GRU.
+ * 	- asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * 	  the above array
+ *	- ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *	  in the GRU for the address space. This bitmap must be passed to the
+ *	  GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * 	- scan the asidmap for GRUs where the context has been loaded, ie,
+ * 	  asid is non-zero.
+ * 	- for each gru found:
+ * 		- if the ctxtmap is non-zero, there are active contexts in the
+ * 		  GRU. TLB invalidate instructions must be issued to the GRU.
+ *		- if the ctxtmap is zero, no context is active. Set the ASID to
+ *		  zero to force a full TLB invalidation. This is fast but will
+ *		  cause a lot of TLB misses if the context is reloaded onto the
+ *		  GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+			 unsigned long len)
+{
+	struct gru_state *gru;
+	struct gru_mm_tracker *asids;
+	struct gru_tlb_global_handle *tgh;
+	unsigned long num;
+	int grupagesize, pagesize, pageshift, gid, asid;
+
+	/* ZZZ TODO - handle huge pages */
+	pageshift = PAGE_SHIFT;
+	pagesize = (1UL << pageshift);
+	grupagesize = GRU_PAGESIZE(pageshift);
+	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+	STAT(flush_tlb);
+	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+		start, len, gms->ms_asidmap[0]);
+
+	spin_lock(&gms->ms_asid_lock);
+	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+		STAT(flush_tlb_gru);
+		gru = GID_TO_GRU(gid);
+		asids = gms->ms_asids + gid;
+		asid = asids->mt_asid;
+		if (asids->mt_ctxbitmap && asid) {
+			STAT(flush_tlb_gru_tgh);
+			asid = GRUASID(asid, start);
+			gru_dbg(grudev,
+	"  FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
+				gid, asid, num, asids->mt_ctxbitmap);
+			tgh = get_lock_tgh_handle(gru);
+			tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
+				       num - 1, asids->mt_ctxbitmap);
+			get_unlock_tgh_handle(tgh);
+		} else {
+			STAT(flush_tlb_gru_zero_asid);
+			asids->mt_asid = 0;
+			__clear_bit(gru->gs_gid, gms->ms_asidmap);
+			gru_dbg(grudev,
+	"  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+				gid, asid, asids->mt_ctxbitmap,
+				gms->ms_asidmap[0]);
+		}
+	}
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet. get_unlock_tgh_handle() already
+ * re-enables preemption, so no further preempt_enable() is done here.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+
+	gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
+	tgh = get_lock_tgh_handle(gru);
+	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
+	get_unlock_tgh_handle(tgh);
+}
+
+/*
+ * MMUOPS notifier callout functions
+ */
+static void gru_invalidate_range_start(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	STAT(mmu_invalidate_range);
+	atomic_inc(&gms->ms_range_active);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+		start, end, atomic_read(&gms->ms_range_active));
+	gru_flush_tlb_range(gms, start, end - start);
+}
+
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long start,
+				     unsigned long end)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	atomic_dec(&gms->ms_range_active);
+	wake_up_all(&gms->ms_wait_queue);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+				unsigned long address)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	STAT(mmu_invalidate_page);
+	gru_flush_tlb_range(gms, address, PAGE_SIZE);
+	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	gms->ms_released = 1;
+	gru_dbg(grudev, "gms %p\n", gms);
+}
+
+
+static const struct mmu_notifier_ops gru_mmuops = {
+	.invalidate_page	= gru_invalidate_page,
+	.invalidate_range_start	= gru_invalidate_range_start,
+	.invalidate_range_end	= gru_invalidate_range_end,
+	.release		= gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+			const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn, *gru_mn = NULL;
+	struct hlist_node *n;
+
+	if (mm->mmu_notifier_mm) {
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+					 hlist)
+		    if (mn->ops == ops) {
+			gru_mn = mn;
+			break;
+		}
+		rcu_read_unlock();
+	}
+	return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+	struct gru_mm_struct *gms;
+	struct mmu_notifier *mn;
+
+	mn = mmu_find_ops(current->mm, &gru_mmuops);
+	if (mn) {	/* this mm already has a GMS: take another reference */
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		atomic_inc(&gms->ms_refcnt);
+	} else {	/* first use by this mm: allocate and register a GMS */
+		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+		if (!gms)	/* bail out before gru_dbg() dereferences gms */
+			return NULL;
+		spin_lock_init(&gms->ms_asid_lock);
+		gms->ms_notifier.ops = &gru_mmuops;
+		atomic_set(&gms->ms_refcnt, 1);
+		init_waitqueue_head(&gms->ms_wait_queue);
+		__mmu_notifier_register(&gms->ms_notifier, current->mm);
+	}
+	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+		atomic_read(&gms->ms_refcnt));
+	return gms;
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+		atomic_read(&gms->ms_refcnt), gms->ms_released);
+	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+		if (!gms->ms_released)
+			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+		kfree(gms);
+	}
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * 	- 24 TGH handles per GRU chiplet
+ * 	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * 	  use by blade-local cpus
+ * 	- the rest are used by off-blade cpus. This usage is
+ * 	  less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less than or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH	16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+	int cpus, shift = 0, n;
+
+	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+	/* n = cpus rounded up to next power of 2 */
+	if (cpus) {
+		n = 1 << fls(cpus - 1);
+
+		/*
+		 * shift count for converting local cpu# to TGH index
+		 *      0 if cpus <= MAX_LOCAL_TGH,
+		 *      1 if cpus <= 2*MAX_LOCAL_TGH,
+		 *      etc
+		 */
+		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+	}
+	gru->gs_tgh_local_shift = shift;
+
+	/* first starting TGH index to use for remote purges */
+	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
-- 
cgit v1.2.3


From 3c45f6928322773b1810fbec1ece77056f914114 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:34:00 -0700
Subject: GRU Driver: driver makefile

This patch adds the GRU driver makefile

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/Makefile | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 drivers/misc/sgi-gru/Makefile

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 00000000000..d03597a521b
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
+
-- 
cgit v1.2.3


From 3d919e5f6b440bb0cc7996eb7628b29be09e6343 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:34:01 -0700
Subject: GRU Driver: driver/misc Makefile & Kconfig changes

Driver/misc changes for the GRU driver

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig  | 23 +++++++++++++++++++++++
 drivers/misc/Makefile |  1 +
 2 files changed, 24 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index f5ade1904aa..4b288f43ca8 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -450,4 +450,27 @@ config HP_ILO
 	  To compile this driver as a module, choose M here: the
 	  module will be called hpilo.
 
+config SGI_GRU
+	tristate "SGI GRU driver"
+	depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+	default n
+	select MMU_NOTIFIER
+	---help---
+	The GRU is a hardware resource located in the system chipset. The GRU
+	contains memory that can be mmapped into the user address space. This memory is
+	used to communicate with the GRU to perform functions such as load/store,
+	scatter/gather, bcopy, AMOs, etc.  The GRU is directly accessed by user
+	instructions using user virtual addresses. GRU instructions (ex., bcopy) use
+	user virtual addresses for operands.
+
+	If you are not running on a SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+	bool  "SGI GRU driver debug"
+	depends on SGI_GRU
+	default n
+	---help---
+	This option enables additional debugging code for the SGI GRU driver. If
+	you are unsure, say N.
+
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f5e273420c0..c6c13f60b45 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -28,4 +28,5 @@ obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
 obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
 obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
+obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
-- 
cgit v1.2.3


From 9ca8e40c130c906c1060d105e63628410c860261 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:34:02 -0700
Subject: GRU Driver V3: fixes to resolve code review comments

Fixes problems identified in a code review:
	- add comment with high level description of the GRU
	- prepend "gru_" to all global names
	- delete unused function
	- couple of trivial bug fixes

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/gru_instructions.h | 10 -----
 drivers/misc/sgi-gru/grufile.c          |  8 +++-
 drivers/misc/sgi-gru/grukservices.c     |  6 +--
 drivers/misc/sgi-gru/grumain.c          | 16 ++++---
 drivers/misc/sgi-gru/gruprocfs.c        |  4 +-
 drivers/misc/sgi-gru/grutables.h        | 74 ++++++++++++++++++++++++++++++---
 drivers/misc/sgi-gru/grutlbpurge.c      |  4 +-
 7 files changed, 93 insertions(+), 29 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 3159b261c5a..0dc36225c7c 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -284,16 +284,6 @@ __opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
 	   (exopc << GRU_CB_EXOPC_SHFT);
 }
 
-/*
- * Prefetch a cacheline. Fetch is unconditional. Must page fault if
- * no valid TLB entry is found.
- * 	??? should I use actual "load" or hardware prefetch???
- */
-static inline void gru_prefetch(void *p)
-{
-	*(volatile char *)p;
-}
-
 /*
  * Architecture specific intrinsics
  */
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 09c9c65ff9d..23c91f5f6b6 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -112,6 +112,10 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
 		return -EPERM;
 
+	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+			vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+		return -EINVAL;
+
 	vma->vm_flags |=
 	    (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
 			VM_RESERVED);
@@ -471,8 +475,8 @@ struct vm_operations_struct gru_vm_ops = {
 module_init(gru_init);
 module_exit(gru_exit);
 
-module_param(options, ulong, 0644);
-MODULE_PARM_DESC(options, "Various debug options");
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
 
 MODULE_AUTHOR("Silicon Graphics, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 234d165fb11..dfd49af0fe1 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -638,11 +638,11 @@ int gru_kservices_init(struct gru_state *gru)
 	cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
 
 	num = GRU_NUM_KERNEL_CBR * cpus_possible;
-	cbr_map = reserve_gru_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
+	cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
 	gru->gs_reserved_cbrs += num;
 
 	num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
-	dsr_map = reserve_gru_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
+	dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
 	gru->gs_reserved_dsr_bytes += num;
 
 	gru->gs_active_contexts++;
@@ -673,7 +673,7 @@ int gru_kservices_init(struct gru_state *gru)
 	}
 	unlock_cch_handle(cch);
 
-	if (options & GRU_QUICKLOOK)
+	if (gru_options & GRU_QUICKLOOK)
 		quicktest(gru);
 	return 0;
 }
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index aef6822cb80..0eeb8dddd2f 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -22,7 +22,7 @@
 #include "grutables.h"
 #include "gruhandles.h"
 
-unsigned long options __read_mostly;
+unsigned long gru_options __read_mostly;
 
 static struct device_driver gru_driver = {
 	.name = "gru"
@@ -163,14 +163,14 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
 	return bits;
 }
 
-unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count,
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
 				       char *cbmap)
 {
 	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
 				 cbmap);
 }
 
-unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count,
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
 				       char *dsmap)
 {
 	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
@@ -182,10 +182,10 @@ static void reserve_gru_resources(struct gru_state *gru,
 {
 	gru->gs_active_contexts++;
 	gts->ts_cbr_map =
-	    reserve_gru_cb_resources(gru, gts->ts_cbr_au_count,
+	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
 				     gts->ts_cbr_idx);
 	gts->ts_dsr_map =
-	    reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
 }
 
 static void free_gru_resources(struct gru_state *gru,
@@ -416,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts)
 
 /*
  * Prefetching cachelines help hardware performance.
+ * (Strictly a performance enhancement. Not functionally required).
  */
 static void prefetch_data(void *p, int num, int stride)
 {
@@ -746,6 +747,8 @@ again:
  * gru_nopage
  *
  * Map the user's GRU segment
+ *
+ * 	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
  */
 int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -757,6 +760,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		vma, vaddr, GSEG_BASE(vaddr));
 	STAT(nopfn);
 
+	/* The following check ensures vaddr is a valid address in the VMA */
 	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
 	if (!gts)
 		return VM_FAULT_SIGBUS;
@@ -775,7 +779,7 @@ again:
 	}
 
 	if (!gts->ts_gru) {
-		while (!gru_assign_gru_context(gts)) {
+		if (!gru_assign_gru_context(gts)) {
 			mutex_unlock(&gts->ts_ctxlock);
 			preempt_enable();
 			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index bdb1ad83bbf..533923f83f1 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -122,7 +122,7 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf,
 
 static int options_show(struct seq_file *s, void *p)
 {
-	seq_printf(s, "0x%lx\n", options);
+	seq_printf(s, "0x%lx\n", gru_options);
 	return 0;
 }
 
@@ -136,7 +136,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
 	    (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
 		return -EFAULT;
 	if (!strict_strtoul(buf, 10, &val))
-		options = val;
+		gru_options = val;
 
 	return count;
 }
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index f97d8464012..4251018f70f 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -24,6 +24,70 @@
 #define __GRUTABLES_H__
 
 /*
+ * GRU Chiplet:
+ *   The GRU is a user addressable memory accelerator. It provides
+ *   several forms of load, store, memset, bcopy instructions. In addition, it
+ *   contains special instructions for AMOs, sending messages to message
+ *   queues, etc.
+ *
+ *   The GRU is an integral part of the node controller. It connects
+ *   directly to the cpu socket. In its current implementation, there are 2
+ *   GRU chiplets in the node controller on each blade (~node).
+ *
+ *   The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ *   Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ *   	+-----------------+
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	+-----------------+
+ *   	|  system control |
+ *   	+-----------------+        _______ +-------------+
+ *   	|/////////////////|       /        |             |
+ *   	|/////////////////|      /         |             |
+ *   	|/////////////////|     /          | instructions|
+ *   	|/////////////////|    /           |             |
+ *   	|/////////////////|   /            |             |
+ *   	|/////////////////|  /             |-------------|
+ *   	|/////////////////| /              |             |
+ *   	+-----------------+                |             |
+ *   	|   context 15    |                |  data       |
+ *   	+-----------------+                |             |
+ *   	|    ......       | \              |             |
+ *   	+-----------------+  \____________ +-------------+
+ *   	|   context 1     |
+ *   	+-----------------+
+ *   	|   context 0     |
+ *   	+-----------------+
+ *
+ *   Each of the "contexts" is a chunk of memory that can be mmaped into user
+ *   space. The context consists of 2 parts:
+ *
+ *  	- an instruction space that can be directly accessed by the user
+ *  	  to issue GRU instructions and to check instruction status.
+ *
+ *  	- a data area that acts as normal RAM.
+ *
+ *   User instructions contain virtual addresses of data to be accessed by the
+ *   GRU. The GRU contains a TLB that is used to convert these user virtual
+ *   addresses to physical addresses.
+ *
+ *   The "system control" area of the GRU chiplet is used by the kernel driver
+ *   to manage user contexts and to perform functions such as TLB dropin and
+ *   purging.
+ *
+ *   One context may be reserved for the kernel and used for cross-partition
+ *   communication. The GRU will also be used to asynchronously zero out
+ *   large blocks of memory (not currently implemented).
+ *
+ *
  * Tables:
  *
  * 	VDATA-VMA Data		- Holds a few parameters. Head of linked list of
@@ -190,14 +254,14 @@ struct gru_stats_s {
 #define GRU_STEAL_DELAY		((HZ * 200) / 1000)
 
 #define STAT(id)	do {						\
-				if (options & OPT_STATS)		\
+				if (gru_options & OPT_STATS)		\
 					atomic_long_inc(&gru_stats.id);	\
 			} while (0)
 
 #ifdef CONFIG_SGI_GRU_DEBUG
 #define gru_dbg(dev, fmt, x...)						\
 	do {								\
-		if (options & OPT_DPRINT)				\
+		if (gru_options & OPT_DPRINT)				\
 			dev_dbg(dev, "%s: " fmt, __func__, x);		\
 	} while (0)
 #else
@@ -529,9 +593,9 @@ extern void gru_flush_all_tlb(struct gru_state *gru);
 extern int gru_proc_init(void);
 extern void gru_proc_exit(void);
 
-extern unsigned long reserve_gru_cb_resources(struct gru_state *gru,
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
 		int cbr_au_count, char *cbmap);
-extern unsigned long reserve_gru_ds_resources(struct gru_state *gru,
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
 		int dsr_au_count, char *dsmap);
 extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
 extern struct gru_mm_struct *gru_register_mmu_notifier(void);
@@ -540,6 +604,6 @@ extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
 extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
 					unsigned long len);
 
-extern unsigned long options;
+extern unsigned long gru_options;
 
 #endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index bb6b0e64e10..bcfd5425e2e 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -242,7 +242,9 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
 	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
 						 ms_notifier);
 
-	atomic_dec(&gms->ms_range_active);
+	/* ..._and_test() provides needed barrier */
+	(void)atomic_dec_and_test(&gms->ms_range_active);
+
 	wake_up_all(&gms->ms_wait_queue);
 	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
 }
-- 
cgit v1.2.3


From 355c54d2e70093f09910d2ecf343023aefc219e1 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:02 -0700
Subject: sgi-xp: define is_shub() and is_uv() macros

Define the is_shub()/is_uv() macros if they've not already been defined.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 03a87a307e3..83627eac412 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -25,6 +25,22 @@
 #define DBUG_ON(condition)
 #endif
 
+#ifndef is_shub1
+#define is_shub1()	0
+#endif
+
+#ifndef is_shub2
+#define is_shub2()	0
+#endif
+
+#ifndef is_shub
+#define is_shub()	(is_shub1() || is_shub2())
+#endif
+
+#ifndef is_uv
+#define is_uv()		0
+#endif
+
 /*
  * Define the maximum number of logically defined partitions the system
  * can support. It is constrained by the maximum number of hardware
-- 
cgit v1.2.3


From da9705259848b968cdf6151b977334fe7b5b0461 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:03 -0700
Subject: sgi-xp: define xpSalError reason code

Define xpSalError reason code.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 83627eac412..21cb8a31def 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -249,8 +249,9 @@ enum xp_retval {
 	xpDisconnected,		/* 51: channel disconnected (closed) */
 
 	xpBteCopyError,		/* 52: bte_copy() returned error */
+	xpSalError,		/* 53: sn SAL error */
 
-	xpUnknownReason		/* 53: unknown reason - must be last in enum */
+	xpUnknownReason		/* 54: unknown reason - must be last in enum */
 };
 
 /*
-- 
cgit v1.2.3


From 78ce1bbe446e9b46dcd6c1e60a4768448a8ce355 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:03 -0700
Subject: sgi-xp: define BYTES_PER_WORD

Add a BYTES_PER_WORD #define.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 21cb8a31def..867fb4863d5 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -19,6 +19,9 @@
 #include <asm/sn/types.h>
 #include <asm/sn/bte.h>
 
+/* >>> Add this #define to some linux header file some day. */
+#define BYTES_PER_WORD	sizeof(void *)
+
 #ifdef USE_DBUG_ON
 #define DBUG_ON(condition)	BUG_ON(condition)
 #else
-- 
cgit v1.2.3


From bc63d387e4f5dbbe4ea0c5ade862c38073fd7fa3 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:04 -0700
Subject: sgi-xp: support runtime selection of xp_max_npartitions

Support runtime selection of the max number of partitions based on the
hardware being run on.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/Makefile        |   3 +-
 drivers/misc/sgi-xp/xp.h            |  53 ++++++++++++-------
 drivers/misc/sgi-xp/xp_main.c       |  84 ++++++++++++-----------------
 drivers/misc/sgi-xp/xp_sn2.c        |  92 ++++++++++++++++++++++++++++++++
 drivers/misc/sgi-xp/xp_uv.c         |  30 +++++++++++
 drivers/misc/sgi-xp/xpc.h           |  12 +++--
 drivers/misc/sgi-xp/xpc_channel.c   |  20 +++----
 drivers/misc/sgi-xp/xpc_main.c      | 103 +++++++++++++++++-------------------
 drivers/misc/sgi-xp/xpc_partition.c |  16 ++----
 drivers/misc/sgi-xp/xpnet.c         |   4 +-
 10 files changed, 266 insertions(+), 151 deletions(-)
 create mode 100644 drivers/misc/sgi-xp/xp_sn2.c
 create mode 100644 drivers/misc/sgi-xp/xp_uv.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
index b6e40a7958c..b50f2921781 100644
--- a/drivers/misc/sgi-xp/Makefile
+++ b/drivers/misc/sgi-xp/Makefile
@@ -3,7 +3,8 @@
 #
 
 obj-$(CONFIG_SGI_XP)		+= xp.o
-xp-y				:= xp_main.o xp_nofault.o
+xp-y				:= xp_main.o xp_uv.o
+xp-$(CONFIG_IA64)		+= xp_sn2.o xp_nofault.o
 
 obj-$(CONFIG_SGI_XP)		+= xpc.o
 xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 867fb4863d5..51087e11188 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -18,6 +18,9 @@
 #include <linux/mutex.h>
 #include <asm/sn/types.h>
 #include <asm/sn/bte.h>
+#ifdef CONFIG_IA64
+#include <asm/sn/arch.h>
+#endif
 
 /* >>> Add this #define to some linux header file some day. */
 #define BYTES_PER_WORD	sizeof(void *)
@@ -45,17 +48,18 @@
 #endif
 
 /*
- * Define the maximum number of logically defined partitions the system
- * can support. It is constrained by the maximum number of hardware
- * partitionable regions. The term 'region' in this context refers to the
- * minimum number of nodes that can comprise an access protection grouping.
- * The access protection is in regards to memory, IPI and IOI.
+ * Define the maximum number of partitions the system can possibly support.
+ * It is based on the maximum number of hardware partitionable regions. The
+ * term 'region' in this context refers to the minimum number of nodes that
+ * can comprise an access protection grouping. The access protection is in
+ * regards to memory, IPI and IOI.
  *
  * The maximum number of hardware partitionable regions is equal to the
  * maximum number of nodes in the entire system divided by the minimum number
  * of nodes that comprise an access protection grouping.
  */
-#define XP_MAX_PARTITIONS	64
+#define XP_MAX_NPARTITIONS_SN2	64
+#define XP_MAX_NPARTITIONS_UV	256
 
 /*
  * Define the number of u64s required to represent all the C-brick nasids
@@ -112,24 +116,28 @@ xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
  * other partition that is currently up. Over these channels, kernel-level
  * `users' can communicate with their counterparts on the other partitions.
  *
- * The maxinum number of channels is limited to eight. For performance reasons,
- * the internal cross partition structures require sixteen bytes per channel,
- * and eight allows all of this interface-shared info to fit in one cache line.
+>>> The following described limitation of a max of eight channels possible
+>>> pertains only to ia64-sn2. THIS ISN'T TRUE SINCE I'M PLANNING TO JUST
+>>> TIE INTO THE EXISTING MECHANISM ONCE THE CHANNEL MESSAGES ARE RECEIVED.
+>>> THE 128-BYTE CACHELINE PERFORMANCE ISSUE IS TIED TO IA64-SN2.
  *
- * XPC_NCHANNELS reflects the total number of channels currently defined.
  * If the need for additional channels arises, one can simply increase
- * XPC_NCHANNELS accordingly. If the day should come where that number
- * exceeds the MAXIMUM number of channels allowed (eight), then one will need
- * to make changes to the XPC code to allow for this.
+ * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the absolute MAXIMUM number of channels possible (eight), then one
+ * will need to make changes to the XPC code to accommodate for this.
+ *
+ * The absolute maximum number of channels possible is currently limited to
+ * eight for performance reasons. The internal cross partition structures
+ * require sixteen bytes per channel, and eight allows all of this
+ * interface-shared info to fit in one 128-byte cacheline.
  */
 #define XPC_MEM_CHANNEL		0	/* memory channel number */
 #define	XPC_NET_CHANNEL		1	/* network channel number */
 
-#define	XPC_NCHANNELS		2	/* #of defined channels */
-#define XPC_MAX_NCHANNELS	8	/* max #of channels allowed */
+#define XPC_MAX_NCHANNELS	2	/* max #of channels allowed */
 
-#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
-#error	XPC_NCHANNELS exceeds MAXIMUM allowed.
+#if XPC_MAX_NCHANNELS > 8
+#error	XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
 #endif
 
 /*
@@ -254,7 +262,8 @@ enum xp_retval {
 	xpBteCopyError,		/* 52: bte_copy() returned error */
 	xpSalError,		/* 53: sn SAL error */
 
-	xpUnknownReason		/* 54: unknown reason - must be last in enum */
+	xpUnsupported,		/* 54: unsupported functionality or resource */
+	xpUnknownReason		/* 55: unknown reason - must be last in enum */
 };
 
 /*
@@ -397,8 +406,16 @@ xpc_partid_to_nasids(short partid, void *nasids)
 	return xpc_interface.partid_to_nasids(partid, nasids);
 }
 
+extern short xp_max_npartitions;
+
 extern u64 xp_nofault_PIOR_target;
 extern int xp_nofault_PIOR(void *);
 extern int xp_error_PIOR(void);
 
+extern struct device *xp;
+extern enum xp_retval xp_init_sn2(void);
+extern enum xp_retval xp_init_uv(void);
+extern void xp_exit_sn2(void);
+extern void xp_exit_uv(void);
+
 #endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 196480b691a..c5cec606377 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -15,28 +15,32 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
+#include <linux/device.h>
 #include "xp.h"
 
-/*
- * The export of xp_nofault_PIOR needs to happen here since it is defined
- * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
- * defined here.
- */
-EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+/* define the XP debug device structures to be used with dev_dbg() et al */
 
-u64 xp_nofault_PIOR_target;
-EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+struct device_driver xp_dbg_name = {
+	.name = "xp"
+};
+
+struct device xp_dbg_subname = {
+	.bus_id = {0},		/* set to "" */
+	.driver = &xp_dbg_name
+};
+
+struct device *xp = &xp_dbg_subname;
+
+/* max #of partitions possible */
+short xp_max_npartitions;
+EXPORT_SYMBOL_GPL(xp_max_npartitions);
 
 /*
  * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
  * users of XPC.
  */
-struct xpc_registration xpc_registrations[XPC_NCHANNELS];
+struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
 EXPORT_SYMBOL_GPL(xpc_registrations);
 
 /*
@@ -135,7 +139,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
 {
 	struct xpc_registration *registration;
 
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
 	DBUG_ON(payload_size == 0 || nentries == 0);
 	DBUG_ON(func == NULL);
 	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
@@ -185,7 +189,7 @@ xpc_disconnect(int ch_number)
 {
 	struct xpc_registration *registration;
 
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
 
 	registration = &xpc_registrations[ch_number];
 
@@ -221,39 +225,21 @@ EXPORT_SYMBOL_GPL(xpc_disconnect);
 int __init
 xp_init(void)
 {
-	int ret, ch_number;
-	u64 func_addr = *(u64 *)xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *)xp_error_PIOR;
-
-	if (!ia64_platform_is("sn2"))
-		return -ENODEV;
+	enum xp_retval ret;
+	int ch_number;
 
-	/*
-	 * Register a nofault code region which performs a cross-partition
-	 * PIO read. If the PIO read times out, the MCA handler will consume
-	 * the error and return to a kernel-provided instruction to indicate
-	 * an error. This PIO read exists because it is guaranteed to timeout
-	 * if the destination is down (AMO operations do not timeout on at
-	 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
-	 * work around).
-	 */
-	ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
-				       1, 1);
-	if (ret != 0) {
-		printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
-		       ret);
-	}
-	/*
-	 * Setup the nofault PIO read target. (There is no special reason why
-	 * SH_IPI_ACCESS was selected.)
-	 */
-	if (is_shub2())
-		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+	if (is_shub())
+		ret = xp_init_sn2();
+	else if (is_uv())
+		ret = xp_init_uv();
 	else
-		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+		ret = xpUnsupported;
+
+	if (ret != xpSuccess)
+		return -ENODEV;
 
 	/* initialize the connection registration mutex */
-	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++)
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
 		mutex_init(&xpc_registrations[ch_number].mutex);
 
 	return 0;
@@ -264,12 +250,10 @@ module_init(xp_init);
 void __exit
 xp_exit(void)
 {
-	u64 func_addr = *(u64 *)xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *)xp_error_PIOR;
-
-	/* unregister the PIO read nofault code region */
-	(void)sn_register_nofault_code(func_addr, err_func_addr,
-				       err_func_addr, 1, 0);
+	if (is_shub())
+		xp_exit_sn2();
+	else if (is_uv())
+		xp_exit_uv();
 }
 
 module_exit(xp_exit);
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 00000000000..b92579356a6
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,92 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) sn2-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ */
+
+#include <linux/device.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * Register a nofault code region which performs a cross-partition PIO read.
+ * If the PIO read times out, the MCA handler will consume the error and
+ * return to a kernel-provided instruction to indicate an error. This PIO read
+ * exists because it is guaranteed to timeout if the destination is down
+ * (AMO operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * which unfortunately we have to work around).
+ */
+static enum xp_retval
+xp_register_nofault_code_sn2(void)
+{
+	int ret;
+	u64 func_addr;
+	u64 err_func_addr;
+
+	func_addr = *(u64 *)xp_nofault_PIOR;
+	err_func_addr = *(u64 *)xp_error_PIOR;
+	ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+				       1, 1);
+	if (ret != 0) {
+		dev_err(xp, "can't register nofault code, error=%d\n", ret);
+		return xpSalError;
+	}
+	/*
+	 * Setup the nofault PIO read target. (There is no special reason why
+	 * SH_IPI_ACCESS was selected.)
+	 */
+	if (is_shub1())
+		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+	else if (is_shub2())
+		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+
+	return xpSuccess;
+}
+
+void
+xp_unregister_nofault_code_sn2(void)
+{
+	u64 func_addr = *(u64 *)xp_nofault_PIOR;
+	u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+	/* unregister the PIO read nofault code region */
+	(void)sn_register_nofault_code(func_addr, err_func_addr,
+				       err_func_addr, 1, 0);
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+
+	return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_unregister_nofault_code_sn2();
+}
+
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 00000000000..30888be2cdb
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,30 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ *
+ */
+
+#include "xp.h"
+
+enum xp_retval
+xp_init_uv(void)
+{
+	BUG_ON(!is_uv());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+}
+
+void
+xp_exit_uv(void)
+{
+	BUG_ON(!is_uv());
+}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 11ac267ed68..0f2affd01df 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -210,7 +210,7 @@ xpc_disallow_hb(short partid, struct xpc_vars *vars)
  * the XPC running on the remote partition).
  */
 #define XPC_NOTIFY_IRQ_AMOS	   0
-#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
+#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
 #define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
 #define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
 
@@ -285,7 +285,7 @@ struct xpc_gp {
 };
 
 #define XPC_GP_SIZE \
-		L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
+		L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_MAX_NCHANNELS)
 
 /*
  * Define a structure that contains arguments associated with opening and
@@ -300,7 +300,8 @@ struct xpc_openclose_args {
 };
 
 #define XPC_OPENCLOSE_ARGS_SIZE \
-	      L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
+	      L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
+	      XPC_MAX_NCHANNELS)
 
 /* struct xpc_msg flags */
 
@@ -637,7 +638,7 @@ extern int xpc_exiting;
 extern struct xpc_vars *xpc_vars;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern struct xpc_vars_part *xpc_vars_part;
-extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
+extern struct xpc_partition *xpc_partitions;
 extern char *xpc_remote_copy_buffer;
 extern void *xpc_remote_copy_buffer_base;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
@@ -1104,13 +1105,14 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
 }
 
 /*
+>>> this block comment needs to be moved and re-written.
  * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
  * pages are located in the lowest granule. The lowest granule uses 4k pages
  * for cached references and an alternate TLB handler to never provide a
  * cacheable mapping for the entire region. This will prevent speculative
  * reading of cached copies of our lines from being issued which will cause
  * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
+ * AMO variables (based on xp_max_npartitions) for message notification and an
  * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
  * activation and 2 AMO variables for partition deactivation.
  */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 9c90c2d55c0..12d8eb6957a 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -110,14 +110,14 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 	 * Allocate all of the channel structures as a contiguous chunk of
 	 * memory.
 	 */
-	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
 				 GFP_KERNEL);
 	if (part->channels == NULL) {
 		dev_err(xpc_chan, "can't get memory for channels\n");
 		return xpNoMemory;
 	}
 
-	part->nchannels = XPC_NCHANNELS;
+	part->nchannels = XPC_MAX_NCHANNELS;
 
 	/* allocate all the required GET/PUT values */
 
@@ -1432,9 +1432,9 @@ xpc_initiate_connect(int ch_number)
 	struct xpc_partition *part;
 	struct xpc_channel *ch;
 
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
 
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
 		if (xpc_part_ref(part)) {
@@ -1488,10 +1488,10 @@ xpc_initiate_disconnect(int ch_number)
 	struct xpc_partition *part;
 	struct xpc_channel *ch;
 
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
 
 	/* initiate the channel disconnect for every active partition */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
 		if (xpc_part_ref(part)) {
@@ -1734,7 +1734,7 @@ xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload)
 	enum xp_retval ret = xpUnknownReason;
 	struct xpc_msg *msg = NULL;
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 
 	*payload = NULL;
@@ -1918,7 +1918,7 @@ xpc_initiate_send(short partid, int ch_number, void *payload)
 	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
 		partid, ch_number);
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 	DBUG_ON(msg == NULL);
 
@@ -1968,7 +1968,7 @@ xpc_initiate_send_notify(short partid, int ch_number, void *payload,
 	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
 		partid, ch_number);
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 	DBUG_ON(msg == NULL);
 	DBUG_ON(func == NULL);
@@ -2210,7 +2210,7 @@ xpc_initiate_received(short partid, int ch_number, void *payload)
 	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
 	s64 get, msg_number = msg->number;
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 
 	ch = &part->channels[ch_number];
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index c3b4227f48a..a05c7c7da22 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -433,7 +433,7 @@ xpc_activating(void *__partid)
 	struct xpc_partition *part = &xpc_partitions[partid];
 	unsigned long irq_flags;
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
 
@@ -544,7 +544,7 @@ xpc_notify_IRQ_handler(int irq, void *dev_id)
 	short partid = (short)(u64)dev_id;
 	struct xpc_partition *part = &xpc_partitions[partid];
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 
 	if (xpc_part_ref(part)) {
 		xpc_check_for_channel_activity(part);
@@ -815,7 +815,7 @@ xpc_disconnect_wait(int ch_number)
 	int wakeup_channel_mgr;
 
 	/* now wait for all callouts to the caller's function to cease */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
 		if (!xpc_part_ref(part))
@@ -895,7 +895,7 @@ xpc_do_exit(enum xp_retval reason)
 	do {
 		active_part_count = 0;
 
-		for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+		for (partid = 0; partid < xp_max_npartitions; partid++) {
 			part = &xpc_partitions[partid];
 
 			if (xpc_partition_disengaged(part) &&
@@ -956,11 +956,8 @@ xpc_do_exit(enum xp_retval reason)
 	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
 
 	if (reason == xpUnloading) {
-		/* take ourselves off of the reboot_notifier_list */
-		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
 		(void)unregister_die_notifier(&xpc_die_notifier);
+		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
 	}
 
 	/* close down protections for IPI operations */
@@ -972,6 +969,7 @@ xpc_do_exit(enum xp_retval reason)
 	if (xpc_sysctl)
 		unregister_sysctl_table(xpc_sysctl);
 
+	kfree(xpc_partitions);
 	kfree(xpc_remote_copy_buffer_base);
 }
 
@@ -1017,7 +1015,7 @@ xpc_die_disengage(void)
 
 	xpc_vars->heartbeating_to_mask = 0;	/* indicate we're deactivated */
 
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
 		if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
@@ -1053,7 +1051,8 @@ xpc_die_disengage(void)
 
 		time = rtc_time();
 		if (time >= disengage_request_timeout) {
-			for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+			for (partid = 0; partid < xp_max_npartitions;
+			     partid++) {
 				if (engaged & (1UL << partid)) {
 					dev_info(xpc_part, "disengage from "
 						 "remote partition %d timed "
@@ -1132,18 +1131,26 @@ xpc_init(void)
 	if (!ia64_platform_is("sn2"))
 		return -ENODEV;
 
+	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
+	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
+
 	buf_size = max(XPC_RP_VARS_SIZE,
 		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
 	xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
 							       GFP_KERNEL,
 						  &xpc_remote_copy_buffer_base);
-	if (xpc_remote_copy_buffer == NULL)
+	if (xpc_remote_copy_buffer == NULL) {
+		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
 		return -ENOMEM;
+	}
 
-	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
-	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
-
-	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+				 xp_max_npartitions, GFP_KERNEL);
+	if (xpc_partitions == NULL) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		ret = -ENOMEM;
+		goto out_1;
+	}
 
 	/*
 	 * The first few fields of each entry of xpc_partitions[] need to
@@ -1153,7 +1160,7 @@ xpc_init(void)
 	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
 	 * PARTITION HAS BEEN ACTIVATED.
 	 */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
 		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
@@ -1173,6 +1180,8 @@ xpc_init(void)
 		atomic_set(&part->references, 0);
 	}
 
+	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
 	/*
 	 * Open up protections for IPI operations (and AMO operations on
 	 * Shub 1.1 systems).
@@ -1196,14 +1205,8 @@ xpc_init(void)
 	if (ret != 0) {
 		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
 			"errno=%d\n", -ret);
-
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl)
-			unregister_sysctl_table(xpc_sysctl);
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out_2;
 	}
 
 	/*
@@ -1213,16 +1216,9 @@ xpc_init(void)
 	 */
 	xpc_rsvd_page = xpc_rsvd_page_init();
 	if (xpc_rsvd_page == NULL) {
-		dev_err(xpc_part, "could not setup our reserved page\n");
-
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl)
-			unregister_sysctl_table(xpc_sysctl);
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
+		dev_err(xpc_part, "can't setup our reserved page\n");
+		ret = -EBUSY;
+		goto out_3;
 	}
 
 	/* add ourselves to the reboot_notifier_list */
@@ -1245,25 +1241,8 @@ xpc_init(void)
 	kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
 	if (IS_ERR(kthread)) {
 		dev_err(xpc_part, "failed while forking hb check thread\n");
-
-		/* indicate to others that our reserved page is uninitialized */
-		xpc_rsvd_page->vars_pa = 0;
-
-		/* take ourselves off of the reboot_notifier_list */
-		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
-		(void)unregister_die_notifier(&xpc_die_notifier);
-
-		del_timer_sync(&xpc_hb_timer);
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl)
-			unregister_sysctl_table(xpc_sysctl);
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out_4;
 	}
 
 	/*
@@ -1290,6 +1269,24 @@ xpc_init(void)
 			  xpc_initiate_partid_to_nasids);
 
 	return 0;
+
+	/* initialization was not successful */
+out_4:
+	/* indicate to others that our reserved page is uninitialized */
+	xpc_rsvd_page->vars_pa = 0;
+	del_timer_sync(&xpc_hb_timer);
+	(void)unregister_die_notifier(&xpc_die_notifier);
+	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_3:
+	free_irq(SGI_XPC_ACTIVATE, NULL);
+out_2:
+	xpc_restrict_IPI_ops();
+	if (xpc_sysctl)
+		unregister_sysctl_table(xpc_sysctl);
+	kfree(xpc_partitions);
+out_1:
+	kfree(xpc_remote_copy_buffer_base);
+	return ret;
 }
 
 module_init(xpc_init);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 7dd4b5812c4..02a858eddd8 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -51,13 +51,7 @@ struct xpc_vars_part *xpc_vars_part;
 static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
 static int xp_nasid_mask_words;	/* actual size in words of nasid mask */
 
-/*
- * For performance reasons, each entry of xpc_partitions[] is cacheline
- * aligned. And xpc_partitions[] is padded with an additional entry at the
- * end so that the last legitimate entry doesn't share its cacheline with
- * another variable.
- */
-struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
+struct xpc_partition *xpc_partitions;
 
 /*
  * Generic buffer used to store a local copy of portions of a remote
@@ -261,7 +255,7 @@ xpc_rsvd_page_init(void)
 
 	/* clear xpc_vars_part */
 	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
-	       XP_MAX_PARTITIONS);
+	       xp_max_npartitions);
 
 	/* initialize the activate IRQ related AMO variables */
 	for (i = 0; i < xp_nasid_mask_words; i++)
@@ -408,7 +402,7 @@ xpc_check_remote_hb(void)
 
 	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
 
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
 
 		if (xpc_exiting)
 			break;
@@ -487,10 +481,8 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 	/* check that the partid is for another partition */
 
-	if (remote_rp->partid < 1 ||
-	    remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
+	if (remote_rp->partid < 0 || remote_rp->partid >= xp_max_npartitions)
 		return xpInvalidPartid;
-	}
 
 	if (remote_rp->partid == sn_partition_id)
 		return xpLocalPartid;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 822dc8e8d7f..cc252f201b2 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -287,7 +287,7 @@ xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
 {
 	long bp;
 
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(channel != XPC_NET_CHANNEL);
 
 	switch (reason) {
@@ -513,7 +513,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/*
 	 * Main send loop.
 	 */
-	for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
+	for (dest_partid = 0; dp && dest_partid < xp_max_npartitions;
 	     dest_partid++) {
 
 		if (!(dp & (1UL << (dest_partid - 1)))) {
-- 
cgit v1.2.3


From 908787db9b95f548270af18d83d62b9d2020ca10 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:05 -0700
Subject: sgi-xp: create a common xp_remote_memcpy() function

Create a common remote memcpy function that maps to the implementation
supported by the hardware the system was booted on.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            | 43 ++---------------------------
 drivers/misc/sgi-xp/xp_main.c       |  3 ++
 drivers/misc/sgi-xp/xp_sn2.c        | 46 +++++++++++++++++++++++++++++++
 drivers/misc/sgi-xp/xp_uv.c         | 11 ++++++++
 drivers/misc/sgi-xp/xpc.h           |  7 -----
 drivers/misc/sgi-xp/xpc_channel.c   | 20 ++++++--------
 drivers/misc/sgi-xp/xpc_partition.c | 55 ++++++++++++++++---------------------
 drivers/misc/sgi-xp/xpnet.c         | 28 +++++++++----------
 8 files changed, 107 insertions(+), 106 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 51087e11188..c42196a1a6b 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -17,7 +17,6 @@
 #include <linux/hardirq.h>
 #include <linux/mutex.h>
 #include <asm/sn/types.h>
-#include <asm/sn/bte.h>
 #ifdef CONFIG_IA64
 #include <asm/sn/arch.h>
 #endif
@@ -71,46 +70,6 @@
 #define XP_NASID_MASK_BYTES	((XP_MAX_PHYSNODE_ID + 7) / 8)
 #define XP_NASID_MASK_WORDS	((XP_MAX_PHYSNODE_ID + 63) / 64)
 
-/*
- * Wrapper for bte_copy() that should it return a failure status will retry
- * the bte_copy() once in the hope that the failure was due to a temporary
- * aberration (i.e., the link going down temporarily).
- *
- * 	src - physical address of the source of the transfer.
- *	vdst - virtual address of the destination of the transfer.
- *	len - number of bytes to transfer from source to destination.
- *	mode - see bte_copy() for definition.
- *	notification - see bte_copy() for definition.
- *
- * Note: xp_bte_copy() should never be called while holding a spinlock.
- */
-static inline bte_result_t
-xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
-{
-	bte_result_t ret;
-	u64 pdst = ia64_tpa(vdst);
-
-	/*
-	 * Ensure that the physically mapped memory is contiguous.
-	 *
-	 * We do this by ensuring that the memory is from region 7 only.
-	 * If the need should arise to use memory from one of the other
-	 * regions, then modify the BUG_ON() statement to ensure that the
-	 * memory from that region is always physically contiguous.
-	 */
-	BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
-
-	ret = bte_copy(src, pdst, len, mode, notification);
-	if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
-		if (!in_interrupt())
-			cond_resched();
-
-		ret = bte_copy(src, pdst, len, mode, notification);
-	}
-
-	return ret;
-}
-
 /*
  * XPC establishes channel connections between the local partition and any
  * other partition that is currently up. Over these channels, kernel-level
@@ -408,6 +367,8 @@ xpc_partid_to_nasids(short partid, void *nasids)
 
 extern short xp_max_npartitions;
 
+extern enum xp_retval (*xp_remote_memcpy) (void *, const void *, size_t);
+
 extern u64 xp_nofault_PIOR_target;
 extern int xp_nofault_PIOR(void *);
 extern int xp_error_PIOR(void);
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index c5cec606377..6f25613b27e 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -36,6 +36,9 @@ struct device *xp = &xp_dbg_subname;
 short xp_max_npartitions;
 EXPORT_SYMBOL_GPL(xp_max_npartitions);
 
+enum xp_retval (*xp_remote_memcpy) (void *dst, const void *src, size_t len);
+EXPORT_SYMBOL_GPL(xp_remote_memcpy);
+
 /*
  * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
  * users of XPC.
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index b92579356a6..3d553fa73f4 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/device.h>
+#include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
 #include "xp.h"
 
@@ -72,6 +73,49 @@ xp_unregister_nofault_code_sn2(void)
 				       err_func_addr, 1, 0);
 }
 
+/*
+ * Wrapper for bte_copy().
+ *
+ *	vdst - virtual address of the destination of the transfer.
+ *	psrc - physical address of the source of the transfer.
+ *	len - number of bytes to transfer from source to destination.
+ *
+ * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(void *vdst, const void *psrc, size_t len)
+{
+	bte_result_t ret;
+	u64 pdst = ia64_tpa(vdst);
+	/* >>> What are the rules governing the src and dst addresses passed in?
+	 * >>> Currently we're assuming that dst is a virtual address and src
+	 * >>> is a physical address, is this appropriate? Can we allow them to
+	 * >>> be whatever and we make the change here without damaging the
+	 * >>> addresses?
+	 */
+
+	/*
+	 * Ensure that the physically mapped memory is contiguous.
+	 *
+	 * We do this by ensuring that the memory is from region 7 only.
+	 * If the need should arise to use memory from one of the other
+	 * regions, then modify the BUG_ON() statement to ensure that the
+	 * memory from that region is always physically contiguous.
+	 */
+	BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
+
+	ret = bte_copy((u64)psrc, pdst, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+	if (ret == BTE_SUCCESS)
+		return xpSuccess;
+
+	if (is_shub2())
+		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x\n", ret);
+	else
+		dev_err(xp, "bte_copy() failed, error=%d\n", ret);
+
+	return xpBteCopyError;
+}
+
 enum xp_retval
 xp_init_sn2(void)
 {
@@ -79,6 +123,8 @@ xp_init_sn2(void)
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
 
+	xp_remote_memcpy = xp_remote_memcpy_sn2;
+
 	return xp_register_nofault_code_sn2();
 }
 
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index 30888be2cdb..dca519fdef9 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -15,12 +15,23 @@
 
 #include "xp.h"
 
+static enum xp_retval
+xp_remote_memcpy_uv(void *vdst, const void *psrc, size_t len)
+{
+	/* >>> this function needs fleshing out */
+	return xpUnsupported;
+}
+
 enum xp_retval
 xp_init_uv(void)
 {
 	BUG_ON(!is_uv());
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+
+	xp_remote_memcpy = xp_remote_memcpy_uv;
+
+	return xpSuccess;
 }
 
 void
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 0f2affd01df..60388bed770 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -20,7 +20,6 @@
 #include <linux/completion.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
-#include <asm/sn/bte.h>
 #include <asm/sn/clksupport.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/mspec.h>
@@ -1125,12 +1124,6 @@ xpc_IPI_init(int index)
 	return amo;
 }
 
-static inline enum xp_retval
-xpc_map_bte_errors(bte_result_t error)
-{
-	return ((error == BTE_SUCCESS) ? xpSuccess : xpBteCopyError);
-}
-
 /*
  * Check to see if there is any channel activity to/from the specified
  * partition.
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 12d8eb6957a..9e79ad7eafe 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -21,7 +21,6 @@
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
-#include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
@@ -252,13 +251,13 @@ xpc_setup_infrastructure(struct xpc_partition *part)
  *
  * src must be a cacheline aligned physical address on the remote partition.
  * dst must be a cacheline aligned virtual address on this partition.
- * cnt must be an cacheline sized
+ * cnt must be cacheline sized
  */
 static enum xp_retval
 xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
 			   const void *src, size_t cnt)
 {
-	bte_result_t bte_ret;
+	enum xp_retval ret;
 
 	DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
 	DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
@@ -267,15 +266,12 @@ xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
 	if (part->act_state == XPC_P_DEACTIVATING)
 		return part->reason;
 
-	bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt,
-			      (BTE_NORMAL | BTE_WACQUIRE), NULL);
-	if (bte_ret == BTE_SUCCESS)
-		return xpSuccess;
-
-	dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
-		XPC_PARTID(part), bte_ret);
-
-	return xpc_map_bte_errors(bte_ret);
+	ret = xp_remote_memcpy(dst, src, cnt);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+			" ret=%d\n", XPC_PARTID(part), ret);
+	}
+	return ret;
 }
 
 /*
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 02a858eddd8..6c82f205097 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -21,7 +21,6 @@
 #include <linux/mmzone.h>
 #include <linux/nodemask.h>
 #include <asm/uncached.h>
-#include <asm/sn/bte.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/nodepda.h>
@@ -92,7 +91,7 @@ xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 static u64
 xpc_get_rsvd_page_pa(int nasid)
 {
-	bte_result_t bte_res;
+	enum xp_retval ret;
 	s64 status;
 	u64 cookie = 0;
 	u64 rp_pa = nasid;	/* seed with nasid */
@@ -113,6 +112,7 @@ xpc_get_rsvd_page_pa(int nasid)
 		if (status != SALRET_MORE_PASSES)
 			break;
 
+		/* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
 		if (L1_CACHE_ALIGN(len) > buf_len) {
 			kfree(buf_base);
 			buf_len = L1_CACHE_ALIGN(len);
@@ -127,10 +127,9 @@ xpc_get_rsvd_page_pa(int nasid)
 			}
 		}
 
-		bte_res = xp_bte_copy(rp_pa, buf, buf_len,
-				      (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bte_res != BTE_SUCCESS) {
-			dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
+		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
+		if (ret != xpSuccess) {
+			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
 			status = SALRET_ERROR;
 			break;
 		}
@@ -398,7 +397,7 @@ xpc_check_remote_hb(void)
 	struct xpc_vars *remote_vars;
 	struct xpc_partition *part;
 	short partid;
-	bte_result_t bres;
+	enum xp_retval ret;
 
 	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
 
@@ -418,13 +417,11 @@ xpc_check_remote_hb(void)
 		}
 
 		/* pull the remote_hb cache line */
-		bres = xp_bte_copy(part->remote_vars_pa,
-				   (u64)remote_vars,
-				   XPC_RP_VARS_SIZE,
-				   (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bres != BTE_SUCCESS) {
-			XPC_DEACTIVATE_PARTITION(part,
-						 xpc_map_bte_errors(bres));
+		ret = xp_remote_memcpy(remote_vars,
+				       (void *)part->remote_vars_pa,
+				       XPC_RP_VARS_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
 			continue;
 		}
 
@@ -457,7 +454,8 @@ static enum xp_retval
 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 {
-	int bres, i;
+	int i;
+	enum xp_retval ret;
 
 	/* get the reserved page's physical address */
 
@@ -466,11 +464,10 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 		return xpNoRsvdPageAddr;
 
 	/* pull over the reserved page header and part_nasids mask */
-	bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
-			   XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
-			   (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS)
-		return xpc_map_bte_errors(bres);
+	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
+			       XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes);
+	if (ret != xpSuccess)
+		return ret;
 
 	if (discovered_nasids != NULL) {
 		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
@@ -504,16 +501,16 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 static enum xp_retval
 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
 {
-	int bres;
+	enum xp_retval ret;
 
 	if (remote_vars_pa == 0)
 		return xpVarsNotSet;
 
 	/* pull over the cross partition variables */
-	bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
-			   (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS)
-		return xpc_map_bte_errors(bres);
+	ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
 
 	if (XPC_VERSION_MAJOR(remote_vars->version) !=
 	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
@@ -1148,7 +1145,6 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 {
 	struct xpc_partition *part;
 	u64 part_nasid_pa;
-	int bte_res;
 
 	part = &xpc_partitions[partid];
 	if (part->remote_rp_pa == 0)
@@ -1158,9 +1154,6 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 
 	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 
-	bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
-			      xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
-			      NULL);
-
-	return xpc_map_bte_errors(bte_res);
+	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
+				xp_nasid_mask_bytes);
 }
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index cc252f201b2..9c540eb1847 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -32,7 +32,6 @@
 #include <linux/mii.h>
 #include <linux/smp.h>
 #include <linux/string.h>
-#include <asm/sn/bte.h>
 #include <asm/sn/io.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/atomic.h>
@@ -169,7 +168,7 @@ static void
 xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 {
 	struct sk_buff *skb;
-	bte_result_t bret;
+	enum xp_retval ret;
 	struct xpnet_dev_private *priv =
 	    (struct xpnet_dev_private *)xpnet_device->priv;
 
@@ -201,7 +200,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 
 	/*
 	 * The allocated skb has some reserved space.
-	 * In order to use bte_copy, we need to get the
+	 * In order to use xp_remote_memcpy(), we need to get the
 	 * skb->data pointer moved forward.
 	 */
 	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
@@ -227,25 +226,24 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 					(size_t)msg->embedded_bytes);
 	} else {
 		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-			"bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
-			(void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-			msg->size);
+			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", (void *)
+				       ((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+					  (void *)msg->buf_pa, msg->size);
 
-		bret = bte_copy(msg->buf_pa,
-				__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-				msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+		ret = xp_remote_memcpy((void *)((u64)skb->data &
+				                ~(L1_CACHE_BYTES - 1)),
+				       (void *)msg->buf_pa, msg->size);
 
-		if (bret != BTE_SUCCESS) {
+		if (ret != xpSuccess) {
 			/*
 			 * >>> Need better way of cleaning skb.  Currently skb
 			 * >>> appears in_use and we can't just call
 			 * >>> dev_kfree_skb.
 			 */
-			dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
-				"error=0x%x\n", (void *)msg->buf_pa,
-				(void *)__pa((u64)skb->data &
-					     ~(L1_CACHE_BYTES - 1)),
-				msg->size, bret);
+			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+				"returned error=0x%x\n", (void *)
+				((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+				(void *)msg->buf_pa, msg->size, ret);
 
 			xpc_received(partid, channel, (void *)msg);
 
-- 
cgit v1.2.3


From 94bd2708d4a95d7da5a1c7c28a063eccd127fb69 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:05 -0700
Subject: sgi-xp: prepare xpc_rsvd_page to work on either sn2 or uv hardware

Prepare XPC's reserved page header to work for either sn2 or uv.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/Makefile        |   3 +-
 drivers/misc/sgi-xp/xp.h            |   5 +-
 drivers/misc/sgi-xp/xpc.h           |  57 +++++++++-----
 drivers/misc/sgi-xp/xpc_main.c      |  27 ++++++-
 drivers/misc/sgi-xp/xpc_partition.c | 149 +++++++++++-------------------------
 drivers/misc/sgi-xp/xpc_sn2.c       | 111 +++++++++++++++++++++++++++
 drivers/misc/sgi-xp/xpc_uv.c        |  48 ++++++++++++
 7 files changed, 267 insertions(+), 133 deletions(-)
 create mode 100644 drivers/misc/sgi-xp/xpc_sn2.c
 create mode 100644 drivers/misc/sgi-xp/xpc_uv.c

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
index b50f2921781..b3eeff31ebf 100644
--- a/drivers/misc/sgi-xp/Makefile
+++ b/drivers/misc/sgi-xp/Makefile
@@ -7,6 +7,7 @@ xp-y				:= xp_main.o xp_uv.o
 xp-$(CONFIG_IA64)		+= xp_sn2.o xp_nofault.o
 
 obj-$(CONFIG_SGI_XP)		+= xpc.o
-xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
+xpc-y				:= xpc_main.o xpc_uv.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64)		+= xpc_sn2.o
 
 obj-$(CONFIG_SGI_XP)		+= xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index c42196a1a6b..0f75592896d 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -220,9 +220,10 @@ enum xp_retval {
 
 	xpBteCopyError,		/* 52: bte_copy() returned error */
 	xpSalError,		/* 53: sn SAL error */
+	xpRsvdPageNotSet,	/* 54: the reserved page is not set up */
 
-	xpUnsupported,		/* 54: unsupported functionality or resource */
-	xpUnknownReason		/* 55: unknown reason - must be last in enum */
+	xpUnsupported,		/* 55: unsupported functionality or resource */
+	xpUnknownReason		/* 56: unknown reason - must be last in enum */
 };
 
 /*
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 60388bed770..94b52bb8151 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -71,11 +71,11 @@
  *
  *   reserved page header
  *
- *     The first cacheline of the reserved page contains the header
- *     (struct xpc_rsvd_page). Before SAL initialization has completed,
+ *     The first two 64-byte cachelines of the reserved page contain the
+ *     header (struct xpc_rsvd_page). Before SAL initialization has completed,
  *     SAL has set up the following fields of the reserved page header:
- *     SAL_signature, SAL_version, partid, and nasids_size. The other
- *     fields are set up by XPC. (xpc_rsvd_page points to the local
+ *     SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
+ *     other fields are set up by XPC. (xpc_rsvd_page points to the local
  *     partition's reserved page.)
  *
  *   part_nasids mask
@@ -89,11 +89,11 @@
  *     nasids. The part_nasids mask is located starting at the first cacheline
  *     following the reserved page header. The mach_nasids mask follows right
  *     after the part_nasids mask. The size in bytes of each mask is reflected
- *     by the reserved page header field 'nasids_size'. (Local partition's
+ *     by the reserved page header field 'SAL_nasids_size'. (Local partition's
  *     mask pointers are xpc_part_nasids and xpc_mach_nasids.)
  *
- *   vars
- *   vars part
+ *   vars	(ia64-sn2 only)
+ *   vars part	(ia64-sn2 only)
  *
  *     Immediately following the mach_nasids mask are the XPC variables
  *     required by other partitions. First are those that are generic to all
@@ -101,25 +101,31 @@
  *     which are partition specific (vars part). These are setup by XPC.
  *     (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
  *
- * Note: Until vars_pa is set, the partition XPC code has not been initialized.
+ * Note: Until 'stamp' is set non-zero, the partition XPC code has not been
+ *       initialized.
  */
 struct xpc_rsvd_page {
 	u64 SAL_signature;	/* SAL: unique signature */
 	u64 SAL_version;	/* SAL: version */
-	u8 partid;		/* SAL: partition ID */
+	short SAL_partid;	/* SAL: partition ID */
+	short max_npartitions;	/* value of xp_max_npartitions */
 	u8 version;
-	u8 pad1[6];		/* align to next u64 in cacheline */
-	u64 vars_pa;		/* physical address of struct xpc_vars */
+	u8 pad1[3];		/* align to next u64 in 1st 64-byte cacheline */
+	union {
+		u64 vars_pa;	/* physical address of struct xpc_vars */
+		u64 activate_mq_gpa;	/* global phys address of activate_mq */
+	} sn;
 	struct timespec stamp;	/* time when reserved page was setup by XPC */
-	u64 pad2[9];		/* align to last u64 in cacheline */
-	u64 nasids_size;	/* SAL: size of each nasid mask in bytes */
+	u64 pad2[9];		/* align to last u64 in 2nd 64-byte cacheline */
+	u64 SAL_nasids_size;	/* SAL: size of each nasid mask in bytes */
 };
 
-#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */
+#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
 
 #define XPC_SUPPORTS_RP_STAMP(_version) \
 			(_version >= _XPC_VERSION(1, 1))
 
+#define ZERO_STAMP	((struct timespec){0, 0})
 /*
  * compare stamps - the return value is:
  *
@@ -218,10 +224,10 @@ xpc_disallow_hb(short partid, struct xpc_vars *vars)
  *
  * An array of these structures, one per partition, will be defined. As a
  * partition becomes active XPC will copy the array entry corresponding to
- * itself from that partition. It is desirable that the size of this
- * structure evenly divide into a cacheline, such that none of the entries
- * in this array crosses a cacheline boundary. As it is now, each entry
- * occupies half a cacheline.
+ * itself from that partition. It is desirable that the size of this structure
+ * evenly divides into a 128-byte cacheline, such that none of the entries in
+ * this array crosses a 128-byte cacheline boundary. As it is now, each entry
+ * occupies a 64-byte cacheline.
  */
 struct xpc_vars_part {
 	u64 magic;
@@ -632,16 +638,25 @@ extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
 
+extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
+
+/* found in xpc_sn2.c */
+extern void xpc_init_sn2(void);
+extern struct xpc_vars *xpc_vars;		/*>>> eliminate from here */
+extern struct xpc_vars_part *xpc_vars_part;	/*>>> eliminate from here */
+
+/* found in xpc_uv.c */
+extern void xpc_init_uv(void);
+
 /* found in xpc_partition.c */
 extern int xpc_exiting;
-extern struct xpc_vars *xpc_vars;
+extern int xp_nasid_mask_words;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
-extern struct xpc_vars_part *xpc_vars_part;
 extern struct xpc_partition *xpc_partitions;
 extern char *xpc_remote_copy_buffer;
 extern void *xpc_remote_copy_buffer_base;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
-extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
+extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
 extern void xpc_allow_IPI_ops(void);
 extern void xpc_restrict_IPI_ops(void);
 extern int xpc_identify_act_IRQ_sender(void);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index a05c7c7da22..2180f1f7e08 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -175,6 +175,8 @@ static struct notifier_block xpc_die_notifier = {
 	.notifier_call = xpc_system_die,
 };
 
+enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
+
 /*
  * Timer function to enforce the timelimit on the partition disengage request.
  */
@@ -949,7 +951,7 @@ xpc_do_exit(enum xp_retval reason)
 	DBUG_ON(xpc_partition_engaged(-1UL));
 
 	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->vars_pa = 0;
+	xpc_rsvd_page->stamp = ZERO_STAMP;
 
 	/* now it's time to eliminate our heartbeat */
 	del_timer_sync(&xpc_hb_timer);
@@ -1128,8 +1130,24 @@ xpc_init(void)
 	struct task_struct *kthread;
 	size_t buf_size;
 
-	if (!ia64_platform_is("sn2"))
+	if (is_shub()) {
+		/*
+		 * The ia64-sn2 architecture supports at most 64 partitions.
+		 * And the inability to unregister remote AMOs restricts us
+		 * further to only support exactly 64 partitions on this
+		 * architecture, no less.
+		 */
+		if (xp_max_npartitions != 64)
+			return -EINVAL;
+
+		xpc_init_sn2();
+
+	} else if (is_uv()) {
+		xpc_init_uv();
+
+	} else {
 		return -ENODEV;
+	}
 
 	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
 	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1214,7 +1232,7 @@ xpc_init(void)
 	 * other partitions to discover we are alive and establish initial
 	 * communications.
 	 */
-	xpc_rsvd_page = xpc_rsvd_page_init();
+	xpc_rsvd_page = xpc_setup_rsvd_page();
 	if (xpc_rsvd_page == NULL) {
 		dev_err(xpc_part, "can't setup our reserved page\n");
 		ret = -EBUSY;
@@ -1273,7 +1291,8 @@ xpc_init(void)
 	/* initialization was not successful */
 out_4:
 	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->vars_pa = 0;
+	xpc_rsvd_page->stamp = ZERO_STAMP;
+
 	del_timer_sync(&xpc_hb_timer);
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 6c82f205097..1db84cb4914 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -20,7 +20,6 @@
 #include <linux/cache.h>
 #include <linux/mmzone.h>
 #include <linux/nodemask.h>
-#include <asm/uncached.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/nodepda.h>
@@ -44,11 +43,10 @@ u64 xpc_prot_vec[MAX_NUMNODES];
 struct xpc_rsvd_page *xpc_rsvd_page;
 static u64 *xpc_part_nasids;
 static u64 *xpc_mach_nasids;
-struct xpc_vars *xpc_vars;
-struct xpc_vars_part *xpc_vars_part;
 
-static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
-static int xp_nasid_mask_words;	/* actual size in words of nasid mask */
+/* >>> next two variables should be 'xpc_' if they remain here */
+static int xp_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
+int xp_nasid_mask_words;	/* actual size in words of nasid mask */
 
 struct xpc_partition *xpc_partitions;
 
@@ -150,12 +148,10 @@ xpc_get_rsvd_page_pa(int nasid)
  * communications.
  */
 struct xpc_rsvd_page *
-xpc_rsvd_page_init(void)
+xpc_setup_rsvd_page(void)
 {
 	struct xpc_rsvd_page *rp;
-	AMO_t *amos_page;
-	u64 rp_pa, nasid_array = 0;
-	int i, ret;
+	u64 rp_pa;
 
 	/* get the local reserved page's address */
 
@@ -168,110 +164,44 @@ xpc_rsvd_page_init(void)
 	}
 	rp = (struct xpc_rsvd_page *)__va(rp_pa);
 
-	if (rp->partid != sn_partition_id) {
-		dev_err(xpc_part, "the reserved page's partid of %d should be "
-			"%d\n", rp->partid, sn_partition_id);
+	if (rp->SAL_version < 3) {
+		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
+		rp->SAL_partid &= 0xff;
+	}
+	BUG_ON(rp->SAL_partid != sn_partition_id);
+
+	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
+		dev_err(xpc_part, "the reserved page's partid of %d is outside "
+			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
+			xp_max_npartitions);
 		return NULL;
 	}
 
 	rp->version = XPC_RP_VERSION;
+	rp->max_npartitions = xp_max_npartitions;
 
 	/* establish the actual sizes of the nasid masks */
 	if (rp->SAL_version == 1) {
 		/* SAL_version 1 didn't set the nasids_size field */
-		rp->nasids_size = 128;
+		rp->SAL_nasids_size = 128;
 	}
-	xp_nasid_mask_bytes = rp->nasids_size;
-	xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+	xp_sizeof_nasid_mask = rp->SAL_nasids_size;
+	xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
+					   BYTES_PER_WORD);
 
 	/* setup the pointers to the various items in the reserved page */
 	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
 	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
-	xpc_vars = XPC_RP_VARS(rp);
-	xpc_vars_part = XPC_RP_VARS_PART(rp);
 
-	/*
-	 * Before clearing xpc_vars, see if a page of AMOs had been previously
-	 * allocated. If not we'll need to allocate one and set permissions
-	 * so that cross-partition AMOs are allowed.
-	 *
-	 * The allocated AMO page needs MCA reporting to remain disabled after
-	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
-	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
-	 * which is pointed to by the reserved page, and re-use that saved copy
-	 * on subsequent loads of XPC. This AMO page is never freed, and its
-	 * memory protections are never restricted.
-	 */
-	amos_page = xpc_vars->amos_page;
-	if (amos_page == NULL) {
-		amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
-		if (amos_page == NULL) {
-			dev_err(xpc_part, "can't allocate page of AMOs\n");
-			return NULL;
-		}
-
-		/*
-		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
-		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
-		 */
-		if (!enable_shub_wars_1_1()) {
-			ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
-						   PAGE_SIZE,
-						   SN_MEMPROT_ACCESS_CLASS_1,
-						   &nasid_array);
-			if (ret != 0) {
-				dev_err(xpc_part, "can't change memory "
-					"protections\n");
-				uncached_free_page(__IA64_UNCACHED_OFFSET |
-						   TO_PHYS((u64)amos_page), 1);
-				return NULL;
-			}
-		}
-	} else if (!IS_AMO_ADDRESS((u64)amos_page)) {
-		/*
-		 * EFI's XPBOOT can also set amos_page in the reserved page,
-		 * but it happens to leave it as an uncached physical address
-		 * and we need it to be an uncached virtual, so we'll have to
-		 * convert it.
-		 */
-		if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
-			dev_err(xpc_part, "previously used amos_page address "
-				"is bad = 0x%p\n", (void *)amos_page);
-			return NULL;
-		}
-		amos_page = (AMO_t *)TO_AMO((u64)amos_page);
-	}
-
-	/* clear xpc_vars */
-	memset(xpc_vars, 0, sizeof(struct xpc_vars));
-
-	xpc_vars->version = XPC_V_VERSION;
-	xpc_vars->act_nasid = cpuid_to_nasid(0);
-	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
-	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
-	xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
-	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */
-
-	/* clear xpc_vars_part */
-	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
-	       xp_max_npartitions);
-
-	/* initialize the activate IRQ related AMO variables */
-	for (i = 0; i < xp_nasid_mask_words; i++)
-		(void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
-
-	/* initialize the engaged remote partitions related AMO variables */
-	(void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
-	(void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
-
-	/* timestamp of when reserved page was setup by XPC */
-	rp->stamp = CURRENT_TIME;
+	if (xpc_rsvd_page_init(rp) != xpSuccess)
+		return NULL;
 
 	/*
+	 * Set timestamp of when reserved page was setup by XPC.
 	 * This signifies to the remote partition that our reserved
 	 * page is initialized.
 	 */
-	rp->vars_pa = __pa(xpc_vars);
+	rp->stamp = CURRENT_TIME;
 
 	return rp;
 }
@@ -465,7 +395,7 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 	/* pull over the reserved page header and part_nasids mask */
 	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
-			       XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes);
+			       XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
 	if (ret != xpSuccess)
 		return ret;
 
@@ -476,19 +406,28 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 			discovered_nasids[i] |= remote_part_nasids[i];
 	}
 
-	/* check that the partid is for another partition */
+	/* check that the partid is valid and is for another partition */
 
-	if (remote_rp->partid < 0 || remote_rp->partid >= xp_max_npartitions)
+	if (remote_rp->SAL_partid < 0 ||
+	    remote_rp->SAL_partid >= xp_max_npartitions) {
 		return xpInvalidPartid;
+	}
 
-	if (remote_rp->partid == sn_partition_id)
+	if (remote_rp->SAL_partid == sn_partition_id)
 		return xpLocalPartid;
 
+	/* see if the rest of the reserved page has been set up by XPC */
+	if (timespec_equal(&remote_rp->stamp, &ZERO_STAMP))
+		return xpRsvdPageNotSet;
+
 	if (XPC_VERSION_MAJOR(remote_rp->version) !=
 	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
 		return xpBadVersion;
 	}
 
+	if (remote_rp->max_npartitions <= sn_partition_id)
+		return xpInvalidPartid;
+
 	return xpSuccess;
 }
 
@@ -592,7 +531,7 @@ xpc_identify_act_IRQ_req(int nasid)
 	int remote_rp_version;
 	int reactivate = 0;
 	int stamp_diff;
-	struct timespec remote_rp_stamp = { 0, 0 };
+	struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
 	short partid;
 	struct xpc_partition *part;
 	enum xp_retval ret;
@@ -608,12 +547,12 @@ xpc_identify_act_IRQ_req(int nasid)
 		return;
 	}
 
-	remote_vars_pa = remote_rp->vars_pa;
+	remote_vars_pa = remote_rp->sn.vars_pa;
 	remote_rp_version = remote_rp->version;
 	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
 		remote_rp_stamp = remote_rp->stamp;
 
-	partid = remote_rp->partid;
+	partid = remote_rp->SAL_partid;
 	part = &xpc_partitions[partid];
 
 	/* pull over the cross partition variables */
@@ -977,7 +916,7 @@ xpc_discovery(void)
 	enum xp_retval ret;
 
 	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
-						  xp_nasid_mask_bytes,
+						  xp_sizeof_nasid_mask,
 						  GFP_KERNEL, &remote_rp_base);
 	if (remote_rp == NULL)
 		return;
@@ -1063,9 +1002,9 @@ xpc_discovery(void)
 				continue;
 			}
 
-			remote_vars_pa = remote_rp->vars_pa;
+			remote_vars_pa = remote_rp->sn.vars_pa;
 
-			partid = remote_rp->partid;
+			partid = remote_rp->SAL_partid;
 			part = &xpc_partitions[partid];
 
 			/* pull over the cross partition variables */
@@ -1155,5 +1094,5 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 
 	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
-				xp_nasid_mask_bytes);
+				xp_sizeof_nasid_mask);
 }
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 00000000000..5a37348715c
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,111 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) sn2-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <asm/uncached.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+struct xpc_vars *xpc_vars;
+struct xpc_vars_part *xpc_vars_part;
+
+static enum xp_retval
+xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
+{
+	AMO_t *amos_page;
+	u64 nasid_array = 0;
+	int i;
+	int ret;
+
+	xpc_vars = XPC_RP_VARS(rp);
+
+	rp->sn.vars_pa = __pa(xpc_vars);
+
+	xpc_vars_part = XPC_RP_VARS_PART(rp);
+
+	/*
+	 * Before clearing xpc_vars, see if a page of AMOs had been previously
+	 * allocated. If not we'll need to allocate one and set permissions
+	 * so that cross-partition AMOs are allowed.
+	 *
+	 * The allocated AMO page needs MCA reporting to remain disabled after
+	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
+	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
+	 * which is pointed to by the reserved page, and re-use that saved copy
+	 * on subsequent loads of XPC. This AMO page is never freed, and its
+	 * memory protections are never restricted.
+	 */
+	amos_page = xpc_vars->amos_page;
+	if (amos_page == NULL) {
+		amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
+		if (amos_page == NULL) {
+			dev_err(xpc_part, "can't allocate page of AMOs\n");
+			return xpNoMemory;
+		}
+
+		/*
+		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
+		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
+		 */
+		if (!enable_shub_wars_1_1()) {
+			ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
+						   PAGE_SIZE,
+						   SN_MEMPROT_ACCESS_CLASS_1,
+						   &nasid_array);
+			if (ret != 0) {
+				dev_err(xpc_part, "can't change memory "
+					"protections\n");
+				uncached_free_page(__IA64_UNCACHED_OFFSET |
+						   TO_PHYS((u64)amos_page), 1);
+				return xpSalError;
+			}
+		}
+	}
+
+	/* clear xpc_vars */
+	memset(xpc_vars, 0, sizeof(struct xpc_vars));
+
+	xpc_vars->version = XPC_V_VERSION;
+	xpc_vars->act_nasid = cpuid_to_nasid(0);
+	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
+	xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
+	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */
+
+	/* clear xpc_vars_part */
+	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+	       xp_max_npartitions);
+
+	/* initialize the activate IRQ related AMO variables */
+	for (i = 0; i < xp_nasid_mask_words; i++)
+		(void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
+
+	/* initialize the engaged remote partitions related AMO variables */
+	(void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+	(void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+
+	return xpSuccess;
+}
+
+void
+xpc_init_sn2(void)
+{
+	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
+}
+
+void
+xpc_exit_sn2(void)
+{
+}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 00000000000..8327cd4017e
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,48 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) uv-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+
+/* >>> #include <gru/grukservices.h> */
+/* >>> uv_gpa() is defined in <gru/grukservices.h> */
+#define uv_gpa(_a)		((unsigned long)_a)
+
+/* >>> temporarily define next three items for xpc.h */
+#define	SGI_XPC_ACTIVATE	23
+#define	SGI_XPC_NOTIFY		24
+#define sn_send_IPI_phys(_a, _b, _c, _d)
+
+#include "xpc.h"
+
+static void *xpc_activate_mq;
+
+static enum xp_retval
+xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
+{
+	/* >>> xpc_activate_mq must be set up before this; it is still NULL */
+	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq);
+	return xpSuccess;
+}
+
+void
+xpc_init_uv(void)
+{
+	xpc_rsvd_page_init = xpc_rsvd_page_init_uv;
+}
+
+void
+xpc_exit_uv(void)
+{
+}
-- 
cgit v1.2.3


From e17d416b1bc947df68499863f13b401fb42b48f6 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:06 -0700
Subject: sgi-xp: isolate xpc_vars_part structure to sn2 only

Isolate the xpc_vars_part structure of XPC's reserved page to sn2 only.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           |  26 +-
 drivers/misc/sgi-xp/xpc_channel.c   | 538 +---------------------------------
 drivers/misc/sgi-xp/xpc_main.c      |  97 ++-----
 drivers/misc/sgi-xp/xpc_partition.c |   1 +
 drivers/misc/sgi-xp/xpc_sn2.c       | 563 +++++++++++++++++++++++++++++++++++-
 drivers/misc/sgi-xp/xpc_uv.c        |  48 +++
 6 files changed, 644 insertions(+), 629 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 94b52bb8151..e8c2a162960 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -227,9 +227,9 @@ xpc_disallow_hb(short partid, struct xpc_vars *vars)
  * itself from that partition. It is desirable that the size of this structure
  * evenly divides into a 128-byte cacheline, such that none of the entries in
  * this array crosses a 128-byte cacheline boundary. As it is now, each entry
- * occupies a 64-byte cacheline.
+ * occupies 64 bytes.
  */
-struct xpc_vars_part {
+struct xpc_vars_part_sn2 {
 	u64 magic;
 
 	u64 openclose_args_pa;	/* physical address of open and close args */
@@ -265,8 +265,6 @@ struct xpc_vars_part {
 #define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
 #define XPC_RP_VARS(_rp)	((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \
 				    xp_nasid_mask_words))
-#define XPC_RP_VARS_PART(_rp)	((struct xpc_vars_part *) \
-				    ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
 
 /*
  * Functions registered by add_timer() or called by kernel_thread() only
@@ -541,13 +539,6 @@ struct xpc_partition {
 	wait_queue_head_t teardown_wq;	/* kthread waiting to teardown infra */
 	atomic_t references;	/* #of references to infrastructure */
 
-	/*
-	 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
-	 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
-	 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
-	 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
-	 */
-
 	u8 nchannels;		/* #of defined channels supported */
 	atomic_t nchannels_active;  /* #of channels that are not DISCONNECTED */
 	atomic_t nchannels_engaged;  /* #of channels engaged with remote part */
@@ -613,7 +604,7 @@ struct xpc_partition {
  * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
  * after the IPI was received.
  */
-#define XPC_P_DROPPED_IPI_WAIT	(0.25 * HZ)
+#define XPC_P_DROPPED_IPI_WAIT_INTERVAL	(0.25 * HZ)
 
 /* number of seconds to wait for other partitions to disengage */
 #define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT	90
@@ -637,13 +628,16 @@ extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
-
 extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
+extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
+extern u64 (*xpc_get_IPI_flags) (struct xpc_partition *);
+extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
+extern enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *);
+extern void (*xpc_teardown_infrastructure) (struct xpc_partition *);
 
 /* found in xpc_sn2.c */
 extern void xpc_init_sn2(void);
 extern struct xpc_vars *xpc_vars;		/*>>> eliminate from here */
-extern struct xpc_vars_part *xpc_vars_part;	/*>>> eliminate from here */
 
 /* found in xpc_uv.c */
 extern void xpc_init_uv(void);
@@ -670,6 +664,7 @@ extern void xpc_deactivate_partition(const int, struct xpc_partition *,
 extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
 
 /* found in xpc_channel.c */
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern void xpc_initiate_connect(int);
 extern void xpc_initiate_disconnect(int);
 extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **);
@@ -677,8 +672,6 @@ extern enum xp_retval xpc_initiate_send(short, int, void *);
 extern enum xp_retval xpc_initiate_send_notify(short, int, void *,
 					       xpc_notify_func, void *);
 extern void xpc_initiate_received(short, int, void *);
-extern enum xp_retval xpc_setup_infrastructure(struct xpc_partition *);
-extern enum xp_retval xpc_pull_remote_vars_part(struct xpc_partition *);
 extern void xpc_process_channel_activity(struct xpc_partition *);
 extern void xpc_connected_callout(struct xpc_channel *);
 extern void xpc_deliver_msg(struct xpc_channel *);
@@ -686,7 +679,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 				   enum xp_retval, unsigned long *);
 extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
 extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
-extern void xpc_teardown_infrastructure(struct xpc_partition *);
 
 static inline void
 xpc_wakeup_channel_mgr(struct xpc_partition *part)
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 9e79ad7eafe..8081e8155df 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -27,7 +27,7 @@
 /*
  * Guarantee that the kzalloc'd memory is cacheline aligned.
  */
-static void *
+void *
 xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 {
 	/* see if kzalloc will give us cachline aligned memory by default */
@@ -48,382 +48,6 @@ xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 	return (void *)L1_CACHE_ALIGN((u64)*base);
 }
 
-/*
- * Set up the initial values for the XPartition Communication channels.
- */
-static void
-xpc_initialize_channels(struct xpc_partition *part, short partid)
-{
-	int ch_number;
-	struct xpc_channel *ch;
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		ch->partid = partid;
-		ch->number = ch_number;
-		ch->flags = XPC_C_DISCONNECTED;
-
-		ch->local_GP = &part->local_GPs[ch_number];
-		ch->local_openclose_args =
-		    &part->local_openclose_args[ch_number];
-
-		atomic_set(&ch->kthreads_assigned, 0);
-		atomic_set(&ch->kthreads_idle, 0);
-		atomic_set(&ch->kthreads_active, 0);
-
-		atomic_set(&ch->references, 0);
-		atomic_set(&ch->n_to_notify, 0);
-
-		spin_lock_init(&ch->lock);
-		mutex_init(&ch->msg_to_pull_mutex);
-		init_completion(&ch->wdisconnect_wait);
-
-		atomic_set(&ch->n_on_msg_allocate_wq, 0);
-		init_waitqueue_head(&ch->msg_allocate_wq);
-		init_waitqueue_head(&ch->idle_wq);
-	}
-}
-
-/*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-enum xp_retval
-xpc_setup_infrastructure(struct xpc_partition *part)
-{
-	int ret, cpuid;
-	struct timer_list *timer;
-	short partid = XPC_PARTID(part);
-
-	/*
-	 * Zero out MOST of the entry for this partition. Only the fields
-	 * starting with `nchannels' will be zeroed. The preceding fields must
-	 * remain `viable' across partition ups and downs, since they may be
-	 * referenced during this memset() operation.
-	 */
-	memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
-	       offsetof(struct xpc_partition, nchannels));
-
-	/*
-	 * Allocate all of the channel structures as a contiguous chunk of
-	 * memory.
-	 */
-	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
-				 GFP_KERNEL);
-	if (part->channels == NULL) {
-		dev_err(xpc_chan, "can't get memory for channels\n");
-		return xpNoMemory;
-	}
-
-	part->nchannels = XPC_MAX_NCHANNELS;
-
-	/* allocate all the required GET/PUT values */
-
-	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							GFP_KERNEL,
-							&part->local_GPs_base);
-	if (part->local_GPs == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		dev_err(xpc_chan, "can't get memory for local get/put "
-			"values\n");
-		return xpNoMemory;
-	}
-
-	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							 GFP_KERNEL,
-							 &part->
-							 remote_GPs_base);
-	if (part->remote_GPs == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote get/put "
-			"values\n");
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpNoMemory;
-	}
-
-	/* allocate all the required open and close args */
-
-	part->local_openclose_args =
-	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					  &part->local_openclose_args_base);
-	if (part->local_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for local connect args\n");
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpNoMemory;
-	}
-
-	part->remote_openclose_args =
-	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					  &part->remote_openclose_args_base);
-	if (part->remote_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote connect args\n");
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpNoMemory;
-	}
-
-	xpc_initialize_channels(part, partid);
-
-	atomic_set(&part->nchannels_active, 0);
-	atomic_set(&part->nchannels_engaged, 0);
-
-	/* local_IPI_amo were set to 0 by an earlier memset() */
-
-	/* Initialize this partitions AMO_t structure */
-	part->local_IPI_amo_va = xpc_IPI_init(partid);
-
-	spin_lock_init(&part->IPI_lock);
-
-	atomic_set(&part->channel_mgr_requests, 1);
-	init_waitqueue_head(&part->channel_mgr_wq);
-
-	sprintf(part->IPI_owner, "xpc%02d", partid);
-	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
-			  part->IPI_owner, (void *)(u64)partid);
-	if (ret != 0) {
-		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
-			"errno=%d\n", -ret);
-		kfree(part->remote_openclose_args_base);
-		part->remote_openclose_args = NULL;
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpLackOfResources;
-	}
-
-	/* Setup a timer to check for dropped IPIs */
-	timer = &part->dropped_IPI_timer;
-	init_timer(timer);
-	timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
-	timer->data = (unsigned long)part;
-	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
-	add_timer(timer);
-
-	/*
-	 * With the setting of the partition setup_state to XPC_P_SETUP, we're
-	 * declaring that this partition is ready to go.
-	 */
-	part->setup_state = XPC_P_SETUP;
-
-	/*
-	 * Setup the per partition specific variables required by the
-	 * remote partition to establish channel connections with us.
-	 *
-	 * The setting of the magic # indicates that these per partition
-	 * specific variables are ready to be used.
-	 */
-	xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
-	xpc_vars_part[partid].openclose_args_pa =
-	    __pa(part->local_openclose_args);
-	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
-	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
-	xpc_vars_part[partid].nchannels = part->nchannels;
-	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
-
-	return xpSuccess;
-}
-
-/*
- * Create a wrapper that hides the underlying mechanism for pulling a cacheline
- * (or multiple cachelines) from a remote partition.
- *
- * src must be a cacheline aligned physical address on the remote partition.
- * dst must be a cacheline aligned virtual address on this partition.
- * cnt must be cacheline sized
- */
-static enum xp_retval
-xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
-			   const void *src, size_t cnt)
-{
-	enum xp_retval ret;
-
-	DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
-	DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
-	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
-
-	if (part->act_state == XPC_P_DEACTIVATING)
-		return part->reason;
-
-	ret = xp_remote_memcpy(dst, src, cnt);
-	if (ret != xpSuccess) {
-		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
-			" ret=%d\n", XPC_PARTID(part), ret);
-	}
-	return ret;
-}
-
-/*
- * Pull the remote per partition specific variables from the specified
- * partition.
- */
-enum xp_retval
-xpc_pull_remote_vars_part(struct xpc_partition *part)
-{
-	u8 buffer[L1_CACHE_BYTES * 2];
-	struct xpc_vars_part *pulled_entry_cacheline =
-	    (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer);
-	struct xpc_vars_part *pulled_entry;
-	u64 remote_entry_cacheline_pa, remote_entry_pa;
-	short partid = XPC_PARTID(part);
-	enum xp_retval ret;
-
-	/* pull the cacheline that contains the variables we're interested in */
-
-	DBUG_ON(part->remote_vars_part_pa !=
-		L1_CACHE_ALIGN(part->remote_vars_part_pa));
-	DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
-
-	remote_entry_pa = part->remote_vars_part_pa +
-	    sn_partition_id * sizeof(struct xpc_vars_part);
-
-	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
-
-	pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline +
-						(remote_entry_pa &
-						 (L1_CACHE_BYTES - 1)));
-
-	ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
-					 (void *)remote_entry_cacheline_pa,
-					 L1_CACHE_BYTES);
-	if (ret != xpSuccess) {
-		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
-			"partition %d, ret=%d\n", partid, ret);
-		return ret;
-	}
-
-	/* see if they've been set up yet */
-
-	if (pulled_entry->magic != XPC_VP_MAGIC1 &&
-	    pulled_entry->magic != XPC_VP_MAGIC2) {
-
-		if (pulled_entry->magic != 0) {
-			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d has bad magic value (=0x%lx)\n",
-				partid, sn_partition_id, pulled_entry->magic);
-			return xpBadMagic;
-		}
-
-		/* they've not been initialized yet */
-		return xpRetry;
-	}
-
-	if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
-
-		/* validate the variables */
-
-		if (pulled_entry->GPs_pa == 0 ||
-		    pulled_entry->openclose_args_pa == 0 ||
-		    pulled_entry->IPI_amo_pa == 0) {
-
-			dev_err(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d are not valid\n", partid,
-				sn_partition_id);
-			return xpInvalidAddress;
-		}
-
-		/* the variables we imported look to be valid */
-
-		part->remote_GPs_pa = pulled_entry->GPs_pa;
-		part->remote_openclose_args_pa =
-		    pulled_entry->openclose_args_pa;
-		part->remote_IPI_amo_va =
-		    (AMO_t *)__va(pulled_entry->IPI_amo_pa);
-		part->remote_IPI_nasid = pulled_entry->IPI_nasid;
-		part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
-
-		if (part->nchannels > pulled_entry->nchannels)
-			part->nchannels = pulled_entry->nchannels;
-
-		/* let the other side know that we've pulled their variables */
-
-		xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
-	}
-
-	if (pulled_entry->magic == XPC_VP_MAGIC1)
-		return xpRetry;
-
-	return xpSuccess;
-}
-
-/*
- * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
- */
-static u64
-xpc_get_IPI_flags(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	u64 IPI_amo;
-	enum xp_retval ret;
-
-	/*
-	 * See if there are any IPI flags to be handled.
-	 */
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	IPI_amo = part->local_IPI_amo;
-	if (IPI_amo != 0)
-		part->local_IPI_amo = 0;
-
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part,
-						 part->remote_openclose_args,
-						 (void *)part->
-						 remote_openclose_args_pa,
-						 XPC_OPENCLOSE_ARGS_SIZE);
-		if (ret != xpSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull openclose args from "
-				"partition %d, ret=%d\n", XPC_PARTID(part),
-				ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
-						 (void *)part->remote_GPs_pa,
-						 XPC_GP_SIZE);
-		if (ret != xpSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull GPs from partition "
-				"%d, ret=%d\n", XPC_PARTID(part), ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	return IPI_amo;
-}
-
 /*
  * Allocate the local message queue and the notify queue.
  */
@@ -1364,59 +988,6 @@ xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
 	xpc_part_deref(part);
 }
 
-/*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-void
-xpc_teardown_infrastructure(struct xpc_partition *part)
-{
-	short partid = XPC_PARTID(part);
-
-	/*
-	 * We start off by making this partition inaccessible to local
-	 * processes by marking it as no longer setup. Then we make it
-	 * inaccessible to remote processes by clearing the XPC per partition
-	 * specific variable's magic # (which indicates that these variables
-	 * are no longer valid) and by ignoring all XPC notify IPIs sent to
-	 * this partition.
-	 */
-
-	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
-	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
-	DBUG_ON(part->setup_state != XPC_P_SETUP);
-	part->setup_state = XPC_P_WTEARDOWN;
-
-	xpc_vars_part[partid].magic = 0;
-
-	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
-
-	/*
-	 * Before proceeding with the teardown we have to wait until all
-	 * existing references cease.
-	 */
-	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
-
-	/* now we can begin tearing down the infrastructure */
-
-	part->setup_state = XPC_P_TORNDOWN;
-
-	/* in case we've still got outstanding timers registered... */
-	del_timer_sync(&part->dropped_IPI_timer);
-
-	kfree(part->remote_openclose_args_base);
-	part->remote_openclose_args = NULL;
-	kfree(part->local_openclose_args_base);
-	part->local_openclose_args = NULL;
-	kfree(part->remote_GPs_base);
-	part->remote_GPs = NULL;
-	kfree(part->local_GPs_base);
-	part->local_GPs = NULL;
-	kfree(part->channels);
-	part->channels = NULL;
-	part->local_IPI_amo_va = NULL;
-}
-
 /*
  * Called by XP at the time of channel connection registration to cause
  * XPC to establish connections to all currently active partitions.
@@ -1974,113 +1545,6 @@ xpc_initiate_send_notify(short partid, int ch_number, void *payload,
 	return ret;
 }
 
-static struct xpc_msg *
-xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	struct xpc_msg *remote_msg, *msg;
-	u32 msg_index, nmsgs;
-	u64 msg_offset;
-	enum xp_retval ret;
-
-	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
-		/* we were interrupted by a signal */
-		return NULL;
-	}
-
-	while (get >= ch->next_msg_to_pull) {
-
-		/* pull as many messages as are ready and able to be pulled */
-
-		msg_index = ch->next_msg_to_pull % ch->remote_nentries;
-
-		DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
-		nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
-		if (msg_index + nmsgs > ch->remote_nentries) {
-			/* ignore the ones that wrap the msg queue for now */
-			nmsgs = ch->remote_nentries - msg_index;
-		}
-
-		msg_offset = msg_index * ch->msg_size;
-		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
-		remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
-						msg_offset);
-
-		ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
-						 nmsgs * ch->msg_size);
-		if (ret != xpSuccess) {
-
-			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
-				" msg %ld from partition %d, channel=%d, "
-				"ret=%d\n", nmsgs, ch->next_msg_to_pull,
-				ch->partid, ch->number, ret);
-
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			mutex_unlock(&ch->msg_to_pull_mutex);
-			return NULL;
-		}
-
-		ch->next_msg_to_pull += nmsgs;
-	}
-
-	mutex_unlock(&ch->msg_to_pull_mutex);
-
-	/* return the message we were looking for */
-	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
-	msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
-
-	return msg;
-}
-
-/*
- * Get a message to be delivered.
- */
-static struct xpc_msg *
-xpc_get_deliverable_msg(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg = NULL;
-	s64 get;
-
-	do {
-		if (ch->flags & XPC_C_DISCONNECTING)
-			break;
-
-		get = ch->w_local_GP.get;
-		rmb();	/* guarantee that .get loads before .put */
-		if (get == ch->w_remote_GP.put)
-			break;
-
-		/* There are messages waiting to be pulled and delivered.
-		 * We need to try to secure one for ourselves. We'll do this
-		 * by trying to increment w_local_GP.get and hope that no one
-		 * else beats us to it. If they do, we'll we'll simply have
-		 * to try again for the next one.
-		 */
-
-		if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
-			/* we got the entry referenced by get */
-
-			dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
-				"partid=%d, channel=%d\n", get + 1,
-				ch->partid, ch->number);
-
-			/* pull the message from the remote partition */
-
-			msg = xpc_pull_remote_msg(ch, get);
-
-			DBUG_ON(msg != NULL && msg->number != get);
-			DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
-			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
-
-			break;
-		}
-
-	} while (1);
-
-	return msg;
-}
-
 /*
  * Deliver a message to its intended recipient.
  */
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 2180f1f7e08..be3a4853930 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -176,6 +176,12 @@ static struct notifier_block xpc_die_notifier = {
 };
 
 enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
+enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
+u64 (*xpc_get_IPI_flags) (struct xpc_partition *part);
+struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
+enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *part);
+void (*xpc_teardown_infrastructure) (struct xpc_partition *part);
+
 
 /*
  * Timer function to enforce the timelimit on the partition disengage request.
@@ -312,38 +318,9 @@ xpc_initiate_discovery(void *ignore)
 	return 0;
 }
 
-/*
- * Establish first contact with the remote partititon. This involves pulling
- * the XPC per partition variables from the remote partition and waiting for
- * the remote partition to pull ours.
- */
-static enum xp_retval
-xpc_make_first_contact(struct xpc_partition *part)
-{
-	enum xp_retval ret;
-
-	while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) {
-		if (ret != xpRetry) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			return ret;
-		}
-
-		dev_dbg(xpc_chan, "waiting to make first contact with "
-			"partition %d\n", XPC_PARTID(part));
-
-		/* wait a 1/4 of a second or so */
-		(void)msleep_interruptible(250);
-
-		if (part->act_state == XPC_P_DEACTIVATING)
-			return part->reason;
-	}
-
-	return xpc_mark_partition_active(part);
-}
-
 /*
  * The first kthread assigned to a newly activated partition is the one
- * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
  * that kthread until the partition is brought down, at which time that kthread
  * returns back to XPC HB. (The return of that kthread will signify to XPC HB
  * that XPC has dismantled all communication infrastructure for the associated
@@ -393,41 +370,10 @@ xpc_channel_mgr(struct xpc_partition *part)
  * upped partition.
  *
  * The kthread that was created by XPC HB and which setup the XPC
- * infrastructure will remain assigned to the partition until the partition
- * goes down. At which time the kthread will teardown the XPC infrastructure
- * and then exit.
- *
- * XPC HB will put the remote partition's XPC per partition specific variables
- * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
- * calling xpc_partition_up().
+ * infrastructure will remain assigned to the partition, becoming the channel
+ * manager for that partition, until the partition is deactivating, at which
+ * time the kthread will tear down the XPC infrastructure and then exit.
  */
-static void
-xpc_partition_up(struct xpc_partition *part)
-{
-	DBUG_ON(part->channels != NULL);
-
-	dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
-
-	if (xpc_setup_infrastructure(part) != xpSuccess)
-		return;
-
-	/*
-	 * The kthread that XPC HB called us with will become the
-	 * channel manager for this partition. It will not return
-	 * back to XPC HB until the partition's XPC infrastructure
-	 * has been dismantled.
-	 */
-
-	(void)xpc_part_ref(part);	/* this will always succeed */
-
-	if (xpc_make_first_contact(part) == xpSuccess)
-		xpc_channel_mgr(part);
-
-	xpc_part_deref(part);
-
-	xpc_teardown_infrastructure(part);
-}
-
 static int
 xpc_activating(void *__partid)
 {
@@ -453,7 +399,7 @@ xpc_activating(void *__partid)
 	XPC_SET_REASON(part, 0, 0);
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
 
-	dev_dbg(xpc_part, "bringing partition %d up\n", partid);
+	dev_dbg(xpc_part, "activating partition %d\n", partid);
 
 	/*
 	 * Register the remote partition's AMOs with SAL so it can handle
@@ -467,7 +413,7 @@ xpc_activating(void *__partid)
 	 */
 	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
 				       PAGE_SIZE, 1) < 0) {
-		dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
+		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
 			 "xp_addr region\n", partid);
 
 		spin_lock_irqsave(&part->act_lock, irq_flags);
@@ -481,11 +427,18 @@ xpc_activating(void *__partid)
 	xpc_allow_hb(partid, xpc_vars);
 	xpc_IPI_send_activated(part);
 
-	/*
-	 * xpc_partition_up() holds this thread and marks this partition as
-	 * XPC_P_ACTIVE by calling xpc_hb_mark_active().
-	 */
-	(void)xpc_partition_up(part);
+	if (xpc_setup_infrastructure(part) == xpSuccess) {
+		(void)xpc_part_ref(part);	/* this will always succeed */
+
+		if (xpc_make_first_contact(part) == xpSuccess) {
+			xpc_mark_partition_active(part);
+			xpc_channel_mgr(part);
+			/* won't return until partition is deactivating */
+		}
+
+		xpc_part_deref(part);
+		xpc_teardown_infrastructure(part);
+	}
 
 	xpc_disallow_hb(partid, xpc_vars);
 	xpc_mark_partition_inactive(part);
@@ -568,7 +521,7 @@ xpc_dropped_IPI_check(struct xpc_partition *part)
 		xpc_check_for_channel_activity(part);
 
 		part->dropped_IPI_timer.expires = jiffies +
-		    XPC_P_DROPPED_IPI_WAIT;
+		    XPC_P_DROPPED_IPI_WAIT_INTERVAL;
 		add_timer(&part->dropped_IPI_timer);
 		xpc_part_deref(part);
 	}
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 1db84cb4914..be5b7547dab 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -486,6 +486,7 @@ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
 	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
 		part->last_heartbeat);
 
+/* >>> remote_vars_part_pa and vars_part_pa are sn2 only!!! */
 	part->remote_vars_part_pa = remote_vars->vars_part_pa;
 	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
 		part->remote_vars_part_pa);
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 5a37348715c..ee28e231dc4 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -14,12 +14,13 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <asm/uncached.h>
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
 struct xpc_vars *xpc_vars;
-struct xpc_vars_part *xpc_vars_part;
+static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */
 
 static enum xp_retval
 xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
@@ -33,7 +34,10 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	rp->sn.vars_pa = __pa(xpc_vars);
 
-	xpc_vars_part = XPC_RP_VARS_PART(rp);
+	/* vars_part array follows immediately after vars */
+	xpc_vars_part = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+						     XPC_RP_VARS_SIZE);
+
 
 	/*
 	 * Before clearing xpc_vars, see if a page of AMOs had been previously
@@ -85,7 +89,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */
 
 	/* clear xpc_vars_part */
-	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part_sn2) *
 	       xp_max_npartitions);
 
 	/* initialize the activate IRQ related AMO variables */
@@ -99,10 +103,563 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	return xpSuccess;
 }
 
+/*
+ * Setup the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_infrastructure_sn2(struct xpc_partition *part)
+{
+	enum xp_retval retval;
+	int ret;
+	int cpuid;
+	int ch_number;
+	struct xpc_channel *ch;
+	struct timer_list *timer;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Allocate all of the channel structures as a contiguous chunk of
+	 * memory.
+	 */
+	DBUG_ON(part->channels != NULL);
+	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+				 GFP_KERNEL);
+	if (part->channels == NULL) {
+		dev_err(xpc_chan, "can't get memory for channels\n");
+		return xpNoMemory;
+	}
+
+	/* allocate all the required GET/PUT values */
+
+	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+							GFP_KERNEL,
+							&part->local_GPs_base);
+	if (part->local_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for local get/put "
+			"values\n");
+		retval = xpNoMemory;
+		goto out_1;
+	}
+
+	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+							 GFP_KERNEL,
+							 &part->
+							 remote_GPs_base);
+	if (part->remote_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote get/put "
+			"values\n");
+		retval = xpNoMemory;
+		goto out_2;
+	}
+
+	part->remote_GPs_pa = 0;
+
+	/* allocate all the required open and close args */
+
+	part->local_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
+					  &part->local_openclose_args_base);
+	if (part->local_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for local connect args\n");
+		retval = xpNoMemory;
+		goto out_3;
+	}
+
+	part->remote_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
+					  &part->remote_openclose_args_base);
+	if (part->remote_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote connect args\n");
+		retval = xpNoMemory;
+		goto out_4;
+	}
+
+	part->remote_openclose_args_pa = 0;
+
+	part->local_IPI_amo_va = xpc_IPI_init(partid);
+	part->local_IPI_amo = 0;
+	spin_lock_init(&part->IPI_lock);
+
+	part->remote_IPI_nasid = 0;
+	part->remote_IPI_phys_cpuid = 0;
+	part->remote_IPI_amo_va = NULL;
+
+	atomic_set(&part->channel_mgr_requests, 1);
+	init_waitqueue_head(&part->channel_mgr_wq);
+
+	sprintf(part->IPI_owner, "xpc%02d", partid);
+	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
+			  part->IPI_owner, (void *)(u64)partid);
+	if (ret != 0) {
+		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+			"errno=%d\n", -ret);
+		retval = xpLackOfResources;
+		goto out_5;
+	}
+
+	/* Setup a timer to check for dropped IPIs */
+	timer = &part->dropped_IPI_timer;
+	init_timer(timer);
+	timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
+	timer->data = (unsigned long)part;
+	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT_INTERVAL;
+	add_timer(timer);
+
+	part->nchannels = XPC_MAX_NCHANNELS;
+
+	atomic_set(&part->nchannels_active, 0);
+	atomic_set(&part->nchannels_engaged, 0);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		ch->partid = partid;
+		ch->number = ch_number;
+		ch->flags = XPC_C_DISCONNECTED;
+
+		ch->local_GP = &part->local_GPs[ch_number];
+		ch->local_openclose_args =
+		    &part->local_openclose_args[ch_number];
+
+		atomic_set(&ch->kthreads_assigned, 0);
+		atomic_set(&ch->kthreads_idle, 0);
+		atomic_set(&ch->kthreads_active, 0);
+
+		atomic_set(&ch->references, 0);
+		atomic_set(&ch->n_to_notify, 0);
+
+		spin_lock_init(&ch->lock);
+		mutex_init(&ch->msg_to_pull_mutex);
+		init_completion(&ch->wdisconnect_wait);
+
+		atomic_set(&ch->n_on_msg_allocate_wq, 0);
+		init_waitqueue_head(&ch->msg_allocate_wq);
+		init_waitqueue_head(&ch->idle_wq);
+	}
+
+	/*
+	 * With the setting of the partition setup_state to XPC_P_SETUP, we're
+	 * declaring that this partition is ready to go.
+	 */
+	part->setup_state = XPC_P_SETUP;
+
+	/*
+	 * Setup the per partition specific variables required by the
+	 * remote partition to establish channel connections with us.
+	 *
+	 * The setting of the magic # indicates that these per partition
+	 * specific variables are ready to be used.
+	 */
+	xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
+	xpc_vars_part[partid].openclose_args_pa =
+	    __pa(part->local_openclose_args);
+	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
+	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
+	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
+	xpc_vars_part[partid].nchannels = part->nchannels;
+	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
+
+	return xpSuccess;
+
+	/* setup of infrastructure failed */
+out_5:
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+out_4:
+	kfree(part->local_openclose_args_base);
+	part->local_openclose_args = NULL;
+out_3:
+	kfree(part->remote_GPs_base);
+	part->remote_GPs = NULL;
+out_2:
+	kfree(part->local_GPs_base);
+	part->local_GPs = NULL;
+out_1:
+	kfree(part->channels);
+	part->channels = NULL;
+	return retval;
+}
+
+/*
+ * Teardown the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * We start off by making this partition inaccessible to local
+	 * processes by marking it as no longer setup. Then we make it
+	 * inaccessible to remote processes by clearing the XPC per partition
+	 * specific variable's magic # (which indicates that these variables
+	 * are no longer valid) and by ignoring all XPC notify IPIs sent to
+	 * this partition.
+	 */
+
+	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+	DBUG_ON(part->setup_state != XPC_P_SETUP);
+	part->setup_state = XPC_P_WTEARDOWN;
+
+	xpc_vars_part[partid].magic = 0;
+
+	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+	/*
+	 * Before proceeding with the teardown we have to wait until all
+	 * existing references cease.
+	 */
+	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+	/* now we can begin tearing down the infrastructure */
+
+	part->setup_state = XPC_P_TORNDOWN;
+
+	/* in case we've still got outstanding timers registered... */
+	del_timer_sync(&part->dropped_IPI_timer);
+
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+	kfree(part->local_openclose_args_base);
+	part->local_openclose_args = NULL;
+	kfree(part->remote_GPs_base);
+	part->remote_GPs = NULL;
+	kfree(part->local_GPs_base);
+	part->local_GPs = NULL;
+	kfree(part->channels);
+	part->channels = NULL;
+	part->local_IPI_amo_va = NULL;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be cacheline sized
+ */
+/* >>> Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+static enum xp_retval
+xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
+			       const void *src, size_t cnt)
+{
+	enum xp_retval ret;
+
+	DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
+	DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
+	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+	if (part->act_state == XPC_P_DEACTIVATING)
+		return part->reason;
+
+	ret = xp_remote_memcpy(dst, src, cnt);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+			" ret=%d\n", XPC_PARTID(part), ret);
+	}
+	return ret;
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+	u8 buffer[L1_CACHE_BYTES * 2];
+	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+	struct xpc_vars_part_sn2 *pulled_entry;
+	u64 remote_entry_cacheline_pa, remote_entry_pa;
+	short partid = XPC_PARTID(part);
+	enum xp_retval ret;
+
+	/* pull the cacheline that contains the variables we're interested in */
+
+	DBUG_ON(part->remote_vars_part_pa !=
+		L1_CACHE_ALIGN(part->remote_vars_part_pa));
+	DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+	remote_entry_pa = part->remote_vars_part_pa +
+	    sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+	pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+						    + (remote_entry_pa &
+						    (L1_CACHE_BYTES - 1)));
+
+	ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+					     (void *)remote_entry_cacheline_pa,
+					     L1_CACHE_BYTES);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+			"partition %d, ret=%d\n", partid, ret);
+		return ret;
+	}
+
+	/* see if they've been set up yet */
+
+	if (pulled_entry->magic != XPC_VP_MAGIC1 &&
+	    pulled_entry->magic != XPC_VP_MAGIC2) {
+
+		if (pulled_entry->magic != 0) {
+			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d has bad magic value (=0x%lx)\n",
+				partid, sn_partition_id, pulled_entry->magic);
+			return xpBadMagic;
+		}
+
+		/* they've not been initialized yet */
+		return xpRetry;
+	}
+
+	if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
+
+		/* validate the variables */
+
+		if (pulled_entry->GPs_pa == 0 ||
+		    pulled_entry->openclose_args_pa == 0 ||
+		    pulled_entry->IPI_amo_pa == 0) {
+
+			dev_err(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d are not valid\n", partid,
+				sn_partition_id);
+			return xpInvalidAddress;
+		}
+
+		/* the variables we imported look to be valid */
+
+		part->remote_GPs_pa = pulled_entry->GPs_pa;
+		part->remote_openclose_args_pa =
+		    pulled_entry->openclose_args_pa;
+		part->remote_IPI_amo_va =
+		    (AMO_t *)__va(pulled_entry->IPI_amo_pa);
+		part->remote_IPI_nasid = pulled_entry->IPI_nasid;
+		part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
+
+		if (part->nchannels > pulled_entry->nchannels)
+			part->nchannels = pulled_entry->nchannels;
+
+		/* let the other side know that we've pulled their variables */
+
+		xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
+	}
+
+	if (pulled_entry->magic == XPC_VP_MAGIC1)
+		return xpRetry;
+
+	return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partition. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+	enum xp_retval ret;
+
+	while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
+		if (ret != xpRetry) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			return ret;
+		}
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait a 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
+}
+
+/*
+ * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_IPI_flags_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	u64 IPI_amo;
+	enum xp_retval ret;
+
+	/*
+	 * See if there are any IPI flags to be handled.
+	 */
+
+	spin_lock_irqsave(&part->IPI_lock, irq_flags);
+	IPI_amo = part->local_IPI_amo;
+	if (IPI_amo != 0)
+		part->local_IPI_amo = 0;
+
+	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+
+	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
+		ret = xpc_pull_remote_cachelines_sn2(part,
+						    part->remote_openclose_args,
+						     (void *)part->
+						     remote_openclose_args_pa,
+						     XPC_OPENCLOSE_ARGS_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull openclose args from "
+				"partition %d, ret=%d\n", XPC_PARTID(part),
+				ret);
+
+			/* don't bother processing IPIs anymore */
+			IPI_amo = 0;
+		}
+	}
+
+	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
+		ret = xpc_pull_remote_cachelines_sn2(part, part->remote_GPs,
+						    (void *)part->remote_GPs_pa,
+						     XPC_GP_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull GPs from partition "
+				"%d, ret=%d\n", XPC_PARTID(part), ret);
+
+			/* don't bother processing IPIs anymore */
+			IPI_amo = 0;
+		}
+	}
+
+	return IPI_amo;
+}
+
+static struct xpc_msg *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_msg *remote_msg, *msg;
+	u32 msg_index, nmsgs;
+	u64 msg_offset;
+	enum xp_retval ret;
+
+	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
+		/* we were interrupted by a signal */
+		return NULL;
+	}
+
+	while (get >= ch->next_msg_to_pull) {
+
+		/* pull as many messages as are ready and able to be pulled */
+
+		msg_index = ch->next_msg_to_pull % ch->remote_nentries;
+
+		DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
+		nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
+		if (msg_index + nmsgs > ch->remote_nentries) {
+			/* ignore the ones that wrap the msg queue for now */
+			nmsgs = ch->remote_nentries - msg_index;
+		}
+
+		msg_offset = msg_index * ch->msg_size;
+		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
+		remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
+						msg_offset);
+
+		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg,
+						     nmsgs * ch->msg_size);
+		if (ret != xpSuccess) {
+
+			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+				" msg %ld from partition %d, channel=%d, "
+				"ret=%d\n", nmsgs, ch->next_msg_to_pull,
+				ch->partid, ch->number, ret);
+
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			mutex_unlock(&ch->msg_to_pull_mutex);
+			return NULL;
+		}
+
+		ch->next_msg_to_pull += nmsgs;
+	}
+
+	mutex_unlock(&ch->msg_to_pull_mutex);
+
+	/* return the message we were looking for */
+	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
+	msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
+
+	return msg;
+}
+
+/*
+ * Get a message to be delivered.
+ */
+static struct xpc_msg *
+xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
+{
+	struct xpc_msg *msg = NULL;
+	s64 get;
+
+	do {
+		if (ch->flags & XPC_C_DISCONNECTING)
+			break;
+
+		get = ch->w_local_GP.get;
+		rmb();	/* guarantee that .get loads before .put */
+		if (get == ch->w_remote_GP.put)
+			break;
+
+		/* There are messages waiting to be pulled and delivered.
+		 * We need to try to secure one for ourselves. We'll do this
+		 * by trying to increment w_local_GP.get and hope that no one
+		 * else beats us to it. If they do, we'll simply have
+		 * to try again for the next one.
+		 */
+
+		if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
+			/* we got the entry referenced by get */
+
+			dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
+				"partid=%d, channel=%d\n", get + 1,
+				ch->partid, ch->number);
+
+			/* pull the message from the remote partition */
+
+			msg = xpc_pull_remote_msg_sn2(ch, get);
+
+			DBUG_ON(msg != NULL && msg->number != get);
+			DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
+			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
+
+			break;
+		}
+
+	} while (1);
+
+	return msg;
+}
+
 void
 xpc_init_sn2(void)
 {
 	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
+	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
+	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
+	xpc_make_first_contact = xpc_make_first_contact_sn2;
+	xpc_get_IPI_flags = xpc_get_IPI_flags_sn2;
+	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
 }
 
 void
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 8327cd4017e..770f0a8c669 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -36,10 +36,58 @@ xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
 	return xpSuccess;
 }
 
+/*
+ * Setup the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_infrastructure_uv(struct xpc_partition *part)
+{
+	/* >>> this function needs fleshing out */
+	return xpUnsupported;
+}
+
+/*
+ * Teardown the infrastructure necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_infrastructure_uv(struct xpc_partition *part)
+{
+	/* >>> this function needs fleshing out */
+	return;
+}
+
+static enum xp_retval
+xpc_make_first_contact_uv(struct xpc_partition *part)
+{
+	/* >>> this function needs fleshing out */
+	return xpUnsupported;
+}
+
+static u64
+xpc_get_IPI_flags_uv(struct xpc_partition *part)
+{
+	/* >>> this function needs fleshing out */
+	return 0UL;
+}
+
+static struct xpc_msg *
+xpc_get_deliverable_msg_uv(struct xpc_channel *ch)
+{
+	/* >>> this function needs fleshing out */
+	return NULL;
+}
+
 void
 xpc_init_uv(void)
 {
 	xpc_rsvd_page_init = xpc_rsvd_page_init_uv;
+	xpc_setup_infrastructure = xpc_setup_infrastructure_uv;
+	xpc_teardown_infrastructure = xpc_teardown_infrastructure_uv;
+	xpc_make_first_contact = xpc_make_first_contact_uv;
+	xpc_get_IPI_flags = xpc_get_IPI_flags_uv;
+	xpc_get_deliverable_msg = xpc_get_deliverable_msg_uv;
 }
 
 void
-- 
cgit v1.2.3


From 33ba3c7724be79f7cdbfc611335572c056d9a05a Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:07 -0700
Subject: sgi-xp: isolate xpc_vars structure to sn2 only

Isolate the xpc_vars structure of XPC's reserved page to sn2 only.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           |  529 +++-------------
 drivers/misc/sgi-xp/xpc_channel.c   |  315 +---------
 drivers/misc/sgi-xp/xpc_main.c      |  152 +++--
 drivers/misc/sgi-xp/xpc_partition.c |  454 +-------------
 drivers/misc/sgi-xp/xpc_sn2.c       | 1181 ++++++++++++++++++++++++++++++++++-
 drivers/misc/sgi-xp/xpc_uv.c        |   57 +-
 6 files changed, 1433 insertions(+), 1255 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index e8c2a162960..a3a67485cf8 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -159,10 +159,10 @@ xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
  * reflected by incrementing either the major or minor version numbers
  * of struct xpc_vars.
  */
-struct xpc_vars {
+struct xpc_vars_sn2 {
 	u8 version;
 	u64 heartbeat;
-	u64 heartbeating_to_mask;
+	DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
 	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
 	int act_nasid;
 	int act_phys_cpuid;
@@ -176,46 +176,23 @@ struct xpc_vars {
 #define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
 			(_version >= _XPC_VERSION(3, 1))
 
-static inline int
-xpc_hb_allowed(short partid, struct xpc_vars *vars)
-{
-	return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
-}
-
-static inline void
-xpc_allow_hb(short partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask | (1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-		 old_mask);
-}
-
-static inline void
-xpc_disallow_hb(short partid, struct xpc_vars *vars)
-{
-	u64 old_mask, new_mask;
-
-	do {
-		old_mask = vars->heartbeating_to_mask;
-		new_mask = (old_mask & ~(1UL << partid));
-	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
-		 old_mask);
-}
-
 /*
- * The AMOs page consists of a number of AMO variables which are divided into
- * four groups, The first two groups are used to identify an IRQ's sender.
- * These two groups consist of 64 and 128 AMO variables respectively. The last
- * two groups, consisting of just one AMO variable each, are used to identify
- * the remote partitions that are currently engaged (from the viewpoint of
- * the XPC running on the remote partition).
+ * The following pertains to ia64-sn2 only.
+ *
+ * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * AMO variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 AMO variables (based on XP_NASID_MASK_WORDS) to identify
+ * the senders of ACTIVATE IRQs, and 2 AMO variables to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC.
  */
-#define XPC_NOTIFY_IRQ_AMOS	   0
-#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
+#define XPC_NOTIFY_IRQ_AMOS	0
+#define XPC_ACTIVATE_IRQ_AMOS	(XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
 #define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
 #define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
 
@@ -259,11 +236,11 @@ struct xpc_vars_part_sn2 {
 /* the reserved page sizes and offsets */
 
 #define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
-#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars))
+#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
 
 #define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
 #define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp)	((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \
+#define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *)(XPC_RP_MACH_NASIDS(_rp) + \
 				    xp_nasid_mask_words))
 
 /*
@@ -344,6 +321,7 @@ struct xpc_notify {
  * allocated at the time a partition becomes active. The array contains one
  * of these structures for each potential channel connection to that partition.
  *
+>>> sn2 only!!!
  * Each of these structures manages two message queues (circular buffers).
  * They are allocated at the time a channel connection is made. One of
  * these message queues (local_msgqueue) holds the locally created messages
@@ -622,6 +600,9 @@ extern struct device *xpc_part;
 extern struct device *xpc_chan;
 extern int xpc_disengage_request_timelimit;
 extern int xpc_disengage_request_timedout;
+extern atomic_t xpc_act_IRQ_rcvd;
+extern wait_queue_head_t xpc_act_IRQ_wq;
+extern void *xpc_heartbeating_to_mask;
 extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
 extern void xpc_activate_partition(struct xpc_partition *);
@@ -629,15 +610,48 @@ extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
 extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
+extern void (*xpc_heartbeat_init) (void);
+extern void (*xpc_heartbeat_exit) (void);
+extern void (*xpc_increment_heartbeat) (void);
+extern void (*xpc_offline_heartbeat) (void);
+extern void (*xpc_online_heartbeat) (void);
+extern void (*xpc_check_remote_hb) (void);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
 extern u64 (*xpc_get_IPI_flags) (struct xpc_partition *);
 extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
+extern void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *, u64,
+						  int);
+extern void (*xpc_process_act_IRQ_rcvd) (int);
 extern enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *);
 extern void (*xpc_teardown_infrastructure) (struct xpc_partition *);
+extern void (*xpc_mark_partition_engaged) (struct xpc_partition *);
+extern void (*xpc_mark_partition_disengaged) (struct xpc_partition *);
+extern void (*xpc_request_partition_disengage) (struct xpc_partition *);
+extern void (*xpc_cancel_partition_disengage_request) (struct xpc_partition *);
+extern u64 (*xpc_partition_engaged) (u64);
+extern u64 (*xpc_partition_disengage_requested) (u64);;
+extern void (*xpc_clear_partition_engaged) (u64);
+extern void (*xpc_clear_partition_disengage_request) (u64);
+
+extern void (*xpc_IPI_send_local_activate) (int);
+extern void (*xpc_IPI_send_activated) (struct xpc_partition *);
+extern void (*xpc_IPI_send_local_reactivate) (int);
+extern void (*xpc_IPI_send_disengage) (struct xpc_partition *);
+
+extern void (*xpc_IPI_send_closerequest) (struct xpc_channel *,
+					  unsigned long *);
+extern void (*xpc_IPI_send_closereply) (struct xpc_channel *, unsigned long *);
+extern void (*xpc_IPI_send_openrequest) (struct xpc_channel *, unsigned long *);
+extern void (*xpc_IPI_send_openreply) (struct xpc_channel *, unsigned long *);
+
+extern enum xp_retval (*xpc_allocate_msg) (struct xpc_channel *, u32,
+					   struct xpc_msg **);
+extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, struct xpc_msg *,
+				       u8, xpc_notify_func, void *);
+extern void (*xpc_received_msg) (struct xpc_channel *, struct xpc_msg *);
 
 /* found in xpc_sn2.c */
 extern void xpc_init_sn2(void);
-extern struct xpc_vars *xpc_vars;		/*>>> eliminate from here */
 
 /* found in xpc_uv.c */
 extern void xpc_init_uv(void);
@@ -646,6 +660,7 @@ extern void xpc_init_uv(void);
 extern int xpc_exiting;
 extern int xp_nasid_mask_words;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
+extern u64 *xpc_mach_nasids;
 extern struct xpc_partition *xpc_partitions;
 extern char *xpc_remote_copy_buffer;
 extern void *xpc_remote_copy_buffer_base;
@@ -658,7 +673,8 @@ extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
 extern void xpc_discovery(void);
-extern void xpc_check_remote_hb(void);
+extern enum xp_retval xpc_get_remote_rp(int, u64 *, struct xpc_rsvd_page *,
+					u64 *);
 extern void xpc_deactivate_partition(const int, struct xpc_partition *,
 				     enum xp_retval);
 extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
@@ -667,6 +683,7 @@ extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
 extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern void xpc_initiate_connect(int);
 extern void xpc_initiate_disconnect(int);
+extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
 extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **);
 extern enum xp_retval xpc_initiate_send(short, int, void *);
 extern enum xp_retval xpc_initiate_send_notify(short, int, void *,
@@ -680,6 +697,40 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
 extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
 
+static inline int
+xpc_hb_allowed(short partid, void *heartbeating_to_mask)
+{
+	return test_bit(partid, heartbeating_to_mask);
+}
+
+static inline int
+xpc_any_hbs_allowed(void)
+{
+	DBUG_ON(xpc_heartbeating_to_mask == NULL);
+	return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions);
+}
+
+static inline void
+xpc_allow_hb(short partid)
+{
+	DBUG_ON(xpc_heartbeating_to_mask == NULL);
+	set_bit(partid, xpc_heartbeating_to_mask);
+}
+
+static inline void
+xpc_disallow_hb(short partid)
+{
+	DBUG_ON(xpc_heartbeating_to_mask == NULL);
+	clear_bit(partid, xpc_heartbeating_to_mask);
+}
+
+static inline void
+xpc_disallow_all_hbs(void)
+{
+	DBUG_ON(xpc_heartbeating_to_mask == NULL);
+	bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
+}
+
 static inline void
 xpc_wakeup_channel_mgr(struct xpc_partition *part)
 {
@@ -749,297 +800,7 @@ xpc_part_ref(struct xpc_partition *part)
 	}
 
 /*
- * This next set of inlines are used to keep track of when a partition is
- * potentially engaged in accessing memory belonging to another partition.
- */
-
-static inline void
-xpc_mark_partition_engaged(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_ENGAGED_PARTITIONS_AMO *
-				    sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
-
-	/* set bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
-			 (1UL << sn_partition_id));
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_mark_partition_disengaged(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_ENGAGED_PARTITIONS_AMO *
-				    sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
-
-	/* clear bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << sn_partition_id));
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_request_partition_disengage(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
-
-	/* set bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
-			 (1UL << sn_partition_id));
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_cancel_partition_disengage_request(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
-
-	/* clear bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << sn_partition_id));
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-}
-
-static inline u64
-xpc_partition_engaged(u64 partid_mask)
-{
-	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
-	/* return our partition's AMO variable ANDed with partid_mask */
-	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		partid_mask);
-}
-
-static inline u64
-xpc_partition_disengage_requested(u64 partid_mask)
-{
-	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
-	/* return our partition's AMO variable ANDed with partid_mask */
-	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		partid_mask);
-}
-
-static inline void
-xpc_clear_partition_engaged(u64 partid_mask)
-{
-	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
-	/* clear bit(s) based on partid_mask in our partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~partid_mask);
-}
-
-static inline void
-xpc_clear_partition_disengage_request(u64 partid_mask)
-{
-	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
-	/* clear bit(s) based on partid_mask in our partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~partid_mask);
-}
-
-/*
- * The following set of macros and inlines are used for the sending and
- * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
- * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
- * the other that is associated with channel activity (SGI_XPC_NOTIFY).
- */
-
-static inline u64
-xpc_IPI_receive(AMO_t *amo)
-{
-	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
-}
-
-static inline enum xp_retval
-xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
-{
-	int ret = 0;
-	unsigned long irq_flags;
-
-	local_irq_save(irq_flags);
-
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
-	sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
-
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-
-	return ((ret == 0) ? xpSuccess : xpPioReadError);
-}
-
-/*
- * IPIs associated with SGI_XPC_ACTIVATE IRQ.
- */
-
-/*
- * Flag the appropriate AMO variable and send an IPI to the specified node.
- */
-static inline void
-xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
-		      int to_phys_cpuid)
-{
-	int w_index = XPC_NASID_W_INDEX(from_nasid);
-	int b_index = XPC_NASID_B_INDEX(from_nasid);
-	AMO_t *amos = (AMO_t *)__va(amos_page_pa +
-				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
-
-	(void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
-			   to_phys_cpuid, SGI_XPC_ACTIVATE);
-}
-
-static inline void
-xpc_IPI_send_activate(struct xpc_vars *vars)
-{
-	xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
-			      vars->act_nasid, vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_activated(struct xpc_partition *part)
-{
-	xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
-			      part->remote_act_nasid,
-			      part->remote_act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_reactivate(struct xpc_partition *part)
-{
-	xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
-			      xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_disengage(struct xpc_partition *part)
-{
-	xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
-			      part->remote_act_nasid,
-			      part->remote_act_phys_cpuid);
-}
-
-/*
- * IPIs associated with SGI_XPC_NOTIFY IRQ.
- */
-
-/*
- * Send an IPI to the remote partition that is associated with the
- * specified channel.
- */
-#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
-		xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
-
-static inline void
-xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
-		    unsigned long *irq_flags)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	enum xp_retval ret;
-
-	if (likely(part->act_state != XPC_P_DEACTIVATING)) {
-		ret = xpc_IPI_send(part->remote_IPI_amo_va,
-				   (u64)ipi_flag << (ch->number * 8),
-				   part->remote_IPI_nasid,
-				   part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY);
-		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
-			ipi_flag_string, ch->partid, ch->number, ret);
-		if (unlikely(ret != xpSuccess)) {
-			if (irq_flags != NULL)
-				spin_unlock_irqrestore(&ch->lock, *irq_flags);
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			if (irq_flags != NULL)
-				spin_lock_irqsave(&ch->lock, *irq_flags);
-		}
-	}
-}
-
-/*
- * Make it look like the remote partition, which is associated with the
- * specified channel, sent us an IPI. This faked IPI will be handled
- * by xpc_dropped_IPI_check().
- */
-#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
-		xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
-
-static inline void
-xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
-			  char *ipi_flag_string)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-
-	FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
-			 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
-	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
-		ipi_flag_string, ch->partid, ch->number);
-}
-
-/*
- * The sending and receiving of IPIs includes the setting of an AMO variable
+ * The sending and receiving of IPIs includes the setting of an >>>AMO variable
  * to indicate the reason the IPI was sent. The 64-bit variable is divided
  * up into eight bytes, ordered from right to left. Byte zero pertains to
  * channel 0, byte one to channel 1, and so on. Each byte is described by
@@ -1052,107 +813,11 @@ xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
 #define	XPC_IPI_OPENREPLY	0x08
 #define	XPC_IPI_MSGREQUEST	0x10
 
-/* given an AMO variable and a channel#, get its associated IPI flags */
+/* given an >>>AMO variable and a channel#, get its associated IPI flags */
 #define XPC_GET_IPI_FLAGS(_amo, _c)	((u8) (((_amo) >> ((_c) * 8)) & 0xff))
 #define XPC_SET_IPI_FLAGS(_amo, _c, _f)	(_amo) |= ((u64) (_f) << ((_c) * 8))
 
 #define	XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
 #define XPC_ANY_MSG_IPI_FLAGS_SET(_amo)       ((_amo) & 0x1010101010101010UL)
 
-static inline void
-xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	struct xpc_openclose_args *args = ch->local_openclose_args;
-
-	args->reason = ch->reason;
-
-	XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	struct xpc_openclose_args *args = ch->local_openclose_args;
-
-	args->msg_size = ch->msg_size;
-	args->local_nentries = ch->local_nentries;
-
-	XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	struct xpc_openclose_args *args = ch->local_openclose_args;
-
-	args->remote_nentries = ch->remote_nentries;
-	args->local_nentries = ch->local_nentries;
-	args->local_msgqueue_pa = __pa(ch->local_msgqueue);
-
-	XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_msgrequest(struct xpc_channel *ch)
-{
-	XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
-}
-
-static inline void
-xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
-{
-	XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
-}
-
-/*
->>> this block comment needs to be moved and re-written.
- * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
- * pages are located in the lowest granule. The lowest granule uses 4k pages
- * for cached references and an alternate TLB handler to never provide a
- * cacheable mapping for the entire region. This will prevent speculative
- * reading of cached copies of our lines from being issued which will cause
- * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * AMO variables (based on xp_max_npartitions) for message notification and an
- * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
- * activation and 2 AMO variables for partition deactivation.
- */
-static inline AMO_t *
-xpc_IPI_init(int index)
-{
-	AMO_t *amo = xpc_vars->amos_page + index;
-
-	(void)xpc_IPI_receive(amo);	/* clear AMO variable */
-	return amo;
-}
-
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static inline void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
-	u64 IPI_amo;
-	unsigned long irq_flags;
-
-	IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
-	if (IPI_amo == 0)
-		return;
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	part->local_IPI_amo |= IPI_amo;
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
-		XPC_PARTID(part), IPI_amo);
-
-	xpc_wakeup_channel_mgr(part);
-}
-
 #endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 8081e8155df..26c5e12c122 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -1165,7 +1165,7 @@ xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
  * Wait for a message entry to become available for the specified channel,
  * but don't wait any longer than 1 jiffy.
  */
-static enum xp_retval
+enum xp_retval
 xpc_allocate_msg_wait(struct xpc_channel *ch)
 {
 	enum xp_retval ret;
@@ -1191,96 +1191,6 @@ xpc_allocate_msg_wait(struct xpc_channel *ch)
 	return ret;
 }
 
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel.
- */
-static enum xp_retval
-xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
-		 struct xpc_msg **address_of_msg)
-{
-	struct xpc_msg *msg;
-	enum xp_retval ret;
-	s64 put;
-
-	/* this reference will be dropped in xpc_send_msg() */
-	xpc_msgqueue_ref(ch);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-	if (!(ch->flags & XPC_C_CONNECTED)) {
-		xpc_msgqueue_deref(ch);
-		return xpNotConnected;
-	}
-
-	/*
-	 * Get the next available message entry from the local message queue.
-	 * If none are available, we'll make sure that we grab the latest
-	 * GP values.
-	 */
-	ret = xpTimeout;
-
-	while (1) {
-
-		put = ch->w_local_GP.put;
-		rmb();	/* guarantee that .put loads before .get */
-		if (put - ch->w_remote_GP.get < ch->local_nentries) {
-
-			/* There are available message entries. We need to try
-			 * to secure one for ourselves. We'll do this by trying
-			 * to increment w_local_GP.put as long as someone else
-			 * doesn't beat us to it. If they do, we'll have to
-			 * try again.
-			 */
-			if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
-				/* we got the entry referenced by put */
-				break;
-			}
-			continue;	/* try again */
-		}
-
-		/*
-		 * There aren't any available msg entries at this time.
-		 *
-		 * In waiting for a message entry to become available,
-		 * we set a timeout in case the other side is not
-		 * sending completion IPIs. This lets us fake an IPI
-		 * that will cause the IPI handler to fetch the latest
-		 * GP values as if an IPI was sent by the other side.
-		 */
-		if (ret == xpTimeout)
-			xpc_IPI_send_local_msgrequest(ch);
-
-		if (flags & XPC_NOWAIT) {
-			xpc_msgqueue_deref(ch);
-			return xpNoWait;
-		}
-
-		ret = xpc_allocate_msg_wait(ch);
-		if (ret != xpInterrupted && ret != xpTimeout) {
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-	/* get the message's address and initialize it */
-	msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
-				 (put % ch->local_nentries) * ch->msg_size);
-
-	DBUG_ON(msg->flags != 0);
-	msg->number = put;
-
-	dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
-		"msg_number=%ld, partid=%d, channel=%d\n", put + 1,
-		(void *)msg, msg->number, ch->partid, ch->number);
-
-	*address_of_msg = msg;
-
-	return xpSuccess;
-}
-
 /*
  * Allocate an entry for a message from the message queue associated with the
  * specified channel. NOTE that this routine can sleep waiting for a message
@@ -1317,144 +1227,6 @@ xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload)
 	return ret;
 }
 
-/*
- * Now we actually send the messages that are ready to be sent by advancing
- * the local message queue's Put value and then send an IPI to the recipient
- * partition.
- */
-static void
-xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
-{
-	struct xpc_msg *msg;
-	s64 put = initial_put + 1;
-	int send_IPI = 0;
-
-	while (1) {
-
-		while (1) {
-			if (put == ch->w_local_GP.put)
-				break;
-
-			msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
-						 (put % ch->local_nentries) *
-						 ch->msg_size);
-
-			if (!(msg->flags & XPC_M_READY))
-				break;
-
-			put++;
-		}
-
-		if (put == initial_put) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
-		    initial_put) {
-			/* someone else beat us to it */
-			DBUG_ON(ch->local_GP->put < initial_put);
-			break;
-		}
-
-		/* we just set the new value of local_GP->put */
-
-		dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
-			"channel=%d\n", put, ch->partid, ch->number);
-
-		send_IPI = 1;
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->put is not XPC_M_READY or that local_GP->put
-		 * equals w_local_GP.put, so we'll go have a look.
-		 */
-		initial_put = put;
-	}
-
-	if (send_IPI)
-		xpc_IPI_send_msgrequest(ch);
-}
-
-/*
- * Common code that does the actual sending of the message by advancing the
- * local message queue's Put value and sends an IPI to the partition the
- * message is being sent to.
- */
-static enum xp_retval
-xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
-	     xpc_notify_func func, void *key)
-{
-	enum xp_retval ret = xpSuccess;
-	struct xpc_notify *notify = notify;
-	s64 put, msg_number = msg->number;
-
-	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
-	DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
-		msg_number % ch->local_nentries);
-	DBUG_ON(msg->flags & XPC_M_READY);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		/* drop the reference grabbed in xpc_allocate_msg() */
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-
-	if (notify_type != 0) {
-		/*
-		 * Tell the remote side to send an ACK interrupt when the
-		 * message has been delivered.
-		 */
-		msg->flags |= XPC_M_INTERRUPT;
-
-		atomic_inc(&ch->n_to_notify);
-
-		notify = &ch->notify_queue[msg_number % ch->local_nentries];
-		notify->func = func;
-		notify->key = key;
-		notify->type = notify_type;
-
-		/* >>> is a mb() needed here? */
-
-		if (ch->flags & XPC_C_DISCONNECTING) {
-			/*
-			 * An error occurred between our last error check and
-			 * this one. We will try to clear the type field from
-			 * the notify entry. If we succeed then
-			 * xpc_disconnect_channel() didn't already process
-			 * the notify entry.
-			 */
-			if (cmpxchg(&notify->type, notify_type, 0) ==
-			    notify_type) {
-				atomic_dec(&ch->n_to_notify);
-				ret = ch->reason;
-			}
-
-			/* drop the reference grabbed in xpc_allocate_msg() */
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-	msg->flags |= XPC_M_READY;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->put.
-	 */
-	mb();
-
-	/* see if the message is next in line to be sent, if so send it */
-
-	put = ch->local_GP->put;
-	if (put == msg_number)
-		xpc_send_msgs(ch, put);
-
-	/* drop the reference grabbed in xpc_allocate_msg() */
-	xpc_msgqueue_deref(ch);
-	return ret;
-}
-
 /*
  * Send a message previously allocated using xpc_initiate_allocate() on the
  * specified channel connected to the specified partition.
@@ -1585,66 +1357,6 @@ xpc_deliver_msg(struct xpc_channel *ch)
 	}
 }
 
-/*
- * Now we actually acknowledge the messages that have been delivered and ack'd
- * by advancing the cached remote message queue's Get value and if requested
- * send an IPI to the message sender's partition.
- */
-static void
-xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
-{
-	struct xpc_msg *msg;
-	s64 get = initial_get + 1;
-	int send_IPI = 0;
-
-	while (1) {
-
-		while (1) {
-			if (get == ch->w_local_GP.get)
-				break;
-
-			msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
-						 (get % ch->remote_nentries) *
-						 ch->msg_size);
-
-			if (!(msg->flags & XPC_M_DONE))
-				break;
-
-			msg_flags |= msg->flags;
-			get++;
-		}
-
-		if (get == initial_get) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
-		    initial_get) {
-			/* someone else beat us to it */
-			DBUG_ON(ch->local_GP->get <= initial_get);
-			break;
-		}
-
-		/* we just set the new value of local_GP->get */
-
-		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
-			"channel=%d\n", get, ch->partid, ch->number);
-
-		send_IPI = (msg_flags & XPC_M_INTERRUPT);
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->get is not XPC_M_DONE or that local_GP->get
-		 * equals w_local_GP.get, so we'll go have a look.
-		 */
-		initial_get = get;
-	}
-
-	if (send_IPI)
-		xpc_IPI_send_msgrequest(ch);
-}
-
 /*
  * Acknowledge receipt of a delivered message.
  *
@@ -1668,35 +1380,12 @@ xpc_initiate_received(short partid, int ch_number, void *payload)
 	struct xpc_partition *part = &xpc_partitions[partid];
 	struct xpc_channel *ch;
 	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	s64 get, msg_number = msg->number;
 
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 
 	ch = &part->channels[ch_number];
-
-	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
-		(void *)msg, msg_number, ch->partid, ch->number);
-
-	DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
-		msg_number % ch->remote_nentries);
-	DBUG_ON(msg->flags & XPC_M_DONE);
-
-	msg->flags |= XPC_M_DONE;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->get.
-	 */
-	mb();
-
-	/*
-	 * See if this message is next in line to be acknowledged as having
-	 * been delivered.
-	 */
-	get = ch->local_GP->get;
-	if (get == msg_number)
-		xpc_acknowledge_msgs(ch, get, msg->flags);
+	xpc_received_msg(ch, msg);
 
 	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg()  */
 	xpc_msgqueue_deref(ch);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index be3a4853930..10dac3652b2 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -148,12 +148,14 @@ static struct ctl_table_header *xpc_sysctl;
 int xpc_disengage_request_timedout;
 
 /* #of IRQs received */
-static atomic_t xpc_act_IRQ_rcvd;
+atomic_t xpc_act_IRQ_rcvd;
 
 /* IRQ handler notifies this wait queue on receipt of an IRQ */
-static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
+DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+void *xpc_heartbeating_to_mask;
 
 /* notification that the xpc_hb_checker thread has exited */
 static DECLARE_COMPLETION(xpc_hb_checker_exited);
@@ -161,8 +163,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited);
 /* notification that the xpc_discovery thread has exited */
 static DECLARE_COMPLETION(xpc_discovery_exited);
 
-static struct timer_list xpc_hb_timer;
-
 static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 
 static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
@@ -176,12 +176,54 @@ static struct notifier_block xpc_die_notifier = {
 };
 
 enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
+void (*xpc_heartbeat_init) (void);
+void (*xpc_heartbeat_exit) (void);
+void (*xpc_increment_heartbeat) (void);
+void (*xpc_offline_heartbeat) (void);
+void (*xpc_online_heartbeat) (void);
+void (*xpc_check_remote_hb) (void);
+
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 u64 (*xpc_get_IPI_flags) (struct xpc_partition *part);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
+
+void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *remote_rp,
+					   u64 remote_rp_pa, int nasid);
+
+void (*xpc_process_act_IRQ_rcvd) (int n_IRQs_expected);
 enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *part);
 void (*xpc_teardown_infrastructure) (struct xpc_partition *part);
 
+void (*xpc_mark_partition_engaged) (struct xpc_partition *part);
+void (*xpc_mark_partition_disengaged) (struct xpc_partition *part);
+void (*xpc_request_partition_disengage) (struct xpc_partition *part);
+void (*xpc_cancel_partition_disengage_request) (struct xpc_partition *part);
+u64 (*xpc_partition_engaged) (u64 partid_mask);
+u64 (*xpc_partition_disengage_requested) (u64 partid_mask);
+void (*xpc_clear_partition_engaged) (u64 partid_mask);
+void (*xpc_clear_partition_disengage_request) (u64 partid_mask);
+
+void (*xpc_IPI_send_local_activate) (int from_nasid);
+void (*xpc_IPI_send_activated) (struct xpc_partition *part);
+void (*xpc_IPI_send_local_reactivate) (int from_nasid);
+void (*xpc_IPI_send_disengage) (struct xpc_partition *part);
+
+void (*xpc_IPI_send_closerequest) (struct xpc_channel *ch,
+				   unsigned long *irq_flags);
+void (*xpc_IPI_send_closereply) (struct xpc_channel *ch,
+				 unsigned long *irq_flags);
+void (*xpc_IPI_send_openrequest) (struct xpc_channel *ch,
+				  unsigned long *irq_flags);
+void (*xpc_IPI_send_openreply) (struct xpc_channel *ch,
+				unsigned long *irq_flags);
+
+enum xp_retval (*xpc_allocate_msg) (struct xpc_channel *ch, u32 flags,
+				    struct xpc_msg **address_of_msg);
+
+enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, struct xpc_msg *msg,
+				u8 notify_type, xpc_notify_func func,
+				void *key);
+void (*xpc_received_msg) (struct xpc_channel *ch, struct xpc_msg *msg);
 
 /*
  * Timer function to enforce the timelimit on the partition disengage request.
@@ -218,7 +260,7 @@ xpc_act_IRQ_handler(int irq, void *dev_id)
 static void
 xpc_hb_beater(unsigned long dummy)
 {
-	xpc_vars->heartbeat++;
+	xpc_increment_heartbeat();
 
 	if (time_after_eq(jiffies, xpc_hb_check_timeout))
 		wake_up_interruptible(&xpc_act_IRQ_wq);
@@ -227,6 +269,22 @@ xpc_hb_beater(unsigned long dummy)
 	add_timer(&xpc_hb_timer);
 }
 
+static void
+xpc_start_hb_beater(void)
+{
+	xpc_heartbeat_init();
+	init_timer(&xpc_hb_timer);
+	xpc_hb_timer.function = xpc_hb_beater;
+	xpc_hb_beater(0);	/* first beat; this also arms xpc_hb_timer */
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+	del_timer_sync(&xpc_hb_timer);	/* stop beating before the exit hook */
+	xpc_heartbeat_exit();
+}
+
 /*
  * This thread is responsible for nearly all of the partition
  * activation/deactivation.
@@ -244,7 +302,7 @@ xpc_hb_checker(void *ignore)
 
 	/* set our heartbeating to other partitions into motion */
 	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
-	xpc_hb_beater(0);
+	xpc_start_hb_beater();
 
 	while (!xpc_exiting) {
 
@@ -274,11 +332,8 @@ xpc_hb_checker(void *ignore)
 			dev_dbg(xpc_part, "found an IRQ to process; will be "
 				"resetting xpc_hb_check_timeout\n");
 
-			last_IRQ_count += xpc_identify_act_IRQ_sender();
-			if (last_IRQ_count < new_IRQ_count) {
-				/* retry once to help avoid missing AMO */
-				(void)xpc_identify_act_IRQ_sender();
-			}
+			xpc_process_act_IRQ_rcvd(new_IRQ_count -
+						 last_IRQ_count);
 			last_IRQ_count = new_IRQ_count;
 
 			xpc_hb_check_timeout = jiffies +
@@ -294,6 +349,8 @@ xpc_hb_checker(void *ignore)
 						xpc_exiting));
 	}
 
+	xpc_stop_hb_beater();
+
 	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 
 	/* mark this thread as having exited */
@@ -401,31 +458,7 @@ xpc_activating(void *__partid)
 
 	dev_dbg(xpc_part, "activating partition %d\n", partid);
 
-	/*
-	 * Register the remote partition's AMOs with SAL so it can handle
-	 * and cleanup errors within that address range should the remote
-	 * partition go down. We don't unregister this range because it is
-	 * difficult to tell when outstanding writes to the remote partition
-	 * are finished and thus when it is safe to unregister. This should
-	 * not result in wasted space in the SAL xp_addr_region table because
-	 * we should get the same page for remote_amos_page_pa after module
-	 * reloads and system reboots.
-	 */
-	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
-				       PAGE_SIZE, 1) < 0) {
-		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
-			 "xp_addr region\n", partid);
-
-		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		part->remote_rp_pa = 0;
-		return 0;
-	}
-
-	xpc_allow_hb(partid, xpc_vars);
-	xpc_IPI_send_activated(part);
+	xpc_allow_hb(partid);
 
 	if (xpc_setup_infrastructure(part) == xpSuccess) {
 		(void)xpc_part_ref(part);	/* this will always succeed */
@@ -440,12 +473,12 @@ xpc_activating(void *__partid)
 		xpc_teardown_infrastructure(part);
 	}
 
-	xpc_disallow_hb(partid, xpc_vars);
+	xpc_disallow_hb(partid);
 	xpc_mark_partition_inactive(part);
 
 	if (part->reason == xpReactivating) {
 		/* interrupting ourselves results in activating partition */
-		xpc_IPI_send_reactivate(part);
+		xpc_IPI_send_local_reactivate(part->reactivate_nasid);
 	}
 
 	return 0;
@@ -477,6 +510,32 @@ xpc_activate_partition(struct xpc_partition *part)
 	}
 }
 
+/*
+ * Check for channel activity to/from the specified partition; any IPI flags
+ * found are OR'd into part->local_IPI_amo before xpc_wakeup_channel_mgr().
+ */
+static void
+xpc_check_for_channel_activity(struct xpc_partition *part)
+{
+	u64 IPI_amo;
+	unsigned long irq_flags;
+
+/* FIXME: restore the xpc_IPI_receive() call below. Until then IPI_amo is */
+/* always 0 and we return early. This and its callers may move to xpc_sn2.c */
+	IPI_amo = 0; /* = xpc_IPI_receive(part->local_IPI_amo_va); */
+	if (IPI_amo == 0)
+		return;
+
+	spin_lock_irqsave(&part->IPI_lock, irq_flags);
+	part->local_IPI_amo |= IPI_amo;
+	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+
+	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
+		XPC_PARTID(part), IPI_amo);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
 /*
  * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
  * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
@@ -902,14 +961,11 @@ xpc_do_exit(enum xp_retval reason)
 	} while (1);
 
 	DBUG_ON(xpc_partition_engaged(-1UL));
+	DBUG_ON(xpc_any_hbs_allowed() != 0);
 
 	/* indicate to others that our reserved page is uninitialized */
 	xpc_rsvd_page->stamp = ZERO_STAMP;
 
-	/* now it's time to eliminate our heartbeat */
-	del_timer_sync(&xpc_hb_timer);
-	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
-
 	if (reason == xpUnloading) {
 		(void)unregister_die_notifier(&xpc_die_notifier);
 		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
@@ -968,7 +1024,7 @@ xpc_die_disengage(void)
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
 	xpc_exiting = 1;
 
-	xpc_vars->heartbeating_to_mask = 0;	/* indicate we're deactivated */
+	xpc_disallow_all_hbs();	/* indicate we're deactivated */
 
 	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
@@ -1054,8 +1110,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 		/* fall through */
 	case DIE_MCA_MONARCH_ENTER:
 	case DIE_INIT_MONARCH_ENTER:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 1;
+		xpc_offline_heartbeat();
 		break;
 
 	case DIE_KDEBUG_LEAVE:
@@ -1066,8 +1121,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 		/* fall through */
 	case DIE_MCA_MONARCH_LEAVE:
 	case DIE_INIT_MONARCH_LEAVE:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 0;
+		xpc_online_heartbeat();
 		break;
 	}
 
@@ -1202,9 +1256,6 @@ xpc_init(void)
 	if (ret != 0)
 		dev_warn(xpc_part, "can't register die notifier\n");
 
-	init_timer(&xpc_hb_timer);
-	xpc_hb_timer.function = xpc_hb_beater;
-
 	/*
 	 * The real work-horse behind xpc.  This processes incoming
 	 * interrupts and monitors remote heartbeats.
@@ -1246,7 +1297,6 @@ out_4:
 	/* indicate to others that our reserved page is uninitialized */
 	xpc_rsvd_page->stamp = ZERO_STAMP;
 
-	del_timer_sync(&xpc_hb_timer);
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
 out_3:
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index be5b7547dab..4e14effdedd 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -42,7 +42,7 @@ u64 xpc_prot_vec[MAX_NUMNODES];
 /* this partition's reserved page pointers */
 struct xpc_rsvd_page *xpc_rsvd_page;
 static u64 *xpc_part_nasids;
-static u64 *xpc_mach_nasids;
+u64 *xpc_mach_nasids;
 
 /* >>> next two variables should be 'xpc_' if they remain here */
 static int xp_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
@@ -317,62 +317,6 @@ xpc_restrict_IPI_ops(void)
 	}
 }
 
-/*
- * At periodic intervals, scan through all active partitions and ensure
- * their heartbeat is still active.  If not, the partition is deactivated.
- */
-void
-xpc_check_remote_hb(void)
-{
-	struct xpc_vars *remote_vars;
-	struct xpc_partition *part;
-	short partid;
-	enum xp_retval ret;
-
-	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
-
-	for (partid = 0; partid < xp_max_npartitions; partid++) {
-
-		if (xpc_exiting)
-			break;
-
-		if (partid == sn_partition_id)
-			continue;
-
-		part = &xpc_partitions[partid];
-
-		if (part->act_state == XPC_P_INACTIVE ||
-		    part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		/* pull the remote_hb cache line */
-		ret = xp_remote_memcpy(remote_vars,
-				       (void *)part->remote_vars_pa,
-				       XPC_RP_VARS_SIZE);
-		if (ret != xpSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			continue;
-		}
-
-		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
-			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
-			partid, remote_vars->heartbeat, part->last_heartbeat,
-			remote_vars->heartbeat_offline,
-			remote_vars->heartbeating_to_mask);
-
-		if (((remote_vars->heartbeat == part->last_heartbeat) &&
-		     (remote_vars->heartbeat_offline == 0)) ||
-		    !xpc_hb_allowed(sn_partition_id, remote_vars)) {
-
-			XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
-			continue;
-		}
-
-		part->last_heartbeat = remote_vars->heartbeat;
-	}
-}
-
 /*
  * Get a copy of a portion of the remote partition's rsvd page.
  *
@@ -380,7 +324,7 @@ xpc_check_remote_hb(void)
  * is large enough to contain a copy of their reserved page header and
  * part_nasids mask.
  */
-static enum xp_retval
+enum xp_retval
 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 {
@@ -431,322 +375,6 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 	return xpSuccess;
 }
 
-/*
- * Get a copy of the remote partition's XPC variables from the reserved page.
- *
- * remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RP_VARS_SIZE.
- */
-static enum xp_retval
-xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	enum xp_retval ret;
-
-	if (remote_vars_pa == 0)
-		return xpVarsNotSet;
-
-	/* pull over the cross partition variables */
-	ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
-			       XPC_RP_VARS_SIZE);
-	if (ret != xpSuccess)
-		return ret;
-
-	if (XPC_VERSION_MAJOR(remote_vars->version) !=
-	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
-		return xpBadVersion;
-	}
-
-	return xpSuccess;
-}
-
-/*
- * Update the remote partition's info.
- */
-static void
-xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
-			  struct timespec *remote_rp_stamp, u64 remote_rp_pa,
-			  u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	part->remote_rp_version = remote_rp_version;
-	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
-		part->remote_rp_version);
-
-	part->remote_rp_stamp = *remote_rp_stamp;
-	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
-		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
-
-	part->remote_rp_pa = remote_rp_pa;
-	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
-
-	part->remote_vars_pa = remote_vars_pa;
-	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-		part->remote_vars_pa);
-
-	part->last_heartbeat = remote_vars->heartbeat;
-	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-		part->last_heartbeat);
-
-/* >>> remote_vars_part_pa and vars_part_pa are sn2 only!!! */
-	part->remote_vars_part_pa = remote_vars->vars_part_pa;
-	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-		part->remote_vars_part_pa);
-
-	part->remote_act_nasid = remote_vars->act_nasid;
-	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-		part->remote_act_nasid);
-
-	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-		part->remote_act_phys_cpuid);
-
-	part->remote_amos_page_pa = remote_vars->amos_page_pa;
-	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-		part->remote_amos_page_pa);
-
-	part->remote_vars_version = remote_vars->version;
-	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
-		part->remote_vars_version);
-}
-
-/*
- * Prior code has determined the nasid which generated an IPI.  Inspect
- * that nasid to determine if its partition needs to be activated or
- * deactivated.
- *
- * A partition is consider "awaiting activation" if our partition
- * flags indicate it is not active and it has a heartbeat.  A
- * partition is considered "awaiting deactivation" if our partition
- * flags indicate it is active but it has no heartbeat or it is not
- * sending its heartbeat to us.
- *
- * To determine the heartbeat, the remote nasid must have a properly
- * initialized reserved page.
- */
-static void
-xpc_identify_act_IRQ_req(int nasid)
-{
-	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
-	int remote_rp_version;
-	int reactivate = 0;
-	int stamp_diff;
-	struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
-	short partid;
-	struct xpc_partition *part;
-	enum xp_retval ret;
-
-	/* pull over the reserved page structure */
-
-	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
-	if (ret != xpSuccess) {
-		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
-			 "which sent interrupt, reason=%d\n", nasid, ret);
-		return;
-	}
-
-	remote_vars_pa = remote_rp->sn.vars_pa;
-	remote_rp_version = remote_rp->version;
-	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
-		remote_rp_stamp = remote_rp->stamp;
-
-	partid = remote_rp->SAL_partid;
-	part = &xpc_partitions[partid];
-
-	/* pull over the cross partition variables */
-
-	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-	if (ret != xpSuccess) {
-
-		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
-			 "which sent interrupt, reason=%d\n", nasid, ret);
-
-		XPC_DEACTIVATE_PARTITION(part, ret);
-		return;
-	}
-
-	part->act_IRQ_rcvd++;
-
-	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
-		"%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
-		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
-
-	if (xpc_partition_disengaged(part) &&
-	    part->act_state == XPC_P_INACTIVE) {
-
-		xpc_update_partition_info(part, remote_rp_version,
-					  &remote_rp_stamp, remote_rp_pa,
-					  remote_vars_pa, remote_vars);
-
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			if (xpc_partition_disengage_requested(1UL << partid)) {
-				/*
-				 * Other side is waiting on us to disengage,
-				 * even though we already have.
-				 */
-				return;
-			}
-		} else {
-			/* other side doesn't support disengage requests */
-			xpc_clear_partition_disengage_request(1UL << partid);
-		}
-
-		xpc_activate_partition(part);
-		return;
-	}
-
-	DBUG_ON(part->remote_rp_version == 0);
-	DBUG_ON(part->remote_vars_version == 0);
-
-	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
-		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-						       remote_vars_version));
-
-		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-							       version));
-			/* see if the other side rebooted */
-			if (part->remote_amos_page_pa ==
-			    remote_vars->amos_page_pa &&
-			    xpc_hb_allowed(sn_partition_id, remote_vars)) {
-				/* doesn't look that way, so ignore the IPI */
-				return;
-			}
-		}
-
-		/*
-		 * Other side rebooted and previous XPC didn't support the
-		 * disengage request, so we don't need to do anything special.
-		 */
-
-		xpc_update_partition_info(part, remote_rp_version,
-					  &remote_rp_stamp, remote_rp_pa,
-					  remote_vars_pa, remote_vars);
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
-		return;
-	}
-
-	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
-
-	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		/*
-		 * Other side rebooted and previous XPC did support the
-		 * disengage request, but the new one doesn't.
-		 */
-
-		xpc_clear_partition_engaged(1UL << partid);
-		xpc_clear_partition_disengage_request(1UL << partid);
-
-		xpc_update_partition_info(part, remote_rp_version,
-					  &remote_rp_stamp, remote_rp_pa,
-					  remote_vars_pa, remote_vars);
-		reactivate = 1;
-
-	} else {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
-						&remote_rp_stamp);
-		if (stamp_diff != 0) {
-			DBUG_ON(stamp_diff >= 0);
-
-			/*
-			 * Other side rebooted and the previous XPC did support
-			 * the disengage request, as does the new one.
-			 */
-
-			DBUG_ON(xpc_partition_engaged(1UL << partid));
-			DBUG_ON(xpc_partition_disengage_requested(1UL <<
-								  partid));
-
-			xpc_update_partition_info(part, remote_rp_version,
-						  &remote_rp_stamp,
-						  remote_rp_pa, remote_vars_pa,
-						  remote_vars);
-			reactivate = 1;
-		}
-	}
-
-	if (part->disengage_request_timeout > 0 &&
-	    !xpc_partition_disengaged(part)) {
-		/* still waiting on other side to disengage from us */
-		return;
-	}
-
-	if (reactivate) {
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
-
-	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
-		   xpc_partition_disengage_requested(1UL << partid)) {
-		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
-	}
-}
-
-/*
- * Loop through the activation AMO variables and process any bits
- * which are set.  Each bit indicates a nasid sending a partition
- * activation or deactivation request.
- *
- * Return #of IRQs detected.
- */
-int
-xpc_identify_act_IRQ_sender(void)
-{
-	int word, bit;
-	u64 nasid_mask;
-	u64 nasid;		/* remote nasid */
-	int n_IRQs_detected = 0;
-	AMO_t *act_amos;
-
-	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
-
-	/* scan through act AMO variable looking for non-zero entries */
-	for (word = 0; word < xp_nasid_mask_words; word++) {
-
-		if (xpc_exiting)
-			break;
-
-		nasid_mask = xpc_IPI_receive(&act_amos[word]);
-		if (nasid_mask == 0) {
-			/* no IRQs from nasids in this variable */
-			continue;
-		}
-
-		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
-			nasid_mask);
-
-		/*
-		 * If this nasid has been added to the machine since
-		 * our partition was reset, this will retain the
-		 * remote nasid in our reserved pages machine mask.
-		 * This is used in the event of module reload.
-		 */
-		xpc_mach_nasids[word] |= nasid_mask;
-
-		/* locate the nasid(s) which sent interrupts */
-
-		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
-			if (nasid_mask & (1UL << bit)) {
-				n_IRQs_detected++;
-				nasid = XPC_NASID_FROM_W_B(word, bit);
-				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
-					nasid);
-				xpc_identify_act_IRQ_req(nasid);
-			}
-		}
-	}
-	return n_IRQs_detected;
-}
-
 /*
  * See if the other side has responded to a partition disengage request
  * from us.
@@ -836,7 +464,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
 		if (reason == xpReactivating) {
 			/* we interrupt ourselves to reactivate partition */
-			xpc_IPI_send_reactivate(part);
+			xpc_IPI_send_local_reactivate(part->reactivate_nasid);
 		}
 		return;
 	}
@@ -903,16 +531,12 @@ xpc_discovery(void)
 {
 	void *remote_rp_base;
 	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
 	u64 remote_rp_pa;
-	u64 remote_vars_pa;
 	int region;
 	int region_size;
 	int max_regions;
 	int nasid;
 	struct xpc_rsvd_page *rp;
-	short partid;
-	struct xpc_partition *part;
 	u64 *discovered_nasids;
 	enum xp_retval ret;
 
@@ -922,8 +546,6 @@ xpc_discovery(void)
 	if (remote_rp == NULL)
 		return;
 
-	remote_vars = (struct xpc_vars *)remote_rp;
-
 	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
 				    GFP_KERNEL);
 	if (discovered_nasids == NULL) {
@@ -988,7 +610,7 @@ xpc_discovery(void)
 				continue;
 			}
 
-			/* pull over the reserved page structure */
+			/* pull over the rsvd page header & part_nasids mask */
 
 			ret = xpc_get_remote_rp(nasid, discovered_nasids,
 						remote_rp, &remote_rp_pa);
@@ -1003,72 +625,8 @@ xpc_discovery(void)
 				continue;
 			}
 
-			remote_vars_pa = remote_rp->sn.vars_pa;
-
-			partid = remote_rp->SAL_partid;
-			part = &xpc_partitions[partid];
-
-			/* pull over the cross partition variables */
-
-			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-			if (ret != xpSuccess) {
-				dev_dbg(xpc_part, "unable to get XPC variables "
-					"from nasid %d, reason=%d\n", nasid,
-					ret);
-
-				XPC_DEACTIVATE_PARTITION(part, ret);
-				continue;
-			}
-
-			if (part->act_state != XPC_P_INACTIVE) {
-				dev_dbg(xpc_part, "partition %d on nasid %d is "
-					"already activating\n", partid, nasid);
-				break;
-			}
-
-			/*
-			 * Register the remote partition's AMOs with SAL so it
-			 * can handle and cleanup errors within that address
-			 * range should the remote partition go down. We don't
-			 * unregister this range because it is difficult to
-			 * tell when outstanding writes to the remote partition
-			 * are finished and thus when it is thus safe to
-			 * unregister. This should not result in wasted space
-			 * in the SAL xp_addr_region table because we should
-			 * get the same page for remote_act_amos_pa after
-			 * module reloads and system reboots.
-			 */
-			if (sn_register_xp_addr_region
-			    (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
-				dev_dbg(xpc_part,
-					"partition %d failed to "
-					"register xp_addr region 0x%016lx\n",
-					partid, remote_vars->amos_page_pa);
-
-				XPC_SET_REASON(part, xpPhysAddrRegFailed,
-					       __LINE__);
-				break;
-			}
-
-			/*
-			 * The remote nasid is valid and available.
-			 * Send an interrupt to that nasid to notify
-			 * it that we are ready to begin activation.
-			 */
-			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
-				"nasid %d, phys_cpuid 0x%x\n",
-				remote_vars->amos_page_pa,
-				remote_vars->act_nasid,
-				remote_vars->act_phys_cpuid);
-
-			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-							   version)) {
-				part->remote_amos_page_pa =
-				    remote_vars->amos_page_pa;
-				xpc_mark_partition_disengaged(part);
-				xpc_cancel_partition_disengage_request(part);
-			}
-			xpc_IPI_send_activate(remote_vars);
+			xpc_initiate_partition_activation(remote_rp,
+							  remote_rp_pa, nasid);
 		}
 	}
 
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index ee28e231dc4..89c0bb9a27f 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -19,9 +19,370 @@
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
-struct xpc_vars *xpc_vars;
+static struct xpc_vars_sn2 *xpc_vars;	/* >>> Add _sn2 suffix? */
 static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */
 
+/*
+ * The following macros and functions are used to send and receive IPIs
+ * (also known as IRQs). There are two flavors of IPIs: one associated
+ * with partition activity (SGI_XPC_ACTIVATE) and one associated with
+ * channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_IPI_receive_sn2(AMO_t *amo)
+{
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static enum xp_retval
+xpc_IPI_send_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
+{
+	int ret = 0;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+	sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	return ((ret == 0) ? xpSuccess : xpPioReadError);
+}
+
+static AMO_t *
+xpc_IPI_init_sn2(int index)
+{
+	AMO_t *amo = xpc_vars->amos_page + index;
+
+	(void)xpc_IPI_receive_sn2(amo);	/* clear AMO variable */
+	return amo;
+}
+
+/*
+ * IPIs associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Flag the appropriate AMO variable and send an IPI to the specified node.
+ */
+static void
+xpc_activate_IRQ_send_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
+		      int to_phys_cpuid)
+{
+	int w_index = XPC_NASID_W_INDEX(from_nasid);
+	int b_index = XPC_NASID_B_INDEX(from_nasid);
+	AMO_t *amos = (AMO_t *)__va(amos_page_pa +
+				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
+
+	(void)xpc_IPI_send_sn2(&amos[w_index], (1UL << b_index), to_nasid,
+			       to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static void
+xpc_activate_IRQ_send_local_sn2(int from_nasid)
+{
+	int w_index = XPC_NASID_W_INDEX(from_nasid);
+	int b_index = XPC_NASID_B_INDEX(from_nasid);
+	AMO_t *amos = (AMO_t *)__va(xpc_vars->amos_page_pa +
+				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
+
+	/* fake the sending and receipt of an activate IRQ from remote nasid */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amos[w_index].variable), FETCHOP_OR,
+			 (1UL << b_index));
+	atomic_inc(&xpc_act_IRQ_rcvd);
+	wake_up_interruptible(&xpc_act_IRQ_wq);
+}
+
+static void
+xpc_IPI_send_local_activate_sn2(int from_nasid)
+{
+	xpc_activate_IRQ_send_local_sn2(from_nasid);
+}
+
+static void
+xpc_IPI_send_activated_sn2(struct xpc_partition *part)
+{
+	xpc_activate_IRQ_send_sn2(part->remote_amos_page_pa,
+				  cnodeid_to_nasid(0), part->remote_act_nasid,
+				  part->remote_act_phys_cpuid);
+}
+
+static void
+xpc_IPI_send_local_reactivate_sn2(int from_nasid)
+{
+	xpc_activate_IRQ_send_local_sn2(from_nasid);
+}
+
+static void
+xpc_IPI_send_disengage_sn2(struct xpc_partition *part)
+{
+	xpc_activate_IRQ_send_sn2(part->remote_amos_page_pa,
+				  cnodeid_to_nasid(0), part->remote_act_nasid,
+				  part->remote_act_phys_cpuid);
+}
+
+/*
+ * IPIs associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Send an IPI to the remote partition that is associated with the
+ * specified channel.
+ */
+static void
+xpc_notify_IRQ_send_sn2(struct xpc_channel *ch, u8 ipi_flag,
+			char *ipi_flag_string, unsigned long *irq_flags)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	enum xp_retval ret;
+
+	if (likely(part->act_state != XPC_P_DEACTIVATING)) {
+		ret = xpc_IPI_send_sn2(part->remote_IPI_amo_va,
+				       (u64)ipi_flag << (ch->number * 8),
+				       part->remote_IPI_nasid,
+				       part->remote_IPI_phys_cpuid,
+				       SGI_XPC_NOTIFY);
+		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+			ipi_flag_string, ch->partid, ch->number, ret);
+		if (unlikely(ret != xpSuccess)) {
+			if (irq_flags != NULL)
+				spin_unlock_irqrestore(&ch->lock, *irq_flags);
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			if (irq_flags != NULL)
+				spin_lock_irqsave(&ch->lock, *irq_flags);
+		}
+	}
+}
+
+#define XPC_NOTIFY_IRQ_SEND_SN2(_ch, _ipi_f, _irq_f) \
+		xpc_notify_IRQ_send_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us an IPI. This faked IPI will be handled
+ * by xpc_dropped_IPI_check().
+ */
+static void
+xpc_notify_IRQ_send_local_sn2(struct xpc_channel *ch, u8 ipi_flag,
+			      char *ipi_flag_string)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+
+	FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
+			 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
+	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+		ipi_flag_string, ch->partid, ch->number);
+}
+
+#define XPC_NOTIFY_IRQ_SEND_LOCAL_SN2(_ch, _ipi_f) \
+		xpc_notify_IRQ_send_local_sn2(_ch, _ipi_f, #_ipi_f)
+
+static void
+xpc_IPI_send_closerequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->local_openclose_args;
+
+	args->reason = ch->reason;
+	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
+}
+
+static void
+xpc_IPI_send_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_CLOSEREPLY, irq_flags);
+}
+
+static void
+xpc_IPI_send_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->local_openclose_args;
+
+	args->msg_size = ch->msg_size;
+	args->local_nentries = ch->local_nentries;
+	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_OPENREQUEST, irq_flags);
+}
+
+static void
+xpc_IPI_send_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->local_openclose_args;
+
+	args->remote_nentries = ch->remote_nentries;
+	args->local_nentries = ch->local_nentries;
+	args->local_msgqueue_pa = __pa(ch->local_msgqueue);
+	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_OPENREPLY, irq_flags);
+}
+
+static void
+xpc_IPI_send_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_MSGREQUEST, NULL);
+}
+
+static void
+xpc_IPI_send_local_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_NOTIFY_IRQ_SEND_LOCAL_SN2(ch, XPC_IPI_MSGREQUEST);
+}
+
+/*
+ * This next set of functions is used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static void
+xpc_mark_partition_engaged_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+				   (XPC_ENGAGED_PARTITIONS_AMO *
+				    sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 (1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static void
+xpc_mark_partition_disengaged_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+				   (XPC_ENGAGED_PARTITIONS_AMO *
+				    sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static void
+xpc_request_partition_disengage_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 (1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static void
+xpc_cancel_partition_disengage_request_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static u64
+xpc_partition_engaged_sn2(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+	/* return our partition's AMO variable ANDed with partid_mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		partid_mask);
+}
+
+static u64
+xpc_partition_disengage_requested_sn2(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+	/* return our partition's AMO variable ANDed with partid_mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		partid_mask);
+}
+
+static void
+xpc_clear_partition_engaged_sn2(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+	/* clear bit(s) based on partid_mask in our partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~partid_mask);
+}
+
+static void
+xpc_clear_partition_disengage_request_sn2(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+	/* clear bit(s) based on partid_mask in our partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~partid_mask);
+}
+
 static enum xp_retval
 xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 {
@@ -79,7 +440,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	}
 
 	/* clear xpc_vars */
-	memset(xpc_vars, 0, sizeof(struct xpc_vars));
+	memset(xpc_vars, 0, sizeof(struct xpc_vars_sn2));
 
 	xpc_vars->version = XPC_V_VERSION;
 	xpc_vars->act_nasid = cpuid_to_nasid(0);
@@ -94,15 +455,446 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	/* initialize the activate IRQ related AMO variables */
 	for (i = 0; i < xp_nasid_mask_words; i++)
-		(void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
+		(void)xpc_IPI_init_sn2(XPC_ACTIVATE_IRQ_AMOS + i);
 
 	/* initialize the engaged remote partitions related AMO variables */
-	(void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
-	(void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+	(void)xpc_IPI_init_sn2(XPC_ENGAGED_PARTITIONS_AMO);
+	(void)xpc_IPI_init_sn2(XPC_DISENGAGE_REQUEST_AMO);
+
+	return xpSuccess;
+}
+
+static void
+xpc_increment_heartbeat_sn2(void)
+{
+	xpc_vars->heartbeat++;
+}
+
+static void
+xpc_offline_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars->heartbeat_offline = 1;
+}
+
+static void
+xpc_online_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars->heartbeat_offline = 0;
+}
+
+static void
+xpc_heartbeat_init_sn2(void)
+{
+	DBUG_ON(xpc_vars == NULL);
+
+	bitmap_zero(xpc_vars->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	xpc_heartbeating_to_mask = &xpc_vars->heartbeating_to_mask[0];
+	xpc_online_heartbeat_sn2();
+}
+
+static void
+xpc_heartbeat_exit_sn2(void)
+{
+	xpc_offline_heartbeat_sn2();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active.  If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb_sn2(void)
+{
+	struct xpc_vars_sn2 *remote_vars;
+	struct xpc_partition *part;
+	short partid;
+	enum xp_retval ret;
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+		if (xpc_exiting)
+			break;
+
+		if (partid == sn_partition_id)
+			continue;
+
+		part = &xpc_partitions[partid];
+
+		if (part->act_state == XPC_P_INACTIVE ||
+		    part->act_state == XPC_P_DEACTIVATING) {
+			continue;
+		}
+
+		/* pull the remote_hb cache line */
+		ret = xp_remote_memcpy(remote_vars,
+				       (void *)part->remote_vars_pa,
+				       XPC_RP_VARS_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			continue;
+		}
+
+		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
+			" = %ld, heartbeat_offline = %ld, HB_mask[0] = 0x%lx\n",
+			partid, remote_vars->heartbeat, part->last_heartbeat,
+			remote_vars->heartbeat_offline,
+			remote_vars->heartbeating_to_mask[0]);
+
+		if (((remote_vars->heartbeat == part->last_heartbeat) &&
+		     (remote_vars->heartbeat_offline == 0)) ||
+		    !xpc_hb_allowed(sn_partition_id,
+				    &remote_vars->heartbeating_to_mask)) {
+
+			XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
+			continue;
+		}
+
+		part->last_heartbeat = remote_vars->heartbeat;
+	}
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xp_retval
+xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
+{
+	enum xp_retval ret;
+
+	if (remote_vars_pa == 0)
+		return xpVarsNotSet;
+
+	/* pull over the cross partition variables */
+	ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
+
+	if (XPC_VERSION_MAJOR(remote_vars->version) !=
+	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+		return xpBadVersion;
+	}
 
 	return xpSuccess;
 }
 
+static void
+xpc_initiate_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+				      u64 remote_rp_pa, int nasid)
+{
+	xpc_IPI_send_local_activate(nasid);
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+			      struct timespec *remote_rp_stamp,
+			      u64 remote_rp_pa, u64 remote_vars_pa,
+			      struct xpc_vars_sn2 *remote_vars)
+{
+	part->remote_rp_version = remote_rp_version;
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
+		part->remote_rp_version);
+
+	part->remote_rp_stamp = *remote_rp_stamp;
+	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
+		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+
+	part->remote_rp_pa = remote_rp_pa;
+	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+	part->remote_vars_pa = remote_vars_pa;
+	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+		part->remote_vars_pa);
+
+	part->last_heartbeat = remote_vars->heartbeat;
+	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
+		part->last_heartbeat);
+
+	part->remote_vars_part_pa = remote_vars->vars_part_pa;
+	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+		part->remote_vars_part_pa);
+
+	part->remote_act_nasid = remote_vars->act_nasid;
+	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
+		part->remote_act_nasid);
+
+	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
+		part->remote_act_phys_cpuid);
+
+	part->remote_amos_page_pa = remote_vars->amos_page_pa;
+	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+		part->remote_amos_page_pa);
+
+	part->remote_vars_version = remote_vars->version;
+	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+		part->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated an IPI.  Inspect
+ * that nasid to determine if its partition needs to be activated or
+ * deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat.  A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_act_IRQ_req_sn2(int nasid)
+{
+	struct xpc_rsvd_page *remote_rp;
+	struct xpc_vars_sn2 *remote_vars;
+	u64 remote_rp_pa;
+	u64 remote_vars_pa;
+	int remote_rp_version;
+	int reactivate = 0;
+	int stamp_diff;
+	struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
+	short partid;
+	struct xpc_partition *part;
+	enum xp_retval ret;
+
+	/* pull over the reserved page structure */
+
+	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
+
+	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+	if (ret != xpSuccess) {
+		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+		return;
+	}
+
+	remote_vars_pa = remote_rp->sn.vars_pa;
+	remote_rp_version = remote_rp->version;
+	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
+		remote_rp_stamp = remote_rp->stamp;
+
+	partid = remote_rp->SAL_partid;
+	part = &xpc_partitions[partid];
+
+	/* pull over the cross partition variables */
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;
+
+	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
+	if (ret != xpSuccess) {
+
+		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return;
+	}
+
+	part->act_IRQ_rcvd++;
+
+	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+		"%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
+		remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
+
+	if (xpc_partition_disengaged(part) &&
+	    part->act_state == XPC_P_INACTIVE) {
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_stamp, remote_rp_pa,
+					      remote_vars_pa, remote_vars);
+
+		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+			if (xpc_partition_disengage_requested(1UL << partid)) {
+				/*
+				 * Other side is waiting on us to disengage,
+				 * even though we already have.
+				 */
+				return;
+			}
+
+		} else {
+			/* other side doesn't support disengage requests */
+			xpc_clear_partition_disengage_request(1UL << partid);
+		}
+
+		xpc_activate_partition(part);
+		return;
+	}
+
+	DBUG_ON(part->remote_rp_version == 0);
+	DBUG_ON(part->remote_vars_version == 0);
+
+	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+						       remote_vars_version));
+
+		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+							       version));
+			/* see if the other side rebooted */
+			if (part->remote_amos_page_pa ==
+			    remote_vars->amos_page_pa &&
+			    xpc_hb_allowed(sn_partition_id,
+					  &remote_vars->heartbeating_to_mask)) {
+				/* doesn't look that way, so ignore the IPI */
+				return;
+			}
+		}
+
+		/*
+		 * Other side rebooted and previous XPC didn't support the
+		 * disengage request, so we don't need to do anything special.
+		 */
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_stamp, remote_rp_pa,
+					      remote_vars_pa, remote_vars);
+		part->reactivate_nasid = nasid;
+		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+		return;
+	}
+
+	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
+
+	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+		/*
+		 * Other side rebooted and previous XPC did support the
+		 * disengage request, but the new one doesn't.
+		 */
+
+		xpc_clear_partition_engaged(1UL << partid);
+		xpc_clear_partition_disengage_request(1UL << partid);
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_stamp, remote_rp_pa,
+					      remote_vars_pa, remote_vars);
+		reactivate = 1;
+
+	} else {
+		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+						&remote_rp_stamp);
+		if (stamp_diff != 0) {
+			DBUG_ON(stamp_diff >= 0);
+
+			/*
+			 * Other side rebooted and the previous XPC did support
+			 * the disengage request, as does the new one.
+			 */
+
+			DBUG_ON(xpc_partition_engaged(1UL << partid));
+			DBUG_ON(xpc_partition_disengage_requested(1UL <<
+								  partid));
+
+			xpc_update_partition_info_sn2(part, remote_rp_version,
+						      &remote_rp_stamp,
+						      remote_rp_pa,
+						      remote_vars_pa,
+						      remote_vars);
+			reactivate = 1;
+		}
+	}
+
+	if (part->disengage_request_timeout > 0 &&
+	    !xpc_partition_disengaged(part)) {
+		/* still waiting on other side to disengage from us */
+		return;
+	}
+
+	if (reactivate) {
+		part->reactivate_nasid = nasid;
+		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+		   xpc_partition_disengage_requested(1UL << partid)) {
+		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+	}
+}
+
+/*
+ * Loop through the activation AMO variables and process any bits
+ * which are set.  Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_act_IRQ_sender_sn2(void)
+{
+	int word, bit;
+	u64 nasid_mask;
+	u64 nasid;		/* remote nasid */
+	int n_IRQs_detected = 0;
+	AMO_t *act_amos;
+
+	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
+
+	/* scan through act AMO variable looking for non-zero entries */
+	for (word = 0; word < xp_nasid_mask_words; word++) {
+
+		if (xpc_exiting)
+			break;
+
+		nasid_mask = xpc_IPI_receive_sn2(&act_amos[word]);
+		if (nasid_mask == 0) {
+			/* no IRQs from nasids in this variable */
+			continue;
+		}
+
+		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
+			nasid_mask);
+
+		/*
+		 * If this nasid has been added to the machine since
+		 * our partition was reset, this will retain the
+		 * remote nasid in our reserved pages machine mask.
+		 * This is used in the event of module reload.
+		 */
+		xpc_mach_nasids[word] |= nasid_mask;
+
+		/* locate the nasid(s) which sent interrupts */
+
+		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
+			if (nasid_mask & (1UL << bit)) {
+				n_IRQs_detected++;
+				nasid = XPC_NASID_FROM_W_B(word, bit);
+				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
+					nasid);
+				xpc_identify_act_IRQ_req_sn2(nasid);
+			}
+		}
+	}
+	return n_IRQs_detected;
+}
+
+static void
+xpc_process_act_IRQ_rcvd_sn2(int n_IRQs_expected)
+{
+	int n_IRQs_detected;
+
+	n_IRQs_detected = xpc_identify_act_IRQ_sender_sn2();
+	if (n_IRQs_detected < n_IRQs_expected) {
+		/* retry once to help avoid missing AMO */
+		(void)xpc_identify_act_IRQ_sender_sn2();
+	}
+}
+
 /*
  * Setup the infrastructure necessary to support XPartition Communication
  * between the specified remote partition and the local one.
@@ -177,7 +969,7 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 
 	part->remote_openclose_args_pa = 0;
 
-	part->local_IPI_amo_va = xpc_IPI_init(partid);
+	part->local_IPI_amo_va = xpc_IPI_init_sn2(partid);
 	part->local_IPI_amo = 0;
 	spin_lock_init(&part->IPI_lock);
 
@@ -468,6 +1260,28 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 {
 	enum xp_retval ret;
 
+	/*
+	 * Register the remote partition's AMOs with SAL so it can handle
+	 * and cleanup errors within that address range should the remote
+	 * partition go down. We don't unregister this range because it is
+	 * difficult to tell when outstanding writes to the remote partition
+	 * are finished and thus when it is safe to unregister. This should
+	 * not result in wasted space in the SAL xp_addr_region table because
+	 * we should get the same page for remote_amos_page_pa after module
+	 * reloads and system reboots.
+	 */
+	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
+				       PAGE_SIZE, 1) < 0) {
+		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
+			 "xp_addr region\n", XPC_PARTID(part));
+
+		ret = xpPhysAddrRegFailed;
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return ret;
+	}
+
+	xpc_IPI_send_activated(part);
+
 	while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
 		if (ret != xpRetry) {
 			XPC_DEACTIVATE_PARTITION(part, ret);
@@ -651,15 +1465,370 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 	return msg;
 }
 
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send an IPI to the recipient
+ * partition.
+ */
+static void
+xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
+{
+	struct xpc_msg *msg;
+	s64 put = initial_put + 1;
+	int send_IPI = 0;
+
+	while (1) {
+
+		while (1) {
+			if (put == ch->w_local_GP.put)
+				break;
+
+			msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+						 (put % ch->local_nentries) *
+						 ch->msg_size);
+
+			if (!(msg->flags & XPC_M_READY))
+				break;
+
+			put++;
+		}
+
+		if (put == initial_put) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
+		    initial_put) {
+			/* someone else beat us to it */
+			DBUG_ON(ch->local_GP->put < initial_put);
+			break;
+		}
+
+		/* we just set the new value of local_GP->put */
+
+		dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
+			"channel=%d\n", put, ch->partid, ch->number);
+
+		send_IPI = 1;
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->put is not XPC_M_READY or that local_GP->put
+		 * equals w_local_GP.put, so we'll go have a look.
+		 */
+		initial_put = put;
+	}
+
+	if (send_IPI)
+		xpc_IPI_send_msgrequest_sn2(ch);
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xp_retval
+xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
+		     struct xpc_msg **address_of_msg)
+{
+	struct xpc_msg *msg;
+	enum xp_retval ret;
+	s64 put;
+
+	/* this reference will be dropped in xpc_send_msg_sn2() */
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		xpc_msgqueue_deref(ch);
+		return ch->reason;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return xpNotConnected;
+	}
+
+	/*
+	 * Get the next available message entry from the local message queue.
+	 * If none are available, we'll make sure that we grab the latest
+	 * GP values.
+	 */
+	ret = xpTimeout;
+
+	while (1) {
+
+		put = ch->w_local_GP.put;
+		rmb();	/* guarantee that .put loads before .get */
+		if (put - ch->w_remote_GP.get < ch->local_nentries) {
+
+			/* There are available message entries. We need to try
+			 * to secure one for ourselves. We'll do this by trying
+			 * to increment w_local_GP.put as long as someone else
+			 * doesn't beat us to it. If they do, we'll have to
+			 * try again.
+			 */
+			if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
+				/* we got the entry referenced by put */
+				break;
+			}
+			continue;	/* try again */
+		}
+
+		/*
+		 * There aren't any available msg entries at this time.
+		 *
+		 * In waiting for a message entry to become available,
+		 * we set a timeout in case the other side is not
+		 * sending completion IPIs. This lets us fake an IPI
+		 * that will cause the IPI handler to fetch the latest
+		 * GP values as if an IPI was sent by the other side.
+		 */
+		if (ret == xpTimeout)
+			xpc_IPI_send_local_msgrequest_sn2(ch);
+
+		if (flags & XPC_NOWAIT) {
+			xpc_msgqueue_deref(ch);
+			return xpNoWait;
+		}
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout) {
+			xpc_msgqueue_deref(ch);
+			return ret;
+		}
+	}
+
+	/* get the message's address and initialize it */
+	msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+				 (put % ch->local_nentries) * ch->msg_size);
+
+	DBUG_ON(msg->flags != 0);
+	msg->number = put;
+
+	dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
+		"msg_number=%ld, partid=%d, channel=%d\n", put + 1,
+		(void *)msg, msg->number, ch->partid, ch->number);
+
+	*address_of_msg = msg;
+
+	return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends an IPI to the partition the
+ * message is being sent to.
+ */
+static enum xp_retval
+xpc_send_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
+		 xpc_notify_func func, void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_notify *notify = notify;
+	s64 put, msg_number = msg->number;
+
+	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+	DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
+		msg_number % ch->local_nentries);
+	DBUG_ON(msg->flags & XPC_M_READY);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		/* drop the reference grabbed in xpc_allocate_msg_sn2() */
+		xpc_msgqueue_deref(ch);
+		return ch->reason;
+	}
+
+	if (notify_type != 0) {
+		/*
+		 * Tell the remote side to send an ACK interrupt when the
+		 * message has been delivered.
+		 */
+		msg->flags |= XPC_M_INTERRUPT;
+
+		atomic_inc(&ch->n_to_notify);
+
+		notify = &ch->notify_queue[msg_number % ch->local_nentries];
+		notify->func = func;
+		notify->key = key;
+		notify->type = notify_type;
+
+		/* >>> is a mb() needed here? */
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			/*
+			 * An error occurred between our last error check and
+			 * this one. We will try to clear the type field from
+			 * the notify entry. If we succeed then
+			 * xpc_disconnect_channel() didn't already process
+			 * the notify entry.
+			 */
+			if (cmpxchg(&notify->type, notify_type, 0) ==
+			    notify_type) {
+				atomic_dec(&ch->n_to_notify);
+				ret = ch->reason;
+			}
+
+			/* drop reference grabbed in xpc_allocate_msg_sn2() */
+			xpc_msgqueue_deref(ch);
+			return ret;
+		}
+	}
+
+	msg->flags |= XPC_M_READY;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of ch->local_GP->put.
+	 */
+	mb();
+
+	/* see if the message is next in line to be sent, if so send it */
+
+	put = ch->local_GP->put;
+	if (put == msg_number)
+		xpc_send_msgs_sn2(ch, put);
+
+	/* drop the reference grabbed in xpc_allocate_msg_sn2() */
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send an IPI to the message sender's partition.
+ */
+static void
+xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+	struct xpc_msg *msg;
+	s64 get = initial_get + 1;
+	int send_IPI = 0;
+
+	while (1) {
+
+		while (1) {
+			if (get == ch->w_local_GP.get)
+				break;
+
+			msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+						 (get % ch->remote_nentries) *
+						 ch->msg_size);
+
+			if (!(msg->flags & XPC_M_DONE))
+				break;
+
+			msg_flags |= msg->flags;
+			get++;
+		}
+
+		if (get == initial_get) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
+		    initial_get) {
+			/* someone else beat us to it */
+			DBUG_ON(ch->local_GP->get <= initial_get);
+			break;
+		}
+
+		/* we just set the new value of local_GP->get */
+
+		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
+			"channel=%d\n", get, ch->partid, ch->number);
+
+		send_IPI = (msg_flags & XPC_M_INTERRUPT);
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->get is not XPC_M_DONE or that local_GP->get
+		 * equals w_local_GP.get, so we'll go have a look.
+		 */
+		initial_get = get;
+	}
+
+	if (send_IPI)
+		xpc_IPI_send_msgrequest_sn2(ch);
+}
+
+static void
+xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
+{
+	s64 get;
+	s64 msg_number = msg->number;
+
+	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
+		(void *)msg, msg_number, ch->partid, ch->number);
+
+	DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
+		msg_number % ch->remote_nentries);
+	DBUG_ON(msg->flags & XPC_M_DONE);
+
+	msg->flags |= XPC_M_DONE;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of ch->local_GP->get.
+	 */
+	mb();
+
+	/*
+	 * See if this message is next in line to be acknowledged as having
+	 * been delivered.
+	 */
+	get = ch->local_GP->get;
+	if (get == msg_number)
+		xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
+}
+
 void
 xpc_init_sn2(void)
 {
 	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
+	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
+	xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
+	xpc_online_heartbeat = xpc_online_heartbeat_sn2;
+	xpc_heartbeat_init = xpc_heartbeat_init_sn2;
+	xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
+	xpc_check_remote_hb = xpc_check_remote_hb_sn2;
+
+	xpc_initiate_partition_activation =
+	    xpc_initiate_partition_activation_sn2;
+	xpc_process_act_IRQ_rcvd = xpc_process_act_IRQ_rcvd_sn2;
 	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
 	xpc_get_IPI_flags = xpc_get_IPI_flags_sn2;
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
+
+	xpc_mark_partition_engaged = xpc_mark_partition_engaged_sn2;
+	xpc_mark_partition_disengaged = xpc_mark_partition_disengaged_sn2;
+	xpc_request_partition_disengage = xpc_request_partition_disengage_sn2;
+	xpc_cancel_partition_disengage_request =
+	    xpc_cancel_partition_disengage_request_sn2;
+	xpc_partition_engaged = xpc_partition_engaged_sn2;
+	xpc_partition_disengage_requested =
+	    xpc_partition_disengage_requested_sn2;
+	xpc_clear_partition_engaged = xpc_clear_partition_engaged_sn2;
+	xpc_clear_partition_disengage_request =
+	    xpc_clear_partition_disengage_request_sn2;
+
+	xpc_IPI_send_local_activate = xpc_IPI_send_local_activate_sn2;
+	xpc_IPI_send_activated = xpc_IPI_send_activated_sn2;
+	xpc_IPI_send_local_reactivate = xpc_IPI_send_local_reactivate_sn2;
+	xpc_IPI_send_disengage = xpc_IPI_send_disengage_sn2;
+
+	xpc_IPI_send_closerequest = xpc_IPI_send_closerequest_sn2;
+	xpc_IPI_send_closereply = xpc_IPI_send_closereply_sn2;
+	xpc_IPI_send_openrequest = xpc_IPI_send_openrequest_sn2;
+	xpc_IPI_send_openreply = xpc_IPI_send_openreply_sn2;
+
+	xpc_allocate_msg = xpc_allocate_msg_sn2;
+
+	xpc_send_msg = xpc_send_msg_sn2;
+	xpc_received_msg = xpc_received_msg_sn2;
 }
 
 void
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 770f0a8c669..32c577b8d0d 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -19,15 +19,22 @@
 /* >>> uv_gpa() is defined in <gru/grukservices.h> */
 #define uv_gpa(_a)		((unsigned long)_a)
 
-/* >>> temporarily define next three items for xpc.h */
-#define	SGI_XPC_ACTIVATE	23
-#define	SGI_XPC_NOTIFY		24
-#define sn_send_IPI_phys(_a, _b, _c, _d)
-
 #include "xpc.h"
 
+static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+
 static void *xpc_activate_mq;
 
+static void
+xpc_IPI_send_local_activate_uv(struct xpc_partition *part)
+{
+	/*
+	 * >>> make our side think that the remote partition sent an activate
+	 * >>> message our way. Also do what the activate IRQ handler would
+	 * >>> do had one really been sent.
+	 */
+}
+
 static enum xp_retval
 xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
 {
@@ -36,6 +43,41 @@ xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
 	return xpSuccess;
 }
 
+static void
+xpc_increment_heartbeat_uv(void)
+{
+	/* >>> send heartbeat msg to xpc_heartbeating_to_mask partids */
+}
+
+static void
+xpc_heartbeat_init_uv(void)
+{
+	bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+	xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
+}
+
+static void
+xpc_heartbeat_exit_uv(void)
+{
+	/* >>> send heartbeat_offline msg to xpc_heartbeating_to_mask partids */
+}
+
+static void
+xpc_initiate_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+				     u64 remote_rp_pa, int nasid)
+{
+	short partid = remote_rp->SAL_partid;
+	struct xpc_partition *part = &xpc_partitions[partid];
+
+/*
+ * >>> setup part structure with the bits of info we can glean from the rp
+ * >>>	part->remote_rp_pa = remote_rp_pa;
+ * >>>	part->sn.uv.activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
+ */
+
+	xpc_IPI_send_local_activate_uv(part);
+}
+
 /*
  * Setup the infrastructure necessary to support XPartition Communication
  * between the specified remote partition and the local one.
@@ -83,6 +125,11 @@ void
 xpc_init_uv(void)
 {
 	xpc_rsvd_page_init = xpc_rsvd_page_init_uv;
+	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
+	xpc_heartbeat_init = xpc_heartbeat_init_uv;
+	xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
+	xpc_initiate_partition_activation =
+	    xpc_initiate_partition_activation_uv;
 	xpc_setup_infrastructure = xpc_setup_infrastructure_uv;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_uv;
 	xpc_make_first_contact = xpc_make_first_contact_uv;
-- 
cgit v1.2.3


From aaa3cd694c0c4ae534e8aafdf4227e395c57d6bd Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:07 -0700
Subject: sgi-xp: base xpc_rsvd_page's timestamp on jiffies

Change XPC's reserved page timestamp to be based on jiffies.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           | 26 +++-----------------------
 drivers/misc/sgi-xp/xpc_main.c      | 16 ++++++++--------
 drivers/misc/sgi-xp/xpc_partition.c | 33 +++++++++++++++++----------------
 drivers/misc/sgi-xp/xpc_sn2.c       | 16 ++++++----------
 4 files changed, 34 insertions(+), 57 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index a3a67485cf8..56bf5dcc391 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -115,8 +115,8 @@ struct xpc_rsvd_page {
 		u64 vars_pa;	/* physical address of struct xpc_vars */
 		u64 activate_mq_gpa;	/* global phys address of activate_mq */
 	} sn;
-	struct timespec stamp;	/* time when reserved page was setup by XPC */
-	u64 pad2[9];		/* align to last u64 in 2nd 64-byte cacheline */
+	unsigned long stamp;	/* time when reserved page was setup by XPC */
+	u64 pad2[10];		/* align to last u64 in 2nd 64-byte cacheline */
 	u64 SAL_nasids_size;	/* SAL: size of each nasid mask in bytes */
 };
 
@@ -125,26 +125,6 @@ struct xpc_rsvd_page {
 #define XPC_SUPPORTS_RP_STAMP(_version) \
 			(_version >= _XPC_VERSION(1, 1))
 
-#define ZERO_STAMP	((struct timespec){0, 0})
-/*
- * compare stamps - the return value is:
- *
- *	< 0,	if stamp1 < stamp2
- *	= 0,	if stamp1 == stamp2
- *	> 0,	if stamp1 > stamp2
- */
-static inline int
-xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
-{
-	int ret;
-
-	ret = stamp1->tv_sec - stamp2->tv_sec;
-	if (ret == 0)
-		ret = stamp1->tv_nsec - stamp2->tv_nsec;
-
-	return ret;
-}
-
 /*
  * Define the structures by which XPC variables can be exported to other
  * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
@@ -492,7 +472,7 @@ struct xpc_partition {
 	/* XPC HB infrastructure */
 
 	u8 remote_rp_version;	/* version# of partition's rsvd pg */
-	struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */
+	unsigned long remote_rp_stamp; /* time when rsvd pg was initialized */
 	u64 remote_rp_pa;	/* phys addr of partition's rsvd pg */
 	u64 remote_vars_pa;	/* phys addr of partition's vars */
 	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 10dac3652b2..4a6eb377475 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -233,7 +233,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
 {
 	struct xpc_partition *part = (struct xpc_partition *)data;
 
-	DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+	DBUG_ON(time_is_after_jiffies(part->disengage_request_timeout));
 
 	(void)xpc_partition_disengaged(part);
 
@@ -262,7 +262,7 @@ xpc_hb_beater(unsigned long dummy)
 {
 	xpc_increment_heartbeat();
 
-	if (time_after_eq(jiffies, xpc_hb_check_timeout))
+	if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
 		wake_up_interruptible(&xpc_act_IRQ_wq);
 
 	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
@@ -312,7 +312,7 @@ xpc_hb_checker(void *ignore)
 			atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
 
 		/* checking of remote heartbeats is skewed by IRQ handling */
-		if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
 			dev_dbg(xpc_part, "checking remote heartbeats\n");
 			xpc_check_remote_hb();
 
@@ -344,8 +344,8 @@ xpc_hb_checker(void *ignore)
 		(void)wait_event_interruptible(xpc_act_IRQ_wq,
 					       (last_IRQ_count <
 						atomic_read(&xpc_act_IRQ_rcvd)
-						|| time_after_eq(jiffies,
-							xpc_hb_check_timeout) ||
+						|| time_is_before_eq_jiffies(
+						xpc_hb_check_timeout) ||
 						xpc_exiting));
 	}
 
@@ -929,7 +929,7 @@ xpc_do_exit(enum xp_retval reason)
 		}
 
 		if (xpc_partition_engaged(-1UL)) {
-			if (time_after(jiffies, printmsg_time)) {
+			if (time_is_before_jiffies(printmsg_time)) {
 				dev_info(xpc_part, "waiting for remote "
 					 "partitions to disengage, timeout in "
 					 "%ld seconds\n",
@@ -964,7 +964,7 @@ xpc_do_exit(enum xp_retval reason)
 	DBUG_ON(xpc_any_hbs_allowed() != 0);
 
 	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->stamp = ZERO_STAMP;
+	xpc_rsvd_page->stamp = 0;
 
 	if (reason == xpUnloading) {
 		(void)unregister_die_notifier(&xpc_die_notifier);
@@ -1295,7 +1295,7 @@ xpc_init(void)
 	/* initialization was not successful */
 out_4:
 	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->stamp = ZERO_STAMP;
+	xpc_rsvd_page->stamp = 0;
 
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 4e14effdedd..90ec5ca8c9a 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -152,6 +152,7 @@ xpc_setup_rsvd_page(void)
 {
 	struct xpc_rsvd_page *rp;
 	u64 rp_pa;
+	unsigned long new_stamp;
 
 	/* get the local reserved page's address */
 
@@ -201,7 +202,10 @@ xpc_setup_rsvd_page(void)
 	 * This signifies to the remote partition that our reserved
 	 * page is initialized.
 	 */
-	rp->stamp = CURRENT_TIME;
+	new_stamp = jiffies;
+	if (new_stamp == 0 || new_stamp == rp->stamp)
+		new_stamp++;
+	rp->stamp = new_stamp;
 
 	return rp;
 }
@@ -350,18 +354,8 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 			discovered_nasids[i] |= remote_part_nasids[i];
 	}
 
-	/* check that the partid is valid and is for another partition */
-
-	if (remote_rp->SAL_partid < 0 ||
-	    remote_rp->SAL_partid >= xp_max_npartitions) {
-		return xpInvalidPartid;
-	}
-
-	if (remote_rp->SAL_partid == sn_partition_id)
-		return xpLocalPartid;
-
-	/* see if the rest of the reserved page has been set up by XPC */
-	if (timespec_equal(&remote_rp->stamp, &ZERO_STAMP))
+	/* see if the reserved page has been set up by XPC */
+	if (remote_rp->stamp == 0)
 		return xpRsvdPageNotSet;
 
 	if (XPC_VERSION_MAJOR(remote_rp->version) !=
@@ -369,8 +363,15 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 		return xpBadVersion;
 	}
 
-	if (remote_rp->max_npartitions <= sn_partition_id)
+	/* check that both local and remote partids are valid for each side */
+	if (remote_rp->SAL_partid < 0 ||
+	    remote_rp->SAL_partid >= xp_max_npartitions ||
+	    remote_rp->max_npartitions <= sn_partition_id) {
 		return xpInvalidPartid;
+	}
+
+	if (remote_rp->SAL_partid == sn_partition_id)
+		return xpLocalPartid;
 
 	return xpSuccess;
 }
@@ -388,8 +389,8 @@ xpc_partition_disengaged(struct xpc_partition *part)
 	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
 	if (part->disengage_request_timeout) {
 		if (!disengaged) {
-			if (time_before(jiffies,
-			    part->disengage_request_timeout)) {
+			if (time_is_after_jiffies(part->
+						  disengage_request_timeout)) {
 				/* timelimit hasn't been reached yet */
 				return 0;
 			}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 89c0bb9a27f..7216df36bc7 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -597,8 +597,8 @@ xpc_initiate_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
  */
 static void
 xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
-			      struct timespec *remote_rp_stamp,
-			      u64 remote_rp_pa, u64 remote_vars_pa,
+			      unsigned long *remote_rp_stamp, u64 remote_rp_pa,
+			      u64 remote_vars_pa,
 			      struct xpc_vars_sn2 *remote_vars)
 {
 	part->remote_rp_version = remote_rp_version;
@@ -606,8 +606,8 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 		part->remote_rp_version);
 
 	part->remote_rp_stamp = *remote_rp_stamp;
-	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
-		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+	dev_dbg(xpc_part, "  remote_rp_stamp = 0x%016lx\n",
+		part->remote_rp_stamp);
 
 	part->remote_rp_pa = remote_rp_pa;
 	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
@@ -664,8 +664,7 @@ xpc_identify_act_IRQ_req_sn2(int nasid)
 	u64 remote_vars_pa;
 	int remote_rp_version;
 	int reactivate = 0;
-	int stamp_diff;
-	struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
+	unsigned long remote_rp_stamp = 0;
 	short partid;
 	struct xpc_partition *part;
 	enum xp_retval ret;
@@ -788,10 +787,7 @@ xpc_identify_act_IRQ_req_sn2(int nasid)
 	} else {
 		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
 
-		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
-						&remote_rp_stamp);
-		if (stamp_diff != 0) {
-			DBUG_ON(stamp_diff >= 0);
+		if (remote_rp_stamp != part->remote_rp_stamp) {
 
 			/*
 			 * Other side rebooted and the previous XPC did support
-- 
cgit v1.2.3


From 97bf1aa1e1bb18de9bb1987c6eb9ad751bf08aab Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:08 -0700
Subject: sgi-xp: move xpc_allocate() into xpc_send()/xpc_send_notify()

Move xpc_allocate() functionality into xpc_send()/xpc_send_notify() so
xpc_allocate() no longer needs to be called by XPNET.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h          |  44 ++++++---------
 drivers/misc/sgi-xp/xp_main.c     |  23 +++-----
 drivers/misc/sgi-xp/xpc.h         |   9 +--
 drivers/misc/sgi-xp/xpc_channel.c | 112 +++++++++++++-------------------------
 drivers/misc/sgi-xp/xpc_main.c    |  14 ++---
 drivers/misc/sgi-xp/xpc_sn2.c     |  64 ++++++++++------------
 drivers/misc/sgi-xp/xpnet.c       |  11 ++--
 7 files changed, 106 insertions(+), 171 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 0f75592896d..43bf2470850 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -116,12 +116,6 @@
  * The size of the payload is defined by the user via xpc_connect(). A user-
  * defined message resides in the payload area.
  *
- * The user should have no dealings with the message header, but only the
- * message's payload. When a message entry is allocated (via xpc_allocate())
- * a pointer to the payload area is returned and not the actual beginning of
- * the XPC message. The user then constructs a message in the payload area
- * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
- *
  * The size of a message entry (within a message queue) must be a cacheline
  * sized multiple in order to facilitate the BTE transfer of messages from one
  * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
@@ -221,9 +215,10 @@ enum xp_retval {
 	xpBteCopyError,		/* 52: bte_copy() returned error */
 	xpSalError,		/* 53: sn SAL error */
 	xpRsvdPageNotSet,	/* 54: the reserved page is not set up */
+	xpPayloadTooBig,	/* 55: payload too large for message slot */
 
-	xpUnsupported,		/* 55: unsupported functionality or resource */
-	xpUnknownReason		/* 56: unknown reason - must be last in enum */
+	xpUnsupported,		/* 56: unsupported functionality or resource */
+	xpUnknownReason		/* 57: unknown reason - must be last in enum */
 };
 
 /*
@@ -304,16 +299,15 @@ struct xpc_registration {
 
 #define XPC_CHANNEL_REGISTERED(_c)	(xpc_registrations[_c].func != NULL)
 
-/* the following are valid xpc_allocate() flags */
+/* the following are valid xpc_send() or xpc_send_notify() flags */
 #define XPC_WAIT	0	/* wait flag */
 #define XPC_NOWAIT	1	/* no wait flag */
 
 struct xpc_interface {
 	void (*connect) (int);
 	void (*disconnect) (int);
-	enum xp_retval (*allocate) (short, int, u32, void **);
-	enum xp_retval (*send) (short, int, void *);
-	enum xp_retval (*send_notify) (short, int, void *,
+	enum xp_retval (*send) (short, int, u32, void *, u16);
+	enum xp_retval (*send_notify) (short, int, u32, void *, u16,
 					xpc_notify_func, void *);
 	void (*received) (short, int, void *);
 	enum xp_retval (*partid_to_nasids) (short, void *);
@@ -323,10 +317,9 @@ extern struct xpc_interface xpc_interface;
 
 extern void xpc_set_interface(void (*)(int),
 			      void (*)(int),
-			      enum xp_retval (*)(short, int, u32, void **),
-			      enum xp_retval (*)(short, int, void *),
-			      enum xp_retval (*)(short, int, void *,
-						  xpc_notify_func, void *),
+			      enum xp_retval (*)(short, int, u32, void *, u16),
+			      enum xp_retval (*)(short, int, u32, void *, u16,
+						 xpc_notify_func, void *),
 			      void (*)(short, int, void *),
 			      enum xp_retval (*)(short, void *));
 extern void xpc_clear_interface(void);
@@ -336,22 +329,19 @@ extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
 extern void xpc_disconnect(int);
 
 static inline enum xp_retval
-xpc_allocate(short partid, int ch_number, u32 flags, void **payload)
-{
-	return xpc_interface.allocate(partid, ch_number, flags, payload);
-}
-
-static inline enum xp_retval
-xpc_send(short partid, int ch_number, void *payload)
+xpc_send(short partid, int ch_number, u32 flags, void *payload,
+	 u16 payload_size)
 {
-	return xpc_interface.send(partid, ch_number, payload);
+	return xpc_interface.send(partid, ch_number, flags, payload,
+				  payload_size);
 }
 
 static inline enum xp_retval
-xpc_send_notify(short partid, int ch_number, void *payload,
-		xpc_notify_func func, void *key)
+xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
+		u16 payload_size, xpc_notify_func func, void *key)
 {
-	return xpc_interface.send_notify(partid, ch_number, payload, func, key);
+	return xpc_interface.send_notify(partid, ch_number, flags, payload,
+					 payload_size, func, key);
 }
 
 static inline void
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 6f25613b27e..9c0ce2f15ff 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -58,10 +58,9 @@ xpc_notloaded(void)
 struct xpc_interface xpc_interface = {
 	(void (*)(int))xpc_notloaded,
 	(void (*)(int))xpc_notloaded,
-	(enum xp_retval(*)(short, int, u32, void **))xpc_notloaded,
-	(enum xp_retval(*)(short, int, void *))xpc_notloaded,
-	(enum xp_retval(*)(short, int, void *, xpc_notify_func, void *))
-	    xpc_notloaded,
+	(enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
+	(enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
+			   void *))xpc_notloaded,
 	(void (*)(short, int, void *))xpc_notloaded,
 	(enum xp_retval(*)(short, void *))xpc_notloaded
 };
@@ -73,16 +72,14 @@ EXPORT_SYMBOL_GPL(xpc_interface);
 void
 xpc_set_interface(void (*connect) (int),
 		  void (*disconnect) (int),
-		  enum xp_retval (*allocate) (short, int, u32, void **),
-		  enum xp_retval (*send) (short, int, void *),
-		  enum xp_retval (*send_notify) (short, int, void *,
+		  enum xp_retval (*send) (short, int, u32, void *, u16),
+		  enum xp_retval (*send_notify) (short, int, u32, void *, u16,
 						  xpc_notify_func, void *),
 		  void (*received) (short, int, void *),
 		  enum xp_retval (*partid_to_nasids) (short, void *))
 {
 	xpc_interface.connect = connect;
 	xpc_interface.disconnect = disconnect;
-	xpc_interface.allocate = allocate;
 	xpc_interface.send = send;
 	xpc_interface.send_notify = send_notify;
 	xpc_interface.received = received;
@@ -98,13 +95,11 @@ xpc_clear_interface(void)
 {
 	xpc_interface.connect = (void (*)(int))xpc_notloaded;
 	xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
-	xpc_interface.allocate = (enum xp_retval(*)(short, int, u32,
-						     void **))xpc_notloaded;
-	xpc_interface.send = (enum xp_retval(*)(short, int, void *))
+	xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
 	    xpc_notloaded;
-	xpc_interface.send_notify = (enum xp_retval(*)(short, int, void *,
-							xpc_notify_func,
-							void *))xpc_notloaded;
+	xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
+						       u16, xpc_notify_func,
+						       void *))xpc_notloaded;
 	xpc_interface.received = (void (*)(short, int, void *))
 	    xpc_notloaded;
 	xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 56bf5dcc391..6b622b091bd 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -624,9 +624,7 @@ extern void (*xpc_IPI_send_closereply) (struct xpc_channel *, unsigned long *);
 extern void (*xpc_IPI_send_openrequest) (struct xpc_channel *, unsigned long *);
 extern void (*xpc_IPI_send_openreply) (struct xpc_channel *, unsigned long *);
 
-extern enum xp_retval (*xpc_allocate_msg) (struct xpc_channel *, u32,
-					   struct xpc_msg **);
-extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, struct xpc_msg *,
+extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
 				       u8, xpc_notify_func, void *);
 extern void (*xpc_received_msg) (struct xpc_channel *, struct xpc_msg *);
 
@@ -664,9 +662,8 @@ extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern void xpc_initiate_connect(int);
 extern void xpc_initiate_disconnect(int);
 extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
-extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **);
-extern enum xp_retval xpc_initiate_send(short, int, void *);
-extern enum xp_retval xpc_initiate_send_notify(short, int, void *,
+extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
+extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
 					       xpc_notify_func, void *);
 extern void xpc_initiate_received(short, int, void *);
 extern void xpc_process_channel_activity(struct xpc_partition *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 26c5e12c122..55182c8dd32 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -1192,87 +1192,54 @@ xpc_allocate_msg_wait(struct xpc_channel *ch)
 }
 
 /*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel. NOTE that this routine can sleep waiting for a message
- * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
  *
- * Arguments:
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
  *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel #.
- *	flags - see xpc.h for valid flags.
- *	payload - address of the allocated payload area pointer (filled in on
- * 	          return) in which the user-defined message is constructed.
- */
-enum xp_retval
-xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	enum xp_retval ret = xpUnknownReason;
-	struct xpc_msg *msg = NULL;
-
-	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
-	*payload = NULL;
-
-	if (xpc_part_ref(part)) {
-		ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
-		xpc_part_deref(part);
-
-		if (msg != NULL)
-			*payload = &msg->payload;
-	}
-
-	return ret;
-}
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate() on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be received, nor will
- * notification be given when it does happen. Once this routine has returned
- * the message entry allocated via xpc_initiate_allocate() is no longer
- * accessable to the caller.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
+ * Once sent, this routine will not wait for the message to be received, nor
+ * will notification be given when it does happen.
  *
  * Arguments:
  *
  *	partid - ID of partition to which the channel is connected.
  *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
  */
 enum xp_retval
-xpc_initiate_send(short partid, int ch_number, void *payload)
+xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
+		  u16 payload_size)
 {
 	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xp_retval ret;
+	enum xp_retval ret = xpUnknownReason;
 
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
 		partid, ch_number);
 
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
+	DBUG_ON(payload == NULL);
 
-	ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
+	if (xpc_part_ref(part)) {
+		ret = xpc_send_msg(&part->channels[ch_number], flags, payload,
+				   payload_size, 0, NULL, NULL);
+		xpc_part_deref(part);
+	}
 
 	return ret;
 }
 
 /*
- * Send a message previously allocated using xpc_initiate_allocate on the
- * specified channel connected to the specified partition.
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
  *
- * This routine will not wait for the message to be sent. Once this routine
- * has returned the message entry allocated via xpc_initiate_allocate() is no
- * longer accessable to the caller.
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * This routine will not wait for the message to be sent or received.
  *
  * Once the remote end of the channel has received the message, the function
  * passed as an argument to xpc_initiate_send_notify() will be called. This
@@ -1282,38 +1249,37 @@ xpc_initiate_send(short partid, int ch_number, void *payload)
  *
  * If this routine returns an error, the caller's function will NOT be called.
  *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
  * Arguments:
  *
  *	partid - ID of partition to which the channel is connected.
  *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
  *	func - function to call with asynchronous notification of message
  *		  receipt. THIS FUNCTION MUST BE NON-BLOCKING.
  *	key - user-defined key to be passed to the function when it's called.
  */
 enum xp_retval
-xpc_initiate_send_notify(short partid, int ch_number, void *payload,
-			 xpc_notify_func func, void *key)
+xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
+			 u16 payload_size, xpc_notify_func func, void *key)
 {
 	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xp_retval ret;
+	enum xp_retval ret = xpUnknownReason;
 
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
 		partid, ch_number);
 
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
+	DBUG_ON(payload == NULL);
 	DBUG_ON(func == NULL);
 
-	ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
-			   func, key);
+	if (xpc_part_ref(part)) {
+		ret = xpc_send_msg(&part->channels[ch_number], flags, payload,
+				   payload_size, XPC_N_CALL, func, key);
+		xpc_part_deref(part);
+	}
 	return ret;
 }
 
@@ -1372,7 +1338,7 @@ xpc_deliver_msg(struct xpc_channel *ch)
  *	partid - ID of partition to which the channel is connected.
  *	ch_number - channel # message received on.
  *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
+ *			xpc_initiate_send() or xpc_initiate_send_notify().
  */
 void
 xpc_initiate_received(short partid, int ch_number, void *payload)
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 4a6eb377475..aae90f5933b 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -217,12 +217,9 @@ void (*xpc_IPI_send_openrequest) (struct xpc_channel *ch,
 void (*xpc_IPI_send_openreply) (struct xpc_channel *ch,
 				unsigned long *irq_flags);
 
-enum xp_retval (*xpc_allocate_msg) (struct xpc_channel *ch, u32 flags,
-				    struct xpc_msg **address_of_msg);
-
-enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, struct xpc_msg *msg,
-				u8 notify_type, xpc_notify_func func,
-				void *key);
+enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
+				void *payload, u16 payload_size, u8 notify_type,
+				xpc_notify_func func, void *key);
 void (*xpc_received_msg) (struct xpc_channel *ch, struct xpc_msg *msg);
 
 /*
@@ -1286,9 +1283,8 @@ xpc_init(void)
 
 	/* set the interface to point at XPC's functions */
 	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
-			  xpc_initiate_allocate, xpc_initiate_send,
-			  xpc_initiate_send_notify, xpc_initiate_received,
-			  xpc_initiate_partid_to_nasids);
+			  xpc_initiate_send, xpc_initiate_send_notify,
+			  xpc_initiate_received, xpc_initiate_partid_to_nasids);
 
 	return 0;
 
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 7216df36bc7..db67d348b35 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -1532,18 +1532,6 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 	enum xp_retval ret;
 	s64 put;
 
-	/* this reference will be dropped in xpc_send_msg_sn2() */
-	xpc_msgqueue_ref(ch);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-	if (!(ch->flags & XPC_C_CONNECTED)) {
-		xpc_msgqueue_deref(ch);
-		return xpNotConnected;
-	}
-
 	/*
 	 * Get the next available message entry from the local message queue.
 	 * If none are available, we'll make sure that we grab the latest
@@ -1582,16 +1570,12 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 		if (ret == xpTimeout)
 			xpc_IPI_send_local_msgrequest_sn2(ch);
 
-		if (flags & XPC_NOWAIT) {
-			xpc_msgqueue_deref(ch);
+		if (flags & XPC_NOWAIT)
 			return xpNoWait;
-		}
 
 		ret = xpc_allocate_msg_wait(ch);
-		if (ret != xpInterrupted && ret != xpTimeout) {
-			xpc_msgqueue_deref(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
 			return ret;
-		}
 	}
 
 	/* get the message's address and initialize it */
@@ -1606,7 +1590,6 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 		(void *)msg, msg->number, ch->partid, ch->number);
 
 	*address_of_msg = msg;
-
 	return xpSuccess;
 }
 
@@ -1616,24 +1599,38 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
  * message is being sent to.
  */
 static enum xp_retval
-xpc_send_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
-		 xpc_notify_func func, void *key)
+xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+		 u16 payload_size, u8 notify_type, xpc_notify_func func,
+		 void *key)
 {
 	enum xp_retval ret = xpSuccess;
+	struct xpc_msg *msg = msg;
 	struct xpc_notify *notify = notify;
-	s64 put, msg_number = msg->number;
+	s64 msg_number;
+	s64 put;
 
 	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
-	DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
-		msg_number % ch->local_nentries);
-	DBUG_ON(msg->flags & XPC_M_READY);
+
+	if (XPC_MSG_SIZE(payload_size) > ch->msg_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
 
 	if (ch->flags & XPC_C_DISCONNECTING) {
-		/* drop the reference grabbed in xpc_allocate_msg_sn2() */
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
 	}
 
+	ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	msg_number = msg->number;
+
 	if (notify_type != 0) {
 		/*
 		 * Tell the remote side to send an ACK interrupt when the
@@ -1663,13 +1660,12 @@ xpc_send_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
 				atomic_dec(&ch->n_to_notify);
 				ret = ch->reason;
 			}
-
-			/* drop reference grabbed in xpc_allocate_msg_sn2() */
-			xpc_msgqueue_deref(ch);
-			return ret;
+			goto out_1;
 		}
 	}
 
+	memcpy(&msg->payload, payload, payload_size);
+
 	msg->flags |= XPC_M_READY;
 
 	/*
@@ -1684,7 +1680,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
 	if (put == msg_number)
 		xpc_send_msgs_sn2(ch, put);
 
-	/* drop the reference grabbed in xpc_allocate_msg_sn2() */
+out_1:
 	xpc_msgqueue_deref(ch);
 	return ret;
 }
@@ -1821,8 +1817,6 @@ xpc_init_sn2(void)
 	xpc_IPI_send_openrequest = xpc_IPI_send_openrequest_sn2;
 	xpc_IPI_send_openreply = xpc_IPI_send_openreply_sn2;
 
-	xpc_allocate_msg = xpc_allocate_msg_sn2;
-
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
 }
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 9c540eb1847..f9356ba7315 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -438,7 +438,8 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xpnet_pending_msg *queued_msg;
 	enum xp_retval ret;
-	struct xpnet_message *msg;
+	u8 msg_buffer[XPNET_MSG_SIZE];
+	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer[0];
 	u64 start_addr, end_addr;
 	long dp;
 	u8 second_mac_octet;
@@ -524,11 +525,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		/* found a partition to send to */
 
-		ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
-				   XPC_NOWAIT, (void **)&msg);
-		if (unlikely(ret != xpSuccess))
-			continue;
-
 		msg->embedded_bytes = embedded_bytes;
 		if (unlikely(embedded_bytes != 0)) {
 			msg->version = XPNET_VERSION_EMBED;
@@ -553,7 +549,8 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		atomic_inc(&queued_msg->use_count);
 
-		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
+		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT,
+				      &msg, sizeof(msg) + embedded_bytes - 1,
 				      xpnet_send_completed, queued_msg);
 		if (unlikely(ret != xpSuccess)) {
 			atomic_dec(&queued_msg->use_count);
-- 
cgit v1.2.3


From 6e41017aad9ed175ca51e4828eabc8c5cf5910be Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:09 -0700
Subject: sgi-xp: isolate activate IRQ's hardware specific components

Isolate architecture specific code related to XPC's activate IRQ.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           |  14 ++-
 drivers/misc/sgi-xp/xpc_main.c      |  96 ++++++----------
 drivers/misc/sgi-xp/xpc_partition.c | 121 --------------------
 drivers/misc/sgi-xp/xpc_sn2.c       | 217 +++++++++++++++++++++++++++++++-----
 4 files changed, 229 insertions(+), 219 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 6b622b091bd..1edf37512de 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -480,7 +480,7 @@ struct xpc_partition {
 	u64 remote_amos_page_pa;	/* phys addr of partition's amos page */
 	int remote_act_nasid;	/* active part's act/deact nasid */
 	int remote_act_phys_cpuid;	/* active part's act/deact phys cpuid */
-	u32 act_IRQ_rcvd;	/* IRQs since activation */
+	u32 activate_IRQ_rcvd;	/* IRQs since activation */
 	spinlock_t act_lock;	/* protect updating of act_state */
 	u8 act_state;		/* from XPC HB viewpoint */
 	u8 remote_vars_version;	/* version# of partition's vars */
@@ -580,8 +580,8 @@ extern struct device *xpc_part;
 extern struct device *xpc_chan;
 extern int xpc_disengage_request_timelimit;
 extern int xpc_disengage_request_timedout;
-extern atomic_t xpc_act_IRQ_rcvd;
-extern wait_queue_head_t xpc_act_IRQ_wq;
+extern atomic_t xpc_activate_IRQ_rcvd;
+extern wait_queue_head_t xpc_activate_IRQ_wq;
 extern void *xpc_heartbeating_to_mask;
 extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
@@ -601,7 +601,7 @@ extern u64 (*xpc_get_IPI_flags) (struct xpc_partition *);
 extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
 extern void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *, u64,
 						  int);
-extern void (*xpc_process_act_IRQ_rcvd) (int);
+extern void (*xpc_process_activate_IRQ_rcvd) (int);
 extern enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *);
 extern void (*xpc_teardown_infrastructure) (struct xpc_partition *);
 extern void (*xpc_mark_partition_engaged) (struct xpc_partition *);
@@ -629,10 +629,12 @@ extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
 extern void (*xpc_received_msg) (struct xpc_channel *, struct xpc_msg *);
 
 /* found in xpc_sn2.c */
-extern void xpc_init_sn2(void);
+extern int xpc_init_sn2(void);
+extern void xpc_exit_sn2(void);
 
 /* found in xpc_uv.c */
 extern void xpc_init_uv(void);
+extern void xpc_exit_uv(void);
 
 /* found in xpc_partition.c */
 extern int xpc_exiting;
@@ -646,7 +648,7 @@ extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
 extern void xpc_allow_IPI_ops(void);
 extern void xpc_restrict_IPI_ops(void);
-extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_identify_activate_IRQ_sender(void);
 extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index aae90f5933b..8780d5d00f6 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -147,11 +147,11 @@ static struct ctl_table_header *xpc_sysctl;
 /* non-zero if any remote partition disengage request was timed out */
 int xpc_disengage_request_timedout;
 
-/* #of IRQs received */
-atomic_t xpc_act_IRQ_rcvd;
+/* #of activate IRQs received */
+atomic_t xpc_activate_IRQ_rcvd = ATOMIC_INIT(0);
 
 /* IRQ handler notifies this wait queue on receipt of an IRQ */
-DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
 static struct timer_list xpc_hb_timer;
@@ -190,7 +190,7 @@ struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
 void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *remote_rp,
 					   u64 remote_rp_pa, int nasid);
 
-void (*xpc_process_act_IRQ_rcvd) (int n_IRQs_expected);
+void (*xpc_process_activate_IRQ_rcvd) (int n_IRQs_expected);
 enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *part);
 void (*xpc_teardown_infrastructure) (struct xpc_partition *part);
 
@@ -238,17 +238,6 @@ xpc_timeout_partition_disengage_request(unsigned long data)
 	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
 }
 
-/*
- * Notify the heartbeat check thread that an IRQ has been received.
- */
-static irqreturn_t
-xpc_act_IRQ_handler(int irq, void *dev_id)
-{
-	atomic_inc(&xpc_act_IRQ_rcvd);
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-	return IRQ_HANDLED;
-}
-
 /*
  * Timer to produce the heartbeat.  The timer structures function is
  * already set when this is initially called.  A tunable is used to
@@ -260,7 +249,7 @@ xpc_hb_beater(unsigned long dummy)
 	xpc_increment_heartbeat();
 
 	if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
-		wake_up_interruptible(&xpc_act_IRQ_wq);
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
 
 	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
 	add_timer(&xpc_hb_timer);
@@ -306,7 +295,7 @@ xpc_hb_checker(void *ignore)
 		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 			"been received\n",
 			(int)(xpc_hb_check_timeout - jiffies),
-			atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
+			atomic_read(&xpc_activate_IRQ_rcvd) - last_IRQ_count);
 
 		/* checking of remote heartbeats is skewed by IRQ handling */
 		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
@@ -322,15 +311,15 @@ xpc_hb_checker(void *ignore)
 		}
 
 		/* check for outstanding IRQs */
-		new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
+		new_IRQ_count = atomic_read(&xpc_activate_IRQ_rcvd);
 		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
 			force_IRQ = 0;
 
 			dev_dbg(xpc_part, "found an IRQ to process; will be "
 				"resetting xpc_hb_check_timeout\n");
 
-			xpc_process_act_IRQ_rcvd(new_IRQ_count -
-						 last_IRQ_count);
+			xpc_process_activate_IRQ_rcvd(new_IRQ_count -
+						      last_IRQ_count);
 			last_IRQ_count = new_IRQ_count;
 
 			xpc_hb_check_timeout = jiffies +
@@ -338,9 +327,9 @@ xpc_hb_checker(void *ignore)
 		}
 
 		/* wait for IRQ or timeout */
-		(void)wait_event_interruptible(xpc_act_IRQ_wq,
-					       (last_IRQ_count <
-						atomic_read(&xpc_act_IRQ_rcvd)
+		(void)wait_event_interruptible(xpc_activate_IRQ_wq,
+					       (last_IRQ_count < atomic_read(
+						&xpc_activate_IRQ_rcvd)
 						|| time_is_before_eq_jiffies(
 						xpc_hb_check_timeout) ||
 						xpc_exiting));
@@ -884,10 +873,7 @@ xpc_do_exit(enum xp_retval reason)
 	 * the heartbeat checker thread in case it's sleeping.
 	 */
 	xpc_exiting = 1;
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-
-	/* ignore all incoming interrupts */
-	free_irq(SGI_XPC_ACTIVATE, NULL);
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
 
 	/* wait for the discovery thread to exit */
 	wait_for_completion(&xpc_discovery_exited);
@@ -968,9 +954,6 @@ xpc_do_exit(enum xp_retval reason)
 		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
 	}
 
-	/* close down protections for IPI operations */
-	xpc_restrict_IPI_ops();
-
 	/* clear the interface to XPC's functions */
 	xpc_clear_interface();
 
@@ -979,6 +962,11 @@ xpc_do_exit(enum xp_retval reason)
 
 	kfree(xpc_partitions);
 	kfree(xpc_remote_copy_buffer_base);
+
+	if (is_shub())
+		xpc_exit_sn2();
+	else
+		xpc_exit_uv();
 }
 
 /*
@@ -1144,7 +1132,9 @@ xpc_init(void)
 		if (xp_max_npartitions != 64)
 			return -EINVAL;
 
-		xpc_init_sn2();
+		ret = xpc_init_sn2();
+		if (ret != 0)
+			return ret;
 
 	} else if (is_uv()) {
 		xpc_init_uv();
@@ -1163,7 +1153,8 @@ xpc_init(void)
 						  &xpc_remote_copy_buffer_base);
 	if (xpc_remote_copy_buffer == NULL) {
 		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_1;
 	}
 
 	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
@@ -1171,7 +1162,7 @@ xpc_init(void)
 	if (xpc_partitions == NULL) {
 		dev_err(xpc_part, "can't get memory for partition structure\n");
 		ret = -ENOMEM;
-		goto out_1;
+		goto out_2;
 	}
 
 	/*
@@ -1187,7 +1178,7 @@ xpc_init(void)
 
 		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
 
-		part->act_IRQ_rcvd = 0;
+		part->activate_IRQ_rcvd = 0;
 		spin_lock_init(&part->act_lock);
 		part->act_state = XPC_P_INACTIVE;
 		XPC_SET_REASON(part, 0, 0);
@@ -1204,33 +1195,6 @@ xpc_init(void)
 
 	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
 
-	/*
-	 * Open up protections for IPI operations (and AMO operations on
-	 * Shub 1.1 systems).
-	 */
-	xpc_allow_IPI_ops();
-
-	/*
-	 * Interrupts being processed will increment this atomic variable and
-	 * awaken the heartbeat thread which will process the interrupts.
-	 */
-	atomic_set(&xpc_act_IRQ_rcvd, 0);
-
-	/*
-	 * This is safe to do before the xpc_hb_checker thread has started
-	 * because the handler releases a wait queue.  If an interrupt is
-	 * received before the thread is waiting, it will not go to sleep,
-	 * but rather immediately process the interrupt.
-	 */
-	ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
-			  "xpc hb", NULL);
-	if (ret != 0) {
-		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
-			"errno=%d\n", -ret);
-		ret = -EBUSY;
-		goto out_2;
-	}
-
 	/*
 	 * Fill the partition reserved page with the information needed by
 	 * other partitions to discover we are alive and establish initial
@@ -1296,14 +1260,16 @@ out_4:
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
 out_3:
-	free_irq(SGI_XPC_ACTIVATE, NULL);
-out_2:
-	xpc_restrict_IPI_ops();
 	if (xpc_sysctl)
 		unregister_sysctl_table(xpc_sysctl);
 	kfree(xpc_partitions);
-out_1:
+out_2:
 	kfree(xpc_remote_copy_buffer_base);
+out_1:
+	if (is_shub())
+		xpc_exit_sn2();
+	else
+		xpc_exit_uv();
 	return ret;
 }
 
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 90ec5ca8c9a..bf9b1193bd2 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -29,16 +29,6 @@
 /* XPC is exiting flag */
 int xpc_exiting;
 
-/* SH_IPI_ACCESS shub register value on startup */
-static u64 xpc_sh1_IPI_access;
-static u64 xpc_sh2_IPI_access0;
-static u64 xpc_sh2_IPI_access1;
-static u64 xpc_sh2_IPI_access2;
-static u64 xpc_sh2_IPI_access3;
-
-/* original protection values for each node */
-u64 xpc_prot_vec[MAX_NUMNODES];
-
 /* this partition's reserved page pointers */
 struct xpc_rsvd_page *xpc_rsvd_page;
 static u64 *xpc_part_nasids;
@@ -210,117 +200,6 @@ xpc_setup_rsvd_page(void)
 	return rp;
 }
 
-/*
- * Change protections to allow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_allow_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
-
-	if (is_shub2()) {
-		xpc_sh2_IPI_access0 =
-		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
-		xpc_sh2_IPI_access1 =
-		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
-		xpc_sh2_IPI_access2 =
-		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
-		xpc_sh2_IPI_access3 =
-		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-			      -1UL);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-			      -1UL);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-			      -1UL);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-			      -1UL);
-		}
-
-	} else {
-		xpc_sh1_IPI_access =
-		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-			      -1UL);
-
-			/*
-			 * Since the BIST collides with memory operations on
-			 * SHUB 1.1 sn_change_memprotect() cannot be used.
-			 */
-			if (enable_shub_wars_1_1()) {
-				/* open up everything */
-				xpc_prot_vec[node] = (u64)HUB_L((u64 *)
-								GLOBAL_MMR_ADDR
-								(nasid,
-						  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
-				HUB_S((u64 *)
-				      GLOBAL_MMR_ADDR(nasid,
-						   SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-				      -1UL);
-				HUB_S((u64 *)
-				      GLOBAL_MMR_ADDR(nasid,
-						   SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-				      -1UL);
-			}
-		}
-	}
-}
-
-/*
- * Restrict protections to disallow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_restrict_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
-
-	if (is_shub2()) {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-			      xpc_sh2_IPI_access0);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-			      xpc_sh2_IPI_access1);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-			      xpc_sh2_IPI_access2);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-			      xpc_sh2_IPI_access3);
-		}
-
-	} else {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-			      xpc_sh1_IPI_access);
-
-			if (enable_shub_wars_1_1()) {
-				HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
-						   SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-				      xpc_prot_vec[node]);
-				HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
-						   SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-				      xpc_prot_vec[node]);
-			}
-		}
-	}
-}
-
 /*
  * Get a copy of a portion of the remote partition's rsvd page.
  *
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index db67d348b35..4659f6cb885 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -22,6 +22,87 @@
 static struct xpc_vars_sn2 *xpc_vars;	/* >>> Add _sn2 suffix? */
 static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */
 
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access;
+static u64 xpc_sh2_IPI_access0;
+static u64 xpc_sh2_IPI_access1;
+static u64 xpc_sh2_IPI_access2;
+static u64 xpc_sh2_IPI_access3;
+
+/*
+ * Change protections to allow IPI operations.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* >>> The following should get moved into SAL. */
+	if (is_shub2()) {
+		xpc_sh2_IPI_access0 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+		xpc_sh2_IPI_access1 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+		xpc_sh2_IPI_access2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+		xpc_sh2_IPI_access3 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      -1UL);
+		}
+	} else {
+		xpc_sh1_IPI_access =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      -1UL);
+		}
+	}
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* >>> The following should get moved into SAL. */
+	if (is_shub2()) {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      xpc_sh2_IPI_access0);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      xpc_sh2_IPI_access1);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      xpc_sh2_IPI_access2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      xpc_sh2_IPI_access3);
+		}
+	} else {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      xpc_sh1_IPI_access);
+		}
+	}
+}
+
 /*
  * The following set of macros and functions are used for the sending and
  * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
@@ -73,6 +154,17 @@ xpc_IPI_init_sn2(int index)
  * IPIs associated with SGI_XPC_ACTIVATE IRQ.
  */
 
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+	atomic_inc(&xpc_activate_IRQ_rcvd);
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+	return IRQ_HANDLED;
+}
+
 /*
  * Flag the appropriate AMO variable and send an IPI to the specified node.
  */
@@ -100,8 +192,8 @@ xpc_activate_IRQ_send_local_sn2(int from_nasid)
 	/* fake the sending and receipt of an activate IRQ from remote nasid */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amos[w_index].variable), FETCHOP_OR,
 			 (1UL << b_index));
-	atomic_inc(&xpc_act_IRQ_rcvd);
-	wake_up_interruptible(&xpc_act_IRQ_wq);
+	atomic_inc(&xpc_activate_IRQ_rcvd);
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
 }
 
 static void
@@ -383,11 +475,65 @@ xpc_clear_partition_disengage_request_sn2(u64 partid_mask)
 			 ~partid_mask);
 }
 
+/* original protection values for each node */
+static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
+
+/*
+ * Change protections to allow AMO operations on non-Shub 1.1 systems.
+ */
+static enum xp_retval
+xpc_allow_AMO_ops_sn2(AMO_t *amos_page)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	/*
+	 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
+	 * collides with memory operations. On those systems we call
+	 * xpc_allow_AMO_ops_shub_wars_1_1_sn2() instead.
+	 */
+	if (!enable_shub_wars_1_1()) {
+		ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE,
+					   SN_MEMPROT_ACCESS_CLASS_1,
+					   &nasid_array);
+		if (ret != 0)
+			return xpSalError;
+	}
+	return xpSuccess;
+}
+
+/*
+ * Change protections to allow AMO operations on Shub 1.1 systems.
+ */
+static void
+xpc_allow_AMO_ops_shub_wars_1_1_sn2(void)
+{
+	int node;
+	int nasid;
+
+	if (!enable_shub_wars_1_1())
+		return;
+
+	for_each_online_node(node) {
+		nasid = cnodeid_to_nasid(node);
+		/* save current protection values */
+		xpc_prot_vec_sn2[node] =
+		    (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
+						  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+		/* open up everything */
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+		      -1UL);
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+		      -1UL);
+	}
+}
+
 static enum xp_retval
 xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 {
 	AMO_t *amos_page;
-	u64 nasid_array = 0;
 	int i;
 	int ret;
 
@@ -421,21 +567,15 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 		}
 
 		/*
-		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
-		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
+		 * Open up AMO-R/W to cpu.  This is done on Shub 1.1 systems
+		 * when xpc_allow_AMO_ops_shub_wars_1_1_sn2() is called.
 		 */
-		if (!enable_shub_wars_1_1()) {
-			ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
-						   PAGE_SIZE,
-						   SN_MEMPROT_ACCESS_CLASS_1,
-						   &nasid_array);
-			if (ret != 0) {
-				dev_err(xpc_part, "can't change memory "
-					"protections\n");
-				uncached_free_page(__IA64_UNCACHED_OFFSET |
-						   TO_PHYS((u64)amos_page), 1);
-				return xpSalError;
-			}
+		ret = xpc_allow_AMO_ops_sn2(amos_page);
+		if (ret != xpSuccess) {
+			dev_err(xpc_part, "can't allow AMO operations\n");
+			uncached_free_page(__IA64_UNCACHED_OFFSET |
+					   TO_PHYS((u64)amos_page), 1);
+			return ret;
 		}
 	}
 
@@ -656,7 +796,7 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
  * initialized reserved page.
  */
 static void
-xpc_identify_act_IRQ_req_sn2(int nasid)
+xpc_identify_activate_IRQ_req_sn2(int nasid)
 {
 	struct xpc_rsvd_page *remote_rp;
 	struct xpc_vars_sn2 *remote_vars;
@@ -702,10 +842,10 @@ xpc_identify_act_IRQ_req_sn2(int nasid)
 		return;
 	}
 
-	part->act_IRQ_rcvd++;
+	part->activate_IRQ_rcvd++;
 
 	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
-		"%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
+		"%ld:0x%lx\n", (int)nasid, (int)partid, part->activate_IRQ_rcvd,
 		remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
 
 	if (xpc_partition_disengaged(part) &&
@@ -831,7 +971,7 @@ xpc_identify_act_IRQ_req_sn2(int nasid)
  * Return #of IRQs detected.
  */
 int
-xpc_identify_act_IRQ_sender_sn2(void)
+xpc_identify_activate_IRQ_sender_sn2(void)
 {
 	int word, bit;
 	u64 nasid_mask;
@@ -872,7 +1012,7 @@ xpc_identify_act_IRQ_sender_sn2(void)
 				nasid = XPC_NASID_FROM_W_B(word, bit);
 				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
 					nasid);
-				xpc_identify_act_IRQ_req_sn2(nasid);
+				xpc_identify_activate_IRQ_req_sn2(nasid);
 			}
 		}
 	}
@@ -880,14 +1020,14 @@ xpc_identify_act_IRQ_sender_sn2(void)
 }
 
 static void
-xpc_process_act_IRQ_rcvd_sn2(int n_IRQs_expected)
+xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
 {
 	int n_IRQs_detected;
 
-	n_IRQs_detected = xpc_identify_act_IRQ_sender_sn2();
+	n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
 	if (n_IRQs_detected < n_IRQs_expected) {
 		/* retry once to help avoid missing AMO */
-		(void)xpc_identify_act_IRQ_sender_sn2();
+		(void)xpc_identify_activate_IRQ_sender_sn2();
 	}
 }
 
@@ -1775,9 +1915,11 @@ xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
 		xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
 }
 
-void
+int
 xpc_init_sn2(void)
 {
+	int ret;
+
 	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
 	xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
@@ -1788,7 +1930,7 @@ xpc_init_sn2(void)
 
 	xpc_initiate_partition_activation =
 	    xpc_initiate_partition_activation_sn2;
-	xpc_process_act_IRQ_rcvd = xpc_process_act_IRQ_rcvd_sn2;
+	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
 	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
@@ -1819,9 +1961,30 @@ xpc_init_sn2(void)
 
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
+
+	/* open up protections for IPI and [potentially] AMO operations */
+	xpc_allow_IPI_ops_sn2();
+	xpc_allow_AMO_ops_shub_wars_1_1_sn2();
+
+	/*
+	 * This is safe to do before the xpc_hb_checker thread has started
+	 * because the handler releases a wait queue.  If an interrupt is
+	 * received before the thread is waiting, it will not go to sleep,
+	 * but rather immediately process the interrupt.
+	 */
+	ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
+			  "xpc hb", NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+			"errno=%d\n", -ret);
+		xpc_disallow_IPI_ops_sn2();
+	}
+	return ret;
 }
 
 void
 xpc_exit_sn2(void)
 {
+	free_irq(SGI_XPC_ACTIVATE, NULL);
+	xpc_disallow_IPI_ops_sn2();
 }
-- 
cgit v1.2.3


From a47d5dac9d8481766382f8cf1483dd581df38b99 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:09 -0700
Subject: sgi-xp: isolate additional sn2 specific code

Move additional sn2 specific code into xpc_sn2.c.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           | 173 ++++----
 drivers/misc/sgi-xp/xpc_channel.c   | 214 +--------
 drivers/misc/sgi-xp/xpc_main.c      | 278 ++++--------
 drivers/misc/sgi-xp/xpc_partition.c |  59 ++-
 drivers/misc/sgi-xp/xpc_sn2.c       | 851 +++++++++++++++++++++++-------------
 drivers/misc/sgi-xp/xpc_uv.c        |  15 +-
 6 files changed, 784 insertions(+), 806 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 1edf37512de..b04cfbed958 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -122,9 +122,6 @@ struct xpc_rsvd_page {
 
 #define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
 
-#define XPC_SUPPORTS_RP_STAMP(_version) \
-			(_version >= _XPC_VERSION(1, 1))
-
 /*
  * Define the structures by which XPC variables can be exported to other
  * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
@@ -144,8 +141,8 @@ struct xpc_vars_sn2 {
 	u64 heartbeat;
 	DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
 	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
-	int act_nasid;
-	int act_phys_cpuid;
+	int activate_IRQ_nasid;
+	int activate_IRQ_phys_cpuid;
 	u64 vars_part_pa;
 	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
 	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
@@ -153,9 +150,6 @@ struct xpc_vars_sn2 {
 
 #define XPC_V_VERSION _XPC_VERSION(3, 1)    /* version 3.1 of the cross vars */
 
-#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
-			(_version >= _XPC_VERSION(3, 1))
-
 /*
  * The following pertains to ia64-sn2 only.
  *
@@ -167,14 +161,14 @@ struct xpc_vars_sn2 {
  * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
  * AMO variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
  * NOTIFY IRQs, 128 AMO variables (based on XP_NASID_MASK_WORDS) to identify
- * the senders of ACTIVATE IRQs, and 2 AMO variables to identify which remote
+ * the senders of ACTIVATE IRQs, 1 AMO variable to identify which remote
  * partitions (i.e., XPCs) consider themselves currently engaged with the
- * local XPC.
+ * local XPC and 1 AMO variable to request partition deactivation.
  */
 #define XPC_NOTIFY_IRQ_AMOS	0
 #define XPC_ACTIVATE_IRQ_AMOS	(XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
 #define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
+#define XPC_DEACTIVATE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
 
 /*
  * The following structure describes the per partition specific variables.
@@ -369,6 +363,23 @@ struct xpc_notify {
  *	new messages, by the clearing of the message flags of the acknowledged
  *	messages.
  */
+struct xpc_channel_sn2 {
+
+	/* various flavors of local and remote Get/Put values */
+
+	struct xpc_gp *local_GP;	/* local Get/Put values */
+	struct xpc_gp remote_GP;	/* remote Get/Put values */
+	struct xpc_gp w_local_GP;	/* working local Get/Put values */
+	struct xpc_gp w_remote_GP;	/* working remote Get/Put values */
+	s64 next_msg_to_pull;	/* Put value of next msg to pull */
+
+	struct mutex msg_to_pull_mutex;	/* next msg to pull serialization */
+};
+
+struct xpc_channel_uv {
+	/* >>> code is coming */
+};
+
 struct xpc_channel {
 	short partid;		/* ID of remote partition connected */
 	spinlock_t lock;	/* lock for updating this structure */
@@ -407,20 +418,11 @@ struct xpc_channel {
 	xpc_channel_func func;	/* user's channel function */
 	void *key;		/* pointer to user's key */
 
-	struct mutex msg_to_pull_mutex;	/* next msg to pull serialization */
 	struct completion wdisconnect_wait;    /* wait for channel disconnect */
 
 	struct xpc_openclose_args *local_openclose_args; /* args passed on */
 					     /* opening or closing of channel */
 
-	/* various flavors of local and remote Get/Put values */
-
-	struct xpc_gp *local_GP;	/* local Get/Put values */
-	struct xpc_gp remote_GP;	/* remote Get/Put values */
-	struct xpc_gp w_local_GP;	/* working local Get/Put values */
-	struct xpc_gp w_remote_GP;	/* working remote Get/Put values */
-	s64 next_msg_to_pull;	/* Put value of next msg to pull */
-
 	/* kthread management related fields */
 
 	atomic_t kthreads_assigned;	/* #of kthreads assigned to channel */
@@ -431,6 +433,11 @@ struct xpc_channel {
 
 	wait_queue_head_t idle_wq;	/* idle kthread wait queue */
 
+	union {
+		struct xpc_channel_sn2 sn2;
+		struct xpc_channel_uv uv;
+	} sn;
+
 } ____cacheline_aligned;
 
 /* struct xpc_channel flags */
@@ -467,6 +474,40 @@ struct xpc_channel {
  * for each partition (a partition will never utilize the structure that
  * represents itself).
  */
+
+struct xpc_partition_sn2 {
+	u64 remote_amos_page_pa;	/* phys addr of partition's amos page */
+	int activate_IRQ_nasid;	/* active partition's act/deact nasid */
+	int activate_IRQ_phys_cpuid;	/* active part's act/deact phys cpuid */
+
+	u64 remote_vars_pa;	/* phys addr of partition's vars */
+	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
+	u8 remote_vars_version;	/* version# of partition's vars */
+
+	void *local_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp *local_GPs;	/* local Get/Put values */
+	void *remote_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp *remote_GPs;	/* copy of remote partition's local */
+					/* Get/Put values */
+	u64 remote_GPs_pa;	/* phys address of remote partition's local */
+				/* Get/Put values */
+
+	u64 remote_openclose_args_pa;	/* phys addr of remote's args */
+
+	int remote_IPI_nasid;	/* nasid of where to send IPIs */
+	int remote_IPI_phys_cpuid;	/* phys CPU ID of where to send IPIs */
+	char IPI_owner[8];	/* IPI owner's name */
+
+	AMO_t *remote_IPI_amo_va;    /* address of remote IPI AMO_t structure */
+	AMO_t *local_IPI_amo_va;	/* address of IPI AMO_t structure */
+
+	struct timer_list dropped_notify_IRQ_timer;	/* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+	/* >>> code is coming */
+};
+
 struct xpc_partition {
 
 	/* XPC HB infrastructure */
@@ -474,22 +515,15 @@ struct xpc_partition {
 	u8 remote_rp_version;	/* version# of partition's rsvd pg */
 	unsigned long remote_rp_stamp; /* time when rsvd pg was initialized */
 	u64 remote_rp_pa;	/* phys addr of partition's rsvd pg */
-	u64 remote_vars_pa;	/* phys addr of partition's vars */
-	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
 	u64 last_heartbeat;	/* HB at last read */
-	u64 remote_amos_page_pa;	/* phys addr of partition's amos page */
-	int remote_act_nasid;	/* active part's act/deact nasid */
-	int remote_act_phys_cpuid;	/* active part's act/deact phys cpuid */
 	u32 activate_IRQ_rcvd;	/* IRQs since activation */
 	spinlock_t act_lock;	/* protect updating of act_state */
 	u8 act_state;		/* from XPC HB viewpoint */
-	u8 remote_vars_version;	/* version# of partition's vars */
 	enum xp_retval reason;	/* reason partition is deactivating */
 	int reason_line;	/* line# deactivation initiated from */
-	int reactivate_nasid;	/* nasid in partition to reactivate */
 
-	unsigned long disengage_request_timeout;	/* timeout in jiffies */
-	struct timer_list disengage_request_timer;
+	unsigned long disengage_timeout;	/* timeout in jiffies */
+	struct timer_list disengage_timer;
 
 	/* XPC infrastructure referencing and teardown control */
 
@@ -502,14 +536,6 @@ struct xpc_partition {
 	atomic_t nchannels_engaged;  /* #of channels engaged with remote part */
 	struct xpc_channel *channels;	/* array of channel structures */
 
-	void *local_GPs_base;	/* base address of kmalloc'd space */
-	struct xpc_gp *local_GPs;	/* local Get/Put values */
-	void *remote_GPs_base;	/* base address of kmalloc'd space */
-	struct xpc_gp *remote_GPs;	/* copy of remote partition's local */
-					/* Get/Put values */
-	u64 remote_GPs_pa;	/* phys address of remote partition's local */
-				/* Get/Put values */
-
 	/* fields used to pass args when opening or closing a channel */
 
 	void *local_openclose_args_base;   /* base address of kmalloc'd space */
@@ -517,19 +543,10 @@ struct xpc_partition {
 	void *remote_openclose_args_base;  /* base address of kmalloc'd space */
 	struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
 							  /* args */
-	u64 remote_openclose_args_pa;	/* phys addr of remote's args */
 
 	/* IPI sending, receiving and handling related fields */
 
-	int remote_IPI_nasid;	/* nasid of where to send IPIs */
-	int remote_IPI_phys_cpuid;	/* phys CPU ID of where to send IPIs */
-	AMO_t *remote_IPI_amo_va;    /* address of remote IPI AMO_t structure */
-
-	AMO_t *local_IPI_amo_va;	/* address of IPI AMO_t structure */
 	u64 local_IPI_amo;	/* IPI amo flags yet to be handled */
-	char IPI_owner[8];	/* IPI owner's name */
-	struct timer_list dropped_IPI_timer;	/* dropped IPI timer */
-
 	spinlock_t IPI_lock;	/* IPI handler lock */
 
 	/* channel manager related fields */
@@ -537,6 +554,11 @@ struct xpc_partition {
 	atomic_t channel_mgr_requests;	/* #of requests to activate chan mgr */
 	wait_queue_head_t channel_mgr_wq;	/* channel mgr's wait queue */
 
+	union {
+		struct xpc_partition_sn2 sn2;
+		struct xpc_partition_uv uv;
+	} sn;
+
 } ____cacheline_aligned;
 
 /* struct xpc_partition act_state values (for XPC HB) */
@@ -565,10 +587,10 @@ struct xpc_partition {
 #define XPC_P_DROPPED_IPI_WAIT_INTERVAL	(0.25 * HZ)
 
 /* number of seconds to wait for other partitions to disengage */
-#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT	90
+#define XPC_DISENGAGE_DEFAULT_TIMELIMIT		90
 
-/* interval in seconds to print 'waiting disengagement' messages */
-#define XPC_DISENGAGE_PRINTMSG_INTERVAL		10
+/* interval in seconds to print 'waiting deactivation' messages */
+#define XPC_DEACTIVATE_PRINTMSG_INTERVAL	10
 
 #define XPC_PARTID(_p)	((short)((_p) - &xpc_partitions[0]))
 
@@ -578,13 +600,11 @@ extern struct xpc_registration xpc_registrations[];
 /* found in xpc_main.c */
 extern struct device *xpc_part;
 extern struct device *xpc_chan;
-extern int xpc_disengage_request_timelimit;
-extern int xpc_disengage_request_timedout;
+extern int xpc_disengage_timelimit;
+extern int xpc_disengage_timedout;
 extern atomic_t xpc_activate_IRQ_rcvd;
 extern wait_queue_head_t xpc_activate_IRQ_wq;
 extern void *xpc_heartbeating_to_mask;
-extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
-extern void xpc_dropped_IPI_check(struct xpc_partition *);
 extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
@@ -598,31 +618,34 @@ extern void (*xpc_online_heartbeat) (void);
 extern void (*xpc_check_remote_hb) (void);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
 extern u64 (*xpc_get_IPI_flags) (struct xpc_partition *);
+extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
+extern void (*xpc_process_msg_IPI) (struct xpc_partition *, int);
+extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
 extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
-extern void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *, u64,
-						  int);
+extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, u64,
+						 int);
+extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
+extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
+extern void (*xpc_cancel_partition_deactivation_request) (
+							struct xpc_partition *);
 extern void (*xpc_process_activate_IRQ_rcvd) (int);
 extern enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *);
 extern void (*xpc_teardown_infrastructure) (struct xpc_partition *);
-extern void (*xpc_mark_partition_engaged) (struct xpc_partition *);
-extern void (*xpc_mark_partition_disengaged) (struct xpc_partition *);
-extern void (*xpc_request_partition_disengage) (struct xpc_partition *);
-extern void (*xpc_cancel_partition_disengage_request) (struct xpc_partition *);
-extern u64 (*xpc_partition_engaged) (u64);
-extern u64 (*xpc_partition_disengage_requested) (u64);;
-extern void (*xpc_clear_partition_engaged) (u64);
-extern void (*xpc_clear_partition_disengage_request) (u64);
-
-extern void (*xpc_IPI_send_local_activate) (int);
-extern void (*xpc_IPI_send_activated) (struct xpc_partition *);
-extern void (*xpc_IPI_send_local_reactivate) (int);
-extern void (*xpc_IPI_send_disengage) (struct xpc_partition *);
-
-extern void (*xpc_IPI_send_closerequest) (struct xpc_channel *,
-					  unsigned long *);
-extern void (*xpc_IPI_send_closereply) (struct xpc_channel *, unsigned long *);
-extern void (*xpc_IPI_send_openrequest) (struct xpc_channel *, unsigned long *);
-extern void (*xpc_IPI_send_openreply) (struct xpc_channel *, unsigned long *);
+
+extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *);
+extern int (*xpc_partition_engaged) (short);
+extern int (*xpc_any_partition_engaged) (void);
+extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *);
+extern void (*xpc_assume_partition_disengaged) (short);
+
+extern void (*xpc_send_channel_closerequest) (struct xpc_channel *,
+					      unsigned long *);
+extern void (*xpc_send_channel_closereply) (struct xpc_channel *,
+					    unsigned long *);
+extern void (*xpc_send_channel_openrequest) (struct xpc_channel *,
+					     unsigned long *);
+extern void (*xpc_send_channel_openreply) (struct xpc_channel *,
+					   unsigned long *);
 
 extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
 				       u8, xpc_notify_func, void *);
@@ -646,8 +669,6 @@ extern char *xpc_remote_copy_buffer;
 extern void *xpc_remote_copy_buffer_base;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
-extern void xpc_allow_IPI_ops(void);
-extern void xpc_restrict_IPI_ops(void);
 extern int xpc_identify_activate_IRQ_sender(void);
 extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 55182c8dd32..48b16136305 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -201,7 +201,7 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	if (!(ch->flags & XPC_C_OPENREPLY)) {
 		ch->flags |= XPC_C_OPENREPLY;
-		xpc_IPI_send_openreply(ch, irq_flags);
+		xpc_send_channel_openreply(ch, irq_flags);
 	}
 
 	if (!(ch->flags & XPC_C_ROPENREPLY))
@@ -219,52 +219,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 	spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
-/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xp_retval reason, s64 put)
-{
-	struct xpc_notify *notify;
-	u8 notify_type;
-	s64 get = ch->w_remote_GP.get - 1;
-
-	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
-		notify = &ch->notify_queue[get % ch->local_nentries];
-
-		/*
-		 * See if the notify entry indicates it was associated with
-		 * a message who's sender wants to be notified. It is possible
-		 * that it is, but someone else is doing or has done the
-		 * notification.
-		 */
-		notify_type = notify->type;
-		if (notify_type == 0 ||
-		    cmpxchg(&notify->type, notify_type, 0) != notify_type) {
-			continue;
-		}
-
-		DBUG_ON(notify_type != XPC_N_CALL);
-
-		atomic_dec(&ch->n_to_notify);
-
-		if (notify->func != NULL) {
-			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *)notify, get, ch->partid, ch->number);
-
-			notify->func(reason, ch->partid, ch->number,
-				     notify->key);
-
-			dev_dbg(xpc_chan, "notify->func() returned, "
-				"notify=0x%p, msg_number=%ld, partid=%d, "
-				"channel=%d\n", (void *)notify, get,
-				ch->partid, ch->number);
-		}
-	}
-}
-
 /*
  * Free up message queues and other stuff that were allocated for the specified
  * channel.
@@ -275,6 +229,8 @@ xpc_notify_senders(struct xpc_channel *ch, enum xp_retval reason, s64 put)
 static void
 xpc_free_msgqueues(struct xpc_channel *ch)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
 	DBUG_ON(!spin_is_locked(&ch->lock));
 	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
 
@@ -287,15 +243,15 @@ xpc_free_msgqueues(struct xpc_channel *ch)
 	ch->kthreads_assigned_limit = 0;
 	ch->kthreads_idle_limit = 0;
 
-	ch->local_GP->get = 0;
-	ch->local_GP->put = 0;
-	ch->remote_GP.get = 0;
-	ch->remote_GP.put = 0;
-	ch->w_local_GP.get = 0;
-	ch->w_local_GP.put = 0;
-	ch->w_remote_GP.get = 0;
-	ch->w_remote_GP.put = 0;
-	ch->next_msg_to_pull = 0;
+	ch_sn2->local_GP->get = 0;
+	ch_sn2->local_GP->put = 0;
+	ch_sn2->remote_GP.get = 0;
+	ch_sn2->remote_GP.put = 0;
+	ch_sn2->w_local_GP.get = 0;
+	ch_sn2->w_local_GP.put = 0;
+	ch_sn2->w_remote_GP.get = 0;
+	ch_sn2->w_remote_GP.put = 0;
+	ch_sn2->next_msg_to_pull = 0;
 
 	if (ch->flags & XPC_C_SETUP) {
 		ch->flags &= ~XPC_C_SETUP;
@@ -339,7 +295,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	if (part->act_state == XPC_P_DEACTIVATING) {
 		/* can't proceed until the other side disengages from us */
-		if (xpc_partition_engaged(1UL << ch->partid))
+		if (xpc_partition_engaged(ch->partid))
 			return;
 
 	} else {
@@ -351,7 +307,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
 			ch->flags |= XPC_C_CLOSEREPLY;
-			xpc_IPI_send_closereply(ch, irq_flags);
+			xpc_send_channel_closereply(ch, irq_flags);
 		}
 
 		if (!(ch->flags & XPC_C_RCLOSEREPLY))
@@ -361,7 +317,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	/* wake those waiting for notify completion */
 	if (atomic_read(&ch->n_to_notify) > 0) {
 		/* >>> we do callout while holding ch->lock */
-		xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+		xpc_notify_senders_of_disconnect(ch);
 	}
 
 	/* both sides are disconnected now */
@@ -734,7 +690,7 @@ xpc_connect_channel(struct xpc_channel *ch)
 	/* initiate the connection */
 
 	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
-	xpc_IPI_send_openrequest(ch, &irq_flags);
+	xpc_send_channel_openrequest(ch, &irq_flags);
 
 	xpc_process_connect(ch, &irq_flags);
 
@@ -743,142 +699,6 @@ xpc_connect_channel(struct xpc_channel *ch)
 	return xpSuccess;
 }
 
-/*
- * Clear some of the msg flags in the local message queue.
- */
-static inline void
-xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 get;
-
-	get = ch->w_remote_GP.get;
-	do {
-		msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
-					 (get % ch->local_nentries) *
-					 ch->msg_size);
-		msg->flags = 0;
-	} while (++get < ch->remote_GP.get);
-}
-
-/*
- * Clear some of the msg flags in the remote message queue.
- */
-static inline void
-xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 put;
-
-	put = ch->w_remote_GP.put;
-	do {
-		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
-					 (put % ch->remote_nentries) *
-					 ch->msg_size);
-		msg->flags = 0;
-	} while (++put < ch->remote_GP.put);
-}
-
-static void
-xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
-{
-	struct xpc_channel *ch = &part->channels[ch_number];
-	int nmsgs_sent;
-
-	ch->remote_GP = part->remote_GPs[ch_number];
-
-	/* See what, if anything, has changed for each connected channel */
-
-	xpc_msgqueue_ref(ch);
-
-	if (ch->w_remote_GP.get == ch->remote_GP.get &&
-	    ch->w_remote_GP.put == ch->remote_GP.put) {
-		/* nothing changed since GPs were last pulled */
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-	if (!(ch->flags & XPC_C_CONNECTED)) {
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-	/*
-	 * First check to see if messages recently sent by us have been
-	 * received by the other side. (The remote GET value will have
-	 * changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.get != ch->remote_GP.get) {
-
-		/*
-		 * We need to notify any senders that want to be notified
-		 * that their sent messages have been received by their
-		 * intended recipients. We need to do this before updating
-		 * w_remote_GP.get so that we don't allocate the same message
-		 * queue entries prematurely (see xpc_allocate_msg()).
-		 */
-		if (atomic_read(&ch->n_to_notify) > 0) {
-			/*
-			 * Notify senders that messages sent have been
-			 * received and delivered by the other side.
-			 */
-			xpc_notify_senders(ch, xpMsgDelivered,
-					   ch->remote_GP.get);
-		}
-
-		/*
-		 * Clear msg->flags in previously sent messages, so that
-		 * they're ready for xpc_allocate_msg().
-		 */
-		xpc_clear_local_msgqueue_flags(ch);
-
-		ch->w_remote_GP.get = ch->remote_GP.get;
-
-		dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.get, ch->partid,
-			ch->number);
-
-		/*
-		 * If anyone was waiting for message queue entries to become
-		 * available, wake them up.
-		 */
-		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
-			wake_up(&ch->msg_allocate_wq);
-	}
-
-	/*
-	 * Now check for newly sent messages by the other side. (The remote
-	 * PUT value will have changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.put != ch->remote_GP.put) {
-		/*
-		 * Clear msg->flags in previously received messages, so that
-		 * they're ready for xpc_get_deliverable_msg().
-		 */
-		xpc_clear_remote_msgqueue_flags(ch);
-
-		ch->w_remote_GP.put = ch->remote_GP.put;
-
-		dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.put, ch->partid,
-			ch->number);
-
-		nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
-		if (nmsgs_sent > 0) {
-			dev_dbg(xpc_chan, "msgs waiting to be copied and "
-				"delivered=%d, partid=%d, channel=%d\n",
-				nmsgs_sent, ch->partid, ch->number);
-
-			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
-				xpc_activate_kthreads(ch, nmsgs_sent);
-		}
-	}
-
-	xpc_msgqueue_deref(ch);
-}
-
 void
 xpc_process_channel_activity(struct xpc_partition *part)
 {
@@ -1117,7 +937,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 		       XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
 		       XPC_C_CONNECTING | XPC_C_CONNECTED);
 
-	xpc_IPI_send_closerequest(ch, irq_flags);
+	xpc_send_channel_closerequest(ch, irq_flags);
 
 	if (channel_was_connected)
 		ch->flags |= XPC_C_WASCONNECTED;
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 8780d5d00f6..563aaf4a2ff 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -26,7 +26,7 @@
  *	Caveats:
  *
  *	  . We currently have no way to determine which nasid an IPI came
- *	    from. Thus, xpc_IPI_send() does a remote AMO write followed by
+ *	    from. Thus, >>> xpc_IPI_send() does a remote AMO write followed by
  *	    an IPI. The AMO indicates where data is to be pulled from, so
  *	    after the IPI arrives, the remote partition checks the AMO word.
  *	    The IPI can actually arrive before the AMO however, so other code
@@ -89,9 +89,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
 static int xpc_hb_check_min_interval = 10;
 static int xpc_hb_check_max_interval = 120;
 
-int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
-static int xpc_disengage_request_min_timelimit;	/* = 0 */
-static int xpc_disengage_request_max_timelimit = 120;
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit;	/* = 0 */
+static int xpc_disengage_max_timelimit = 120;
 
 static ctl_table xpc_sys_xpc_hb_dir[] = {
 	{
@@ -124,14 +124,14 @@ static ctl_table xpc_sys_xpc_dir[] = {
 	 .child = xpc_sys_xpc_hb_dir},
 	{
 	 .ctl_name = CTL_UNNUMBERED,
-	 .procname = "disengage_request_timelimit",
-	 .data = &xpc_disengage_request_timelimit,
+	 .procname = "disengage_timelimit",
+	 .data = &xpc_disengage_timelimit,
 	 .maxlen = sizeof(int),
 	 .mode = 0644,
 	 .proc_handler = &proc_dointvec_minmax,
 	 .strategy = &sysctl_intvec,
-	 .extra1 = &xpc_disengage_request_min_timelimit,
-	 .extra2 = &xpc_disengage_request_max_timelimit},
+	 .extra1 = &xpc_disengage_min_timelimit,
+	 .extra2 = &xpc_disengage_max_timelimit},
 	{}
 };
 static ctl_table xpc_sys_dir[] = {
@@ -144,8 +144,8 @@ static ctl_table xpc_sys_dir[] = {
 };
 static struct ctl_table_header *xpc_sysctl;
 
-/* non-zero if any remote partition disengage request was timed out */
-int xpc_disengage_request_timedout;
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
 
 /* #of activate IRQs received */
 atomic_t xpc_activate_IRQ_rcvd = ATOMIC_INIT(0);
@@ -184,38 +184,36 @@ void (*xpc_online_heartbeat) (void);
 void (*xpc_check_remote_hb) (void);
 
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
+void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
 u64 (*xpc_get_IPI_flags) (struct xpc_partition *part);
+void (*xpc_process_msg_IPI) (struct xpc_partition *part, int ch_number);
+int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
 
-void (*xpc_initiate_partition_activation) (struct xpc_rsvd_page *remote_rp,
-					   u64 remote_rp_pa, int nasid);
+void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
+					  u64 remote_rp_pa, int nasid);
+void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
+void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
+void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
 
 void (*xpc_process_activate_IRQ_rcvd) (int n_IRQs_expected);
 enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *part);
 void (*xpc_teardown_infrastructure) (struct xpc_partition *part);
 
-void (*xpc_mark_partition_engaged) (struct xpc_partition *part);
-void (*xpc_mark_partition_disengaged) (struct xpc_partition *part);
-void (*xpc_request_partition_disengage) (struct xpc_partition *part);
-void (*xpc_cancel_partition_disengage_request) (struct xpc_partition *part);
-u64 (*xpc_partition_engaged) (u64 partid_mask);
-u64 (*xpc_partition_disengage_requested) (u64 partid_mask);
-void (*xpc_clear_partition_engaged) (u64 partid_mask);
-void (*xpc_clear_partition_disengage_request) (u64 partid_mask);
-
-void (*xpc_IPI_send_local_activate) (int from_nasid);
-void (*xpc_IPI_send_activated) (struct xpc_partition *part);
-void (*xpc_IPI_send_local_reactivate) (int from_nasid);
-void (*xpc_IPI_send_disengage) (struct xpc_partition *part);
-
-void (*xpc_IPI_send_closerequest) (struct xpc_channel *ch,
-				   unsigned long *irq_flags);
-void (*xpc_IPI_send_closereply) (struct xpc_channel *ch,
-				 unsigned long *irq_flags);
-void (*xpc_IPI_send_openrequest) (struct xpc_channel *ch,
-				  unsigned long *irq_flags);
-void (*xpc_IPI_send_openreply) (struct xpc_channel *ch,
-				unsigned long *irq_flags);
+void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
+int (*xpc_partition_engaged) (short partid);
+int (*xpc_any_partition_engaged) (void);
+void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
+void (*xpc_assume_partition_disengaged) (short partid);
+
+void (*xpc_send_channel_closerequest) (struct xpc_channel *ch,
+				       unsigned long *irq_flags);
+void (*xpc_send_channel_closereply) (struct xpc_channel *ch,
+				     unsigned long *irq_flags);
+void (*xpc_send_channel_openrequest) (struct xpc_channel *ch,
+				      unsigned long *irq_flags);
+void (*xpc_send_channel_openreply) (struct xpc_channel *ch,
+				    unsigned long *irq_flags);
 
 enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
 				void *payload, u16 payload_size, u8 notify_type,
@@ -223,19 +221,19 @@ enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
 void (*xpc_received_msg) (struct xpc_channel *ch, struct xpc_msg *msg);
 
 /*
- * Timer function to enforce the timelimit on the partition disengage request.
+ * Timer function to enforce the timelimit on the partition disengage.
  */
 static void
-xpc_timeout_partition_disengage_request(unsigned long data)
+xpc_timeout_partition_disengage(unsigned long data)
 {
 	struct xpc_partition *part = (struct xpc_partition *)data;
 
-	DBUG_ON(time_is_after_jiffies(part->disengage_request_timeout));
+	DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
 
 	(void)xpc_partition_disengaged(part);
 
-	DBUG_ON(part->disengage_request_timeout != 0);
-	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+	DBUG_ON(part->disengage_timeout != 0);
+	DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
 }
 
 /*
@@ -464,7 +462,7 @@ xpc_activating(void *__partid)
 
 	if (part->reason == xpReactivating) {
 		/* interrupting ourselves results in activating partition */
-		xpc_IPI_send_local_reactivate(part->reactivate_nasid);
+		xpc_request_partition_reactivation(part);
 	}
 
 	return 0;
@@ -496,82 +494,6 @@ xpc_activate_partition(struct xpc_partition *part)
 	}
 }
 
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
-	u64 IPI_amo;
-	unsigned long irq_flags;
-
-/* this needs to be uncommented, but I'm thinking this function and the */
-/* ones that call it need to be moved into xpc_sn2.c... */
-	IPI_amo = 0; /* = xpc_IPI_receive(part->local_IPI_amo_va); */
-	if (IPI_amo == 0)
-		return;
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	part->local_IPI_amo |= IPI_amo;
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
-		XPC_PARTID(part), IPI_amo);
-
-	xpc_wakeup_channel_mgr(part);
-}
-
-/*
- * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
- * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not.  If it has, then wake up the
- * associated kthread to handle it.
- *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
- * running on other partitions.
- *
- * Noteworthy Arguments:
- *
- *	irq - Interrupt ReQuest number. NOT USED.
- *
- *	dev_id - partid of IPI's potential sender.
- */
-irqreturn_t
-xpc_notify_IRQ_handler(int irq, void *dev_id)
-{
-	short partid = (short)(u64)dev_id;
-	struct xpc_partition *part = &xpc_partitions[partid];
-
-	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
-
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		xpc_part_deref(part);
-	}
-	return IRQ_HANDLED;
-}
-
-/*
- * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
- * was received.
- */
-void
-xpc_dropped_IPI_check(struct xpc_partition *part)
-{
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		part->dropped_IPI_timer.expires = jiffies +
-		    XPC_P_DROPPED_IPI_WAIT_INTERVAL;
-		add_timer(&part->dropped_IPI_timer);
-		xpc_part_deref(part);
-	}
-}
-
 void
 xpc_activate_kthreads(struct xpc_channel *ch, int needed)
 {
@@ -616,7 +538,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 	do {
 		/* deliver messages to their intended recipients */
 
-		while (ch->w_local_GP.get < ch->w_remote_GP.put &&
+		while (xpc_n_of_deliverable_msgs(ch) > 0 &&
 		       !(ch->flags & XPC_C_DISCONNECTING)) {
 			xpc_deliver_msg(ch);
 		}
@@ -632,7 +554,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 			"wait_event_interruptible_exclusive()\n");
 
 		(void)wait_event_interruptible_exclusive(ch->idle_wq,
-				(ch->w_local_GP.get < ch->w_remote_GP.put ||
+				(xpc_n_of_deliverable_msgs(ch) > 0 ||
 				 (ch->flags & XPC_C_DISCONNECTING)));
 
 		atomic_dec(&ch->kthreads_idle);
@@ -677,7 +599,7 @@ xpc_kthread_start(void *args)
 			 * additional kthreads to help deliver them. We only
 			 * need one less than total #of messages to deliver.
 			 */
-			n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
+			n_needed = xpc_n_of_deliverable_msgs(ch) - 1;
 			if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
 				xpc_activate_kthreads(ch, n_needed);
 
@@ -703,11 +625,9 @@ xpc_kthread_start(void *args)
 	}
 	spin_unlock_irqrestore(&ch->lock, irq_flags);
 
-	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
-		}
+	if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+	    atomic_dec_return(&part->nchannels_engaged) == 0) {
+		xpc_indicate_partition_disengaged(part);
 	}
 
 	xpc_msgqueue_deref(ch);
@@ -758,9 +678,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
 		} else if (ch->flags & XPC_C_DISCONNECTING) {
 			break;
 
-		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
-			if (atomic_inc_return(&part->nchannels_engaged) == 1)
-				xpc_mark_partition_engaged(part);
+		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+			   atomic_inc_return(&part->nchannels_engaged) == 1) {
+			xpc_indicate_partition_engaged(part);
 		}
 		(void)xpc_part_ref(part);
 		xpc_msgqueue_ref(ch);
@@ -782,8 +702,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
 
 			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 			    atomic_dec_return(&part->nchannels_engaged) == 0) {
-				xpc_mark_partition_disengaged(part);
-				xpc_IPI_send_disengage(part);
+				xpc_indicate_partition_disengaged(part);
 			}
 			xpc_msgqueue_deref(ch);
 			xpc_part_deref(part);
@@ -862,7 +781,7 @@ xpc_do_exit(enum xp_retval reason)
 	short partid;
 	int active_part_count, printed_waiting_msg = 0;
 	struct xpc_partition *part;
-	unsigned long printmsg_time, disengage_request_timeout = 0;
+	unsigned long printmsg_time, disengage_timeout = 0;
 
 	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
 	DBUG_ON(xpc_exiting == 1);
@@ -886,8 +805,8 @@ xpc_do_exit(enum xp_retval reason)
 
 	/* wait for all partitions to become inactive */
 
-	printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-	xpc_disengage_request_timedout = 0;
+	printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+	xpc_disengage_timedout = 0;
 
 	do {
 		active_part_count = 0;
@@ -904,36 +823,32 @@ xpc_do_exit(enum xp_retval reason)
 
 			XPC_DEACTIVATE_PARTITION(part, reason);
 
-			if (part->disengage_request_timeout >
-			    disengage_request_timeout) {
-				disengage_request_timeout =
-				    part->disengage_request_timeout;
-			}
+			if (part->disengage_timeout > disengage_timeout)
+				disengage_timeout = part->disengage_timeout;
 		}
 
-		if (xpc_partition_engaged(-1UL)) {
+		if (xpc_any_partition_engaged()) {
 			if (time_is_before_jiffies(printmsg_time)) {
 				dev_info(xpc_part, "waiting for remote "
-					 "partitions to disengage, timeout in "
-					 "%ld seconds\n",
-					 (disengage_request_timeout - jiffies)
-					 / HZ);
+					 "partitions to deactivate, timeout in "
+					 "%ld seconds\n", (disengage_timeout -
+					 jiffies) / HZ);
 				printmsg_time = jiffies +
-				    (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+				    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
 				printed_waiting_msg = 1;
 			}
 
 		} else if (active_part_count > 0) {
 			if (printed_waiting_msg) {
 				dev_info(xpc_part, "waiting for local partition"
-					 " to disengage\n");
+					 " to deactivate\n");
 				printed_waiting_msg = 0;
 			}
 
 		} else {
-			if (!xpc_disengage_request_timedout) {
+			if (!xpc_disengage_timedout) {
 				dev_info(xpc_part, "all partitions have "
-					 "disengaged\n");
+					 "deactivated\n");
 			}
 			break;
 		}
@@ -943,7 +858,7 @@ xpc_do_exit(enum xp_retval reason)
 
 	} while (1);
 
-	DBUG_ON(xpc_partition_engaged(-1UL));
+	DBUG_ON(xpc_any_partition_engaged());
 	DBUG_ON(xpc_any_hbs_allowed() != 0);
 
 	/* indicate to others that our reserved page is uninitialized */
@@ -996,15 +911,16 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
 }
 
 /*
- * Notify other partitions to disengage from all references to our memory.
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
  */
 static void
-xpc_die_disengage(void)
+xpc_die_deactivate(void)
 {
 	struct xpc_partition *part;
 	short partid;
-	unsigned long engaged;
-	long time, printmsg_time, disengage_request_timeout;
+	int any_engaged;
+	long time, printmsg_time, disengage_timeout;
 
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
 	xpc_exiting = 1;
@@ -1014,43 +930,37 @@ xpc_die_disengage(void)
 	for (partid = 0; partid < xp_max_npartitions; partid++) {
 		part = &xpc_partitions[partid];
 
-		if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-		    remote_vars_version)) {
-
-			/* just in case it was left set by an earlier XPC */
-			xpc_clear_partition_engaged(1UL << partid);
-			continue;
-		}
-
-		if (xpc_partition_engaged(1UL << partid) ||
+		if (xpc_partition_engaged(partid) ||
 		    part->act_state != XPC_P_INACTIVE) {
-			xpc_request_partition_disengage(part);
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
+			xpc_request_partition_deactivation(part);
+			xpc_indicate_partition_disengaged(part);
 		}
 	}
 
 	time = rtc_time();
 	printmsg_time = time +
-	    (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
-	disengage_request_timeout = time +
-	    (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
+	    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+	disengage_timeout = time +
+	    (xpc_disengage_timelimit * sn_rtc_cycles_per_second);
 
-	/* wait for all other partitions to disengage from us */
+	/*
+	 * Though we requested that all other partitions deactivate from us,
+	 * we only wait until they've all disengaged.
+	 */
 
 	while (1) {
-		engaged = xpc_partition_engaged(-1UL);
-		if (!engaged) {
-			dev_info(xpc_part, "all partitions have disengaged\n");
+		any_engaged = xpc_any_partition_engaged();
+		if (!any_engaged) {
+			dev_info(xpc_part, "all partitions have deactivated\n");
 			break;
 		}
 
 		time = rtc_time();
-		if (time >= disengage_request_timeout) {
+		if (time >= disengage_timeout) {
 			for (partid = 0; partid < xp_max_npartitions;
 			     partid++) {
-				if (engaged & (1UL << partid)) {
-					dev_info(xpc_part, "disengage from "
+				if (xpc_partition_engaged(partid)) {
+					dev_info(xpc_part, "deactivate from "
 						 "remote partition %d timed "
 						 "out\n", partid);
 				}
@@ -1060,11 +970,11 @@ xpc_die_disengage(void)
 
 		if (time >= printmsg_time) {
 			dev_info(xpc_part, "waiting for remote partitions to "
-				 "disengage, timeout in %ld seconds\n",
-				 (disengage_request_timeout - time) /
+				 "deactivate, timeout in %ld seconds\n",
+				 (disengage_timeout - time) /
 				 sn_rtc_cycles_per_second);
 			printmsg_time = time +
-			    (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+			    (XPC_DEACTIVATE_PRINTMSG_INTERVAL *
 			     sn_rtc_cycles_per_second);
 		}
 	}
@@ -1084,7 +994,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 	switch (event) {
 	case DIE_MACHINE_RESTART:
 	case DIE_MACHINE_HALT:
-		xpc_die_disengage();
+		xpc_die_deactivate();
 		break;
 
 	case DIE_KDEBUG_ENTER:
@@ -1183,10 +1093,10 @@ xpc_init(void)
 		part->act_state = XPC_P_INACTIVE;
 		XPC_SET_REASON(part, 0, 0);
 
-		init_timer(&part->disengage_request_timer);
-		part->disengage_request_timer.function =
-		    xpc_timeout_partition_disengage_request;
-		part->disengage_request_timer.data = (unsigned long)part;
+		init_timer(&part->disengage_timer);
+		part->disengage_timer.function =
+		    xpc_timeout_partition_disengage;
+		part->disengage_timer.data = (unsigned long)part;
 
 		part->setup_state = XPC_P_UNSET;
 		init_waitqueue_head(&part->teardown_wq);
@@ -1295,9 +1205,9 @@ module_param(xpc_hb_check_interval, int, 0);
 MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
 		 "heartbeat checks.");
 
-module_param(xpc_disengage_request_timelimit, int, 0);
-MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
-		 "for disengage request to complete.");
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+		 "for disengage to complete.");
 
 module_param(xpc_kdebug_ignore, int, 0);
 MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index bf9b1193bd2..c769ab8f74e 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -242,7 +242,7 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 		return xpBadVersion;
 	}
 
-	/* check that both local and remote partids are valid for each side */
+	/* check that both remote and local partids are valid for each side */
 	if (remote_rp->SAL_partid < 0 ||
 	    remote_rp->SAL_partid >= xp_max_npartitions ||
 	    remote_rp->max_npartitions <= sn_partition_id) {
@@ -256,8 +256,9 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 }
 
 /*
- * See if the other side has responded to a partition disengage request
- * from us.
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
  */
 int
 xpc_partition_disengaged(struct xpc_partition *part)
@@ -265,41 +266,37 @@ xpc_partition_disengaged(struct xpc_partition *part)
 	short partid = XPC_PARTID(part);
 	int disengaged;
 
-	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
-	if (part->disengage_request_timeout) {
+	disengaged = !xpc_partition_engaged(partid);
+	if (part->disengage_timeout) {
 		if (!disengaged) {
-			if (time_is_after_jiffies(part->
-						  disengage_request_timeout)) {
+			if (time_is_after_jiffies(part->disengage_timeout)) {
 				/* timelimit hasn't been reached yet */
 				return 0;
 			}
 
 			/*
-			 * Other side hasn't responded to our disengage
+			 * Other side hasn't responded to our deactivate
 			 * request in a timely fashion, so assume it's dead.
 			 */
 
-			dev_info(xpc_part, "disengage from remote partition %d "
-				 "timed out\n", partid);
-			xpc_disengage_request_timedout = 1;
-			xpc_clear_partition_engaged(1UL << partid);
+			dev_info(xpc_part, "deactivate request to remote "
+				 "partition %d timed out\n", partid);
+			xpc_disengage_timedout = 1;
+			xpc_assume_partition_disengaged(partid);
 			disengaged = 1;
 		}
-		part->disengage_request_timeout = 0;
+		part->disengage_timeout = 0;
 
 		/* cancel the timer function, provided it's not us */
-		if (!in_interrupt()) {
-			del_singleshot_timer_sync(&part->
-						  disengage_request_timer);
-		}
+		if (!in_interrupt())
+			del_singleshot_timer_sync(&part->disengage_timer);
 
 		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
 			part->act_state != XPC_P_INACTIVE);
 		if (part->act_state != XPC_P_INACTIVE)
 			xpc_wakeup_channel_mgr(part);
 
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
-			xpc_cancel_partition_disengage_request(part);
+		xpc_cancel_partition_deactivation_request(part);
 	}
 	return disengaged;
 }
@@ -329,7 +326,7 @@ xpc_mark_partition_active(struct xpc_partition *part)
 }
 
 /*
- * Notify XPC that the partition is down.
+ * Start the process of deactivating the specified partition.
  */
 void
 xpc_deactivate_partition(const int line, struct xpc_partition *part,
@@ -344,7 +341,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
 		if (reason == xpReactivating) {
 			/* we interrupt ourselves to reactivate partition */
-			xpc_IPI_send_local_reactivate(part->reactivate_nasid);
+			xpc_request_partition_reactivation(part);
 		}
 		return;
 	}
@@ -362,17 +359,13 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
 
-	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-		xpc_request_partition_disengage(part);
-		xpc_IPI_send_disengage(part);
+	/* ask remote partition to deactivate with regard to us */
+	xpc_request_partition_deactivation(part);
 
-		/* set a timelimit on the disengage request */
-		part->disengage_request_timeout = jiffies +
-		    (xpc_disengage_request_timelimit * HZ);
-		part->disengage_request_timer.expires =
-		    part->disengage_request_timeout;
-		add_timer(&part->disengage_request_timer);
-	}
+	/* set a timelimit on the disengage phase of the deactivation request */
+	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+	part->disengage_timer.expires = part->disengage_timeout;
+	add_timer(&part->disengage_timer);
 
 	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
 		XPC_PARTID(part), reason);
@@ -505,8 +498,8 @@ xpc_discovery(void)
 				continue;
 			}
 
-			xpc_initiate_partition_activation(remote_rp,
-							  remote_rp_pa, nasid);
+			xpc_request_partition_activation(remote_rp,
+							 remote_rp_pa, nasid);
 		}
 	}
 
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 4659f6cb885..69d74bd5689 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -196,37 +196,85 @@ xpc_activate_IRQ_send_local_sn2(int from_nasid)
 	wake_up_interruptible(&xpc_activate_IRQ_wq);
 }
 
-static void
-xpc_IPI_send_local_activate_sn2(int from_nasid)
-{
-	xpc_activate_IRQ_send_local_sn2(from_nasid);
-}
+/*
+ * IPIs associated with SGI_XPC_NOTIFY IRQ.
+ */
 
+/*
+ * Check to see if there is any channel activity to/from the specified
+ * partition.
+ */
 static void
-xpc_IPI_send_activated_sn2(struct xpc_partition *part)
+xpc_check_for_channel_activity_sn2(struct xpc_partition *part)
 {
-	xpc_activate_IRQ_send_sn2(part->remote_amos_page_pa,
-				  cnodeid_to_nasid(0), part->remote_act_nasid,
-				  part->remote_act_phys_cpuid);
-}
+	u64 IPI_amo;
+	unsigned long irq_flags;
 
-static void
-xpc_IPI_send_local_reactivate_sn2(int from_nasid)
-{
-	xpc_activate_IRQ_send_local_sn2(from_nasid);
+	IPI_amo = xpc_IPI_receive_sn2(part->sn.sn2.local_IPI_amo_va);
+	if (IPI_amo == 0)
+		return;
+
+	spin_lock_irqsave(&part->IPI_lock, irq_flags);
+	part->local_IPI_amo |= IPI_amo;
+	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+
+	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
+		XPC_PARTID(part), IPI_amo);
+
+	xpc_wakeup_channel_mgr(part);
 }
 
-static void
-xpc_IPI_send_disengage_sn2(struct xpc_partition *part)
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an AMO_t structure per partition to indicate
+ * whether a partition has sent an IPI or not.  If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ *	irq - Interrupt ReQuest number. NOT USED.
+ *
+ *	dev_id - partid of IPI's potential sender.
+ */
+static irqreturn_t
+xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
 {
-	xpc_activate_IRQ_send_sn2(part->remote_amos_page_pa,
-				  cnodeid_to_nasid(0), part->remote_act_nasid,
-				  part->remote_act_phys_cpuid);
+	short partid = (short)(u64)dev_id;
+	struct xpc_partition *part = &xpc_partitions[partid];
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_channel_activity_sn2(part);
+
+		xpc_part_deref(part);
+	}
+	return IRQ_HANDLED;
 }
 
 /*
- * IPIs associated with SGI_XPC_NOTIFY IRQ.
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IPIs on the floor
+ * because the write to their associated IPI amo completed after the IRQ/IPI
+ * was received.
  */
+static void
+xpc_dropped_notify_IRQ_check_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_channel_activity_sn2(part);
+
+		part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
+		    XPC_P_DROPPED_IPI_WAIT_INTERVAL;
+		add_timer(&part_sn2->dropped_notify_IRQ_timer);
+		xpc_part_deref(part);
+	}
+}
 
 /*
  * Send an IPI to the remote partition that is associated with the
@@ -237,13 +285,14 @@ xpc_notify_IRQ_send_sn2(struct xpc_channel *ch, u8 ipi_flag,
 			char *ipi_flag_string, unsigned long *irq_flags)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	enum xp_retval ret;
 
 	if (likely(part->act_state != XPC_P_DEACTIVATING)) {
-		ret = xpc_IPI_send_sn2(part->remote_IPI_amo_va,
+		ret = xpc_IPI_send_sn2(part_sn2->remote_IPI_amo_va,
 				       (u64)ipi_flag << (ch->number * 8),
-				       part->remote_IPI_nasid,
-				       part->remote_IPI_phys_cpuid,
+				       part_sn2->remote_IPI_nasid,
+				       part_sn2->remote_IPI_phys_cpuid,
 				       SGI_XPC_NOTIFY);
 		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
 			ipi_flag_string, ch->partid, ch->number, ret);
@@ -263,7 +312,7 @@ xpc_notify_IRQ_send_sn2(struct xpc_channel *ch, u8 ipi_flag,
 /*
  * Make it look like the remote partition, which is associated with the
  * specified channel, sent us an IPI. This faked IPI will be handled
- * by xpc_dropped_IPI_check().
+ * by xpc_dropped_notify_IRQ_check_sn2().
  */
 static void
 xpc_notify_IRQ_send_local_sn2(struct xpc_channel *ch, u8 ipi_flag,
@@ -271,7 +320,7 @@ xpc_notify_IRQ_send_local_sn2(struct xpc_channel *ch, u8 ipi_flag,
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
 
-	FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
+	FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_IPI_amo_va->variable),
 			 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
 	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
 		ipi_flag_string, ch->partid, ch->number);
@@ -281,7 +330,8 @@ xpc_notify_IRQ_send_local_sn2(struct xpc_channel *ch, u8 ipi_flag,
 		xpc_notify_IRQ_send_local_sn2(_ch, _ipi_f, #_ipi_f)
 
 static void
-xpc_IPI_send_closerequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+xpc_send_channel_closerequest_sn2(struct xpc_channel *ch,
+				  unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
@@ -290,13 +340,15 @@ xpc_IPI_send_closerequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 static void
-xpc_IPI_send_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+xpc_send_channel_closereply_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
 {
 	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_CLOSEREPLY, irq_flags);
 }
 
 static void
-xpc_IPI_send_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+xpc_send_channel_openrequest_sn2(struct xpc_channel *ch,
+				 unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
@@ -306,7 +358,7 @@ xpc_IPI_send_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 static void
-xpc_IPI_send_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+xpc_send_channel_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
@@ -317,13 +369,13 @@ xpc_IPI_send_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 static void
-xpc_IPI_send_msgrequest_sn2(struct xpc_channel *ch)
+xpc_send_channel_msgrequest_sn2(struct xpc_channel *ch)
 {
 	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_MSGREQUEST, NULL);
 }
 
 static void
-xpc_IPI_send_local_msgrequest_sn2(struct xpc_channel *ch)
+xpc_send_channel_local_msgrequest_sn2(struct xpc_channel *ch)
 {
 	XPC_NOTIFY_IRQ_SEND_LOCAL_SN2(ch, XPC_IPI_MSGREQUEST);
 }
@@ -334,10 +386,10 @@ xpc_IPI_send_local_msgrequest_sn2(struct xpc_channel *ch)
  */
 
 static void
-xpc_mark_partition_engaged_sn2(struct xpc_partition *part)
+xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+	AMO_t *amo = (AMO_t *)__va(part->sn.sn2.remote_amos_page_pa +
 				   (XPC_ENGAGED_PARTITIONS_AMO *
 				    sizeof(AMO_t)));
 
@@ -360,10 +412,11 @@ xpc_mark_partition_engaged_sn2(struct xpc_partition *part)
 }
 
 static void
-xpc_mark_partition_disengaged_sn2(struct xpc_partition *part)
+xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
+	AMO_t *amo = (AMO_t *)__va(part_sn2->remote_amos_page_pa +
 				   (XPC_ENGAGED_PARTITIONS_AMO *
 				    sizeof(AMO_t)));
 
@@ -383,96 +436,44 @@ xpc_mark_partition_disengaged_sn2(struct xpc_partition *part)
 						     xp_nofault_PIOR_target));
 
 	local_irq_restore(irq_flags);
-}
-
-static void
-xpc_request_partition_disengage_sn2(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
 
-	/* set bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
-			 (1UL << sn_partition_id));
 	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * Send activate IRQ to get other side to see that we've cleared our
+	 * bit in their engaged partitions AMO.
 	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
+	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
 }
 
-static void
-xpc_cancel_partition_disengage_request_sn2(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
-				   (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-	local_irq_save(irq_flags);
-
-	/* clear bit corresponding to our partid in remote partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << sn_partition_id));
-	/*
-	 * We must always use the nofault function regardless of whether we
-	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
-	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
-	 */
-	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
-							       variable),
-						     xp_nofault_PIOR_target));
-
-	local_irq_restore(irq_flags);
-}
-
-static u64
-xpc_partition_engaged_sn2(u64 partid_mask)
+static int
+xpc_partition_engaged_sn2(short partid)
 {
 	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* return our partition's AMO variable ANDed with partid_mask */
+	/* our partition's AMO variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		partid_mask);
+		(1UL << partid)) != 0;
 }
 
-static u64
-xpc_partition_disengage_requested_sn2(u64 partid_mask)
-{
-	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
-	/* return our partition's AMO variable ANDed with partid_mask */
-	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		partid_mask);
-}
-
-static void
-xpc_clear_partition_engaged_sn2(u64 partid_mask)
+static int
+xpc_any_partition_engaged_sn2(void)
 {
 	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* clear bit(s) based on partid_mask in our partition's AMO */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~partid_mask);
+	/* our partition's AMO variable */
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
 }
 
 static void
-xpc_clear_partition_disengage_request_sn2(u64 partid_mask)
+xpc_assume_partition_disengaged_sn2(short partid)
 {
-	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* clear bit(s) based on partid_mask in our partition's AMO */
+	/* clear the bit corresponding to partid in our partition's AMO */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~partid_mask);
+			 ~(1UL << partid));
 }
 
 /* original protection values for each node */
@@ -545,7 +546,6 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	xpc_vars_part = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
 						     XPC_RP_VARS_SIZE);
 
-
 	/*
 	 * Before clearing xpc_vars, see if a page of AMOs had been previously
 	 * allocated. If not we'll need to allocate one and set permissions
@@ -583,8 +583,8 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	memset(xpc_vars, 0, sizeof(struct xpc_vars_sn2));
 
 	xpc_vars->version = XPC_V_VERSION;
-	xpc_vars->act_nasid = cpuid_to_nasid(0);
-	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+	xpc_vars->activate_IRQ_nasid = cpuid_to_nasid(0);
+	xpc_vars->activate_IRQ_phys_cpuid = cpu_physical_id(0);
 	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
 	xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
 	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */
@@ -599,7 +599,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	/* initialize the engaged remote partitions related AMO variables */
 	(void)xpc_IPI_init_sn2(XPC_ENGAGED_PARTITIONS_AMO);
-	(void)xpc_IPI_init_sn2(XPC_DISENGAGE_REQUEST_AMO);
+	(void)xpc_IPI_init_sn2(XPC_DEACTIVATE_REQUEST_AMO);
 
 	return xpSuccess;
 }
@@ -671,7 +671,7 @@ xpc_check_remote_hb_sn2(void)
 
 		/* pull the remote_hb cache line */
 		ret = xp_remote_memcpy(remote_vars,
-				       (void *)part->remote_vars_pa,
+				       (void *)part->sn.sn2.remote_vars_pa,
 				       XPC_RP_VARS_SIZE);
 		if (ret != xpSuccess) {
 			XPC_DEACTIVATE_PARTITION(part, ret);
@@ -726,10 +726,86 @@ xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
 }
 
 static void
-xpc_initiate_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
-				      u64 remote_rp_pa, int nasid)
+xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+				     u64 remote_rp_pa, int nasid)
 {
-	xpc_IPI_send_local_activate(nasid);
+	xpc_activate_IRQ_send_local_sn2(nasid);
+}
+
+static void
+xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
+{
+	xpc_activate_IRQ_send_local_sn2(part->sn.sn2.activate_IRQ_nasid);
+}
+
+static void
+xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part_sn2->remote_amos_page_pa +
+				  (XPC_DEACTIVATE_REQUEST_AMO * sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 (1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	/*
+	 * Send activate IRQ to get other side to see that we've set our
+	 * bit in their deactivate request AMO.
+	 */
+	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *)__va(part->sn.sn2.remote_amos_page_pa +
+				  (XPC_DEACTIVATE_REQUEST_AMO * sizeof(AMO_t)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static int
+xpc_partition_deactivation_requested_sn2(short partid)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO;
+
+	/* our partition's AMO variable ANDed with partid mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		(1UL << partid)) != 0;
 }
 
 /*
@@ -741,6 +817,8 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 			      u64 remote_vars_pa,
 			      struct xpc_vars_sn2 *remote_vars)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
 	part->remote_rp_version = remote_rp_version;
 	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
 		part->remote_rp_version);
@@ -752,33 +830,34 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 	part->remote_rp_pa = remote_rp_pa;
 	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
 
-	part->remote_vars_pa = remote_vars_pa;
+	part_sn2->remote_vars_pa = remote_vars_pa;
 	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-		part->remote_vars_pa);
+		part_sn2->remote_vars_pa);
 
 	part->last_heartbeat = remote_vars->heartbeat;
 	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
 		part->last_heartbeat);
 
-	part->remote_vars_part_pa = remote_vars->vars_part_pa;
+	part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
 	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-		part->remote_vars_part_pa);
+		part_sn2->remote_vars_part_pa);
 
-	part->remote_act_nasid = remote_vars->act_nasid;
-	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-		part->remote_act_nasid);
+	part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+	dev_dbg(xpc_part, "  activate_IRQ_nasid = 0x%x\n",
+		part_sn2->activate_IRQ_nasid);
 
-	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-		part->remote_act_phys_cpuid);
+	part_sn2->activate_IRQ_phys_cpuid =
+	    remote_vars->activate_IRQ_phys_cpuid;
+	dev_dbg(xpc_part, "  activate_IRQ_phys_cpuid = 0x%x\n",
+		part_sn2->activate_IRQ_phys_cpuid);
 
-	part->remote_amos_page_pa = remote_vars->amos_page_pa;
+	part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
 	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-		part->remote_amos_page_pa);
+		part_sn2->remote_amos_page_pa);
 
-	part->remote_vars_version = remote_vars->version;
+	part_sn2->remote_vars_version = remote_vars->version;
 	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
-		part->remote_vars_version);
+		part_sn2->remote_vars_version);
 }
 
 /*
@@ -807,6 +886,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 	unsigned long remote_rp_stamp = 0;
 	short partid;
 	struct xpc_partition *part;
+	struct xpc_partition_sn2 *part_sn2;
 	enum xp_retval ret;
 
 	/* pull over the reserved page structure */
@@ -822,11 +902,11 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 
 	remote_vars_pa = remote_rp->sn.vars_pa;
 	remote_rp_version = remote_rp->version;
-	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
-		remote_rp_stamp = remote_rp->stamp;
+	remote_rp_stamp = remote_rp->stamp;
 
 	partid = remote_rp->SAL_partid;
 	part = &xpc_partitions[partid];
+	part_sn2 = &part->sn.sn2;
 
 	/* pull over the cross partition variables */
 
@@ -834,7 +914,6 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 
 	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
 	if (ret != xpSuccess) {
-
 		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
 			 "which sent interrupt, reason=%d\n", nasid, ret);
 
@@ -855,18 +934,12 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 					      &remote_rp_stamp, remote_rp_pa,
 					      remote_vars_pa, remote_vars);
 
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			if (xpc_partition_disengage_requested(1UL << partid)) {
-				/*
-				 * Other side is waiting on us to disengage,
-				 * even though we already have.
-				 */
-				return;
-			}
-
-		} else {
-			/* other side doesn't support disengage requests */
-			xpc_clear_partition_disengage_request(1UL << partid);
+		if (xpc_partition_deactivation_requested_sn2(partid)) {
+			/*
+			 * Other side is waiting on us to deactivate even though
+			 * we already have.
+			 */
+			return;
 		}
 
 		xpc_activate_partition(part);
@@ -874,93 +947,30 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 	}
 
 	DBUG_ON(part->remote_rp_version == 0);
-	DBUG_ON(part->remote_vars_version == 0);
-
-	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
-		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-						       remote_vars_version));
-
-		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-							       version));
-			/* see if the other side rebooted */
-			if (part->remote_amos_page_pa ==
-			    remote_vars->amos_page_pa &&
-			    xpc_hb_allowed(sn_partition_id,
-					  &remote_vars->heartbeating_to_mask)) {
-				/* doesn't look that way, so ignore the IPI */
-				return;
-			}
-		}
+	DBUG_ON(part_sn2->remote_vars_version == 0);
 
-		/*
-		 * Other side rebooted and previous XPC didn't support the
-		 * disengage request, so we don't need to do anything special.
-		 */
+	if (remote_rp_stamp != part->remote_rp_stamp) {
 
-		xpc_update_partition_info_sn2(part, remote_rp_version,
-					      &remote_rp_stamp, remote_rp_pa,
-					      remote_vars_pa, remote_vars);
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
-		return;
-	}
+		/* the other side rebooted */
 
-	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
-
-	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		/*
-		 * Other side rebooted and previous XPC did support the
-		 * disengage request, but the new one doesn't.
-		 */
-
-		xpc_clear_partition_engaged(1UL << partid);
-		xpc_clear_partition_disengage_request(1UL << partid);
+		DBUG_ON(xpc_partition_engaged_sn2(partid));
+		DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
 
 		xpc_update_partition_info_sn2(part, remote_rp_version,
 					      &remote_rp_stamp, remote_rp_pa,
 					      remote_vars_pa, remote_vars);
 		reactivate = 1;
-
-	} else {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		if (remote_rp_stamp != part->remote_rp_stamp) {
-
-			/*
-			 * Other side rebooted and the previous XPC did support
-			 * the disengage request, as does the new one.
-			 */
-
-			DBUG_ON(xpc_partition_engaged(1UL << partid));
-			DBUG_ON(xpc_partition_disengage_requested(1UL <<
-								  partid));
-
-			xpc_update_partition_info_sn2(part, remote_rp_version,
-						      &remote_rp_stamp,
-						      remote_rp_pa,
-						      remote_vars_pa,
-						      remote_vars);
-			reactivate = 1;
-		}
 	}
 
-	if (part->disengage_request_timeout > 0 &&
-	    !xpc_partition_disengaged(part)) {
+	if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
 		/* still waiting on other side to disengage from us */
 		return;
 	}
 
-	if (reactivate) {
-		part->reactivate_nasid = nasid;
+	if (reactivate)
 		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
-
-	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
-		   xpc_partition_disengage_requested(1UL << partid)) {
+	else if (xpc_partition_deactivation_requested_sn2(partid))
 		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
-	}
 }
 
 /*
@@ -1038,6 +1048,7 @@ xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
 static enum xp_retval
 xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	enum xp_retval retval;
 	int ret;
 	int cpuid;
@@ -1060,28 +1071,29 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 
 	/* allocate all the required GET/PUT values */
 
-	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							GFP_KERNEL,
-							&part->local_GPs_base);
-	if (part->local_GPs == NULL) {
+	part_sn2->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+							    GFP_KERNEL,
+							    &part_sn2->
+							    local_GPs_base);
+	if (part_sn2->local_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for local get/put "
 			"values\n");
 		retval = xpNoMemory;
 		goto out_1;
 	}
 
-	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							 GFP_KERNEL,
-							 &part->
-							 remote_GPs_base);
-	if (part->remote_GPs == NULL) {
+	part_sn2->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
+							     GFP_KERNEL,
+							     &part_sn2->
+							     remote_GPs_base);
+	if (part_sn2->remote_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for remote get/put "
 			"values\n");
 		retval = xpNoMemory;
 		goto out_2;
 	}
 
-	part->remote_GPs_pa = 0;
+	part_sn2->remote_GPs_pa = 0;
 
 	/* allocate all the required open and close args */
 
@@ -1103,22 +1115,23 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		goto out_4;
 	}
 
-	part->remote_openclose_args_pa = 0;
+	part_sn2->remote_openclose_args_pa = 0;
 
-	part->local_IPI_amo_va = xpc_IPI_init_sn2(partid);
+	part_sn2->local_IPI_amo_va = xpc_IPI_init_sn2(partid);
 	part->local_IPI_amo = 0;
 	spin_lock_init(&part->IPI_lock);
 
-	part->remote_IPI_nasid = 0;
-	part->remote_IPI_phys_cpuid = 0;
-	part->remote_IPI_amo_va = NULL;
+	part_sn2->remote_IPI_nasid = 0;
+	part_sn2->remote_IPI_phys_cpuid = 0;
+	part_sn2->remote_IPI_amo_va = NULL;
 
 	atomic_set(&part->channel_mgr_requests, 1);
 	init_waitqueue_head(&part->channel_mgr_wq);
 
-	sprintf(part->IPI_owner, "xpc%02d", partid);
-	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
-			  part->IPI_owner, (void *)(u64)partid);
+	sprintf(part_sn2->IPI_owner, "xpc%02d", partid);
+	ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
+			  IRQF_SHARED, part_sn2->IPI_owner,
+			  (void *)(u64)partid);
 	if (ret != 0) {
 		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
 			"errno=%d\n", -ret);
@@ -1127,9 +1140,10 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	}
 
 	/* Setup a timer to check for dropped IPIs */
-	timer = &part->dropped_IPI_timer;
+	timer = &part_sn2->dropped_notify_IRQ_timer;
 	init_timer(timer);
-	timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
+	timer->function =
+	    (void (*)(unsigned long))xpc_dropped_notify_IRQ_check_sn2;
 	timer->data = (unsigned long)part;
 	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT_INTERVAL;
 	add_timer(timer);
@@ -1146,7 +1160,7 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		ch->number = ch_number;
 		ch->flags = XPC_C_DISCONNECTED;
 
-		ch->local_GP = &part->local_GPs[ch_number];
+		ch->sn.sn2.local_GP = &part_sn2->local_GPs[ch_number];
 		ch->local_openclose_args =
 		    &part->local_openclose_args[ch_number];
 
@@ -1158,7 +1172,7 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		atomic_set(&ch->n_to_notify, 0);
 
 		spin_lock_init(&ch->lock);
-		mutex_init(&ch->msg_to_pull_mutex);
+		mutex_init(&ch->sn.sn2.msg_to_pull_mutex);
 		init_completion(&ch->wdisconnect_wait);
 
 		atomic_set(&ch->n_on_msg_allocate_wq, 0);
@@ -1179,10 +1193,10 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	 * The setting of the magic # indicates that these per partition
 	 * specific variables are ready to be used.
 	 */
-	xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
+	xpc_vars_part[partid].GPs_pa = __pa(part_sn2->local_GPs);
 	xpc_vars_part[partid].openclose_args_pa =
 	    __pa(part->local_openclose_args);
-	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
+	xpc_vars_part[partid].IPI_amo_pa = __pa(part_sn2->local_IPI_amo_va);
 	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
 	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
 	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
@@ -1199,11 +1213,11 @@ out_4:
 	kfree(part->local_openclose_args_base);
 	part->local_openclose_args = NULL;
 out_3:
-	kfree(part->remote_GPs_base);
-	part->remote_GPs = NULL;
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
 out_2:
-	kfree(part->local_GPs_base);
-	part->local_GPs = NULL;
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
 out_1:
 	kfree(part->channels);
 	part->channels = NULL;
@@ -1217,6 +1231,7 @@ out_1:
 static void
 xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	short partid = XPC_PARTID(part);
 
 	/*
@@ -1248,19 +1263,19 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 	part->setup_state = XPC_P_TORNDOWN;
 
 	/* in case we've still got outstanding timers registered... */
-	del_timer_sync(&part->dropped_IPI_timer);
+	del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
 
 	kfree(part->remote_openclose_args_base);
 	part->remote_openclose_args = NULL;
 	kfree(part->local_openclose_args_base);
 	part->local_openclose_args = NULL;
-	kfree(part->remote_GPs_base);
-	part->remote_GPs = NULL;
-	kfree(part->local_GPs_base);
-	part->local_GPs = NULL;
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
 	kfree(part->channels);
 	part->channels = NULL;
-	part->local_IPI_amo_va = NULL;
+	part_sn2->local_IPI_amo_va = NULL;
 }
 
 /*
@@ -1300,6 +1315,7 @@ xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
 static enum xp_retval
 xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	u8 buffer[L1_CACHE_BYTES * 2];
 	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
 	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
@@ -1310,11 +1326,11 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 	/* pull the cacheline that contains the variables we're interested in */
 
-	DBUG_ON(part->remote_vars_part_pa !=
-		L1_CACHE_ALIGN(part->remote_vars_part_pa));
+	DBUG_ON(part_sn2->remote_vars_part_pa !=
+		L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
 	DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
 
-	remote_entry_pa = part->remote_vars_part_pa +
+	remote_entry_pa = part_sn2->remote_vars_part_pa +
 	    sn_partition_id * sizeof(struct xpc_vars_part_sn2);
 
 	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
@@ -1364,13 +1380,13 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 		/* the variables we imported look to be valid */
 
-		part->remote_GPs_pa = pulled_entry->GPs_pa;
-		part->remote_openclose_args_pa =
+		part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+		part_sn2->remote_openclose_args_pa =
 		    pulled_entry->openclose_args_pa;
-		part->remote_IPI_amo_va =
+		part_sn2->remote_IPI_amo_va =
 		    (AMO_t *)__va(pulled_entry->IPI_amo_pa);
-		part->remote_IPI_nasid = pulled_entry->IPI_nasid;
-		part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
+		part_sn2->remote_IPI_nasid = pulled_entry->IPI_nasid;
+		part_sn2->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
 
 		if (part->nchannels > pulled_entry->nchannels)
 			part->nchannels = pulled_entry->nchannels;
@@ -1394,6 +1410,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 static enum xp_retval
 xpc_make_first_contact_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	enum xp_retval ret;
 
 	/*
@@ -1406,7 +1423,7 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 	 * we should get the same page for remote_amos_page_pa after module
 	 * reloads and system reboots.
 	 */
-	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
+	if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
 				       PAGE_SIZE, 1) < 0) {
 		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
 			 "xp_addr region\n", XPC_PARTID(part));
@@ -1416,7 +1433,14 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 		return ret;
 	}
 
-	xpc_IPI_send_activated(part);
+	/*
+	 * Send activate IRQ to get other side to activate if they've not
+	 * already begun to do so.
+	 */
+	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
 
 	while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
 		if (ret != xpRetry) {
@@ -1443,6 +1467,7 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 static u64
 xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 {
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
 	u64 IPI_amo;
 	enum xp_retval ret;
@@ -1459,9 +1484,9 @@ xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
 
 	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines_sn2(part,
-						    part->remote_openclose_args,
-						     (void *)part->
+		ret = xpc_pull_remote_cachelines_sn2(part, part->
+						     remote_openclose_args,
+						     (void *)part_sn2->
 						     remote_openclose_args_pa,
 						     XPC_OPENCLOSE_ARGS_SIZE);
 		if (ret != xpSuccess) {
@@ -1477,8 +1502,8 @@ xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 	}
 
 	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines_sn2(part, part->remote_GPs,
-						    (void *)part->remote_GPs_pa,
+		ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
+						(void *)part_sn2->remote_GPs_pa,
 						     XPC_GP_SIZE);
 		if (ret != xpSuccess) {
 			XPC_DEACTIVATE_PARTITION(part, ret);
@@ -1494,28 +1519,220 @@ xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 	return IPI_amo;
 }
 
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+	struct xpc_notify *notify;
+	u8 notify_type;
+	s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+		notify = &ch->notify_queue[get % ch->local_nentries];
+
+		/*
+		 * See if the notify entry indicates it was associated with
+		 * a message whose sender wants to be notified. It is possible
+		 * that it is, but someone else is doing or has done the
+		 * notification.
+		 */
+		notify_type = notify->type;
+		if (notify_type == 0 ||
+		    cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+			continue;
+		}
+
+		DBUG_ON(notify_type != XPC_N_CALL);
+
+		atomic_dec(&ch->n_to_notify);
+
+		if (notify->func != NULL) {
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
+				"msg_number=%ld, partid=%d, channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+
+			notify->func(reason, ch->partid, ch->number,
+				     notify->key);
+
+			dev_dbg(xpc_chan, "notify->func() returned, "
+				"notify=0x%p, msg_number=%ld, partid=%d, "
+				"channel=%d\n", (void *)notify, get,
+				ch->partid, ch->number);
+		}
+	}
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+	xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg *msg;
+	s64 get;
+
+	get = ch_sn2->w_remote_GP.get;
+	do {
+		msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+					 (get % ch->local_nentries) *
+					 ch->msg_size);
+		msg->flags = 0;
+	} while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg *msg;
+	s64 put;
+
+	put = ch_sn2->w_remote_GP.put;
+	do {
+		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+					 (put % ch->remote_nentries) *
+					 ch->msg_size);
+		msg->flags = 0;
+	} while (++put < ch_sn2->remote_GP.put);
+}
+
+static void
+xpc_process_msg_IPI_sn2(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	int nmsgs_sent;
+
+	ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
+
+	/* See what, if anything, has changed for each connected channel */
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
+	    ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
+		/* nothing changed since GPs were last pulled */
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/*
+	 * First check to see if messages recently sent by us have been
+	 * received by the other side. (The remote GET value will have
+	 * changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
+
+		/*
+		 * We need to notify any senders that want to be notified
+		 * that their sent messages have been received by their
+		 * intended recipients. We need to do this before updating
+		 * w_remote_GP.get so that we don't allocate the same message
+		 * queue entries prematurely (see xpc_allocate_msg()).
+		 */
+		if (atomic_read(&ch->n_to_notify) > 0) {
+			/*
+			 * Notify senders that messages sent have been
+			 * received and delivered by the other side.
+			 */
+			xpc_notify_senders_sn2(ch, xpMsgDelivered,
+					       ch_sn2->remote_GP.get);
+		}
+
+		/*
+		 * Clear msg->flags in previously sent messages, so that
+		 * they're ready for xpc_allocate_msg().
+		 */
+		xpc_clear_local_msgqueue_flags_sn2(ch);
+
+		ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
+
+		dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
+			ch->number);
+
+		/*
+		 * If anyone was waiting for message queue entries to become
+		 * available, wake them up.
+		 */
+		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+			wake_up(&ch->msg_allocate_wq);
+	}
+
+	/*
+	 * Now check for newly sent messages by the other side. (The remote
+	 * PUT value will have changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
+		/*
+		 * Clear msg->flags in previously received messages, so that
+		 * they're ready for xpc_get_deliverable_msg().
+		 */
+		xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+		ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+		dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+			ch->number);
+
+		nmsgs_sent = ch_sn2->w_remote_GP.put - ch_sn2->w_local_GP.get;
+		if (nmsgs_sent > 0) {
+			dev_dbg(xpc_chan, "msgs waiting to be copied and "
+				"delivered=%d, partid=%d, channel=%d\n",
+				nmsgs_sent, ch->partid, ch->number);
+
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+				xpc_activate_kthreads(ch, nmsgs_sent);
+		}
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
 static struct xpc_msg *
 xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *remote_msg, *msg;
 	u32 msg_index, nmsgs;
 	u64 msg_offset;
 	enum xp_retval ret;
 
-	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
+	if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
 		/* we were interrupted by a signal */
 		return NULL;
 	}
 
-	while (get >= ch->next_msg_to_pull) {
+	while (get >= ch_sn2->next_msg_to_pull) {
 
 		/* pull as many messages as are ready and able to be pulled */
 
-		msg_index = ch->next_msg_to_pull % ch->remote_nentries;
+		msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
 
-		DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
-		nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
+		DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+		nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
 		if (msg_index + nmsgs > ch->remote_nentries) {
 			/* ignore the ones that wrap the msg queue for now */
 			nmsgs = ch->remote_nentries - msg_index;
@@ -1532,19 +1749,19 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 
 			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
 				" msg %ld from partition %d, channel=%d, "
-				"ret=%d\n", nmsgs, ch->next_msg_to_pull,
+				"ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
 				ch->partid, ch->number, ret);
 
 			XPC_DEACTIVATE_PARTITION(part, ret);
 
-			mutex_unlock(&ch->msg_to_pull_mutex);
+			mutex_unlock(&ch_sn2->msg_to_pull_mutex);
 			return NULL;
 		}
 
-		ch->next_msg_to_pull += nmsgs;
+		ch_sn2->next_msg_to_pull += nmsgs;
 	}
 
-	mutex_unlock(&ch->msg_to_pull_mutex);
+	mutex_unlock(&ch_sn2->msg_to_pull_mutex);
 
 	/* return the message we were looking for */
 	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
@@ -1553,12 +1770,19 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 	return msg;
 }
 
+static int
+xpc_n_of_deliverable_msgs_sn2(struct xpc_channel *ch)
+{
+	return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
 /*
  * Get a message to be delivered.
  */
 static struct xpc_msg *
 xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg = NULL;
 	s64 get;
 
@@ -1566,9 +1790,9 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 		if (ch->flags & XPC_C_DISCONNECTING)
 			break;
 
-		get = ch->w_local_GP.get;
+		get = ch_sn2->w_local_GP.get;
 		rmb();	/* guarantee that .get loads before .put */
-		if (get == ch->w_remote_GP.put)
+		if (get == ch_sn2->w_remote_GP.put)
 			break;
 
 		/* There are messages waiting to be pulled and delivered.
@@ -1578,7 +1802,7 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 		 * to try again for the next one.
 		 */
 
-		if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
+		if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
 			/* we got the entry referenced by get */
 
 			dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
@@ -1609,6 +1833,7 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 static void
 xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg;
 	s64 put = initial_put + 1;
 	int send_IPI = 0;
@@ -1616,7 +1841,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 	while (1) {
 
 		while (1) {
-			if (put == ch->w_local_GP.put)
+			if (put == ch_sn2->w_local_GP.put)
 				break;
 
 			msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
@@ -1634,10 +1859,10 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 			break;
 		}
 
-		if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
+		if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
 		    initial_put) {
 			/* someone else beat us to it */
-			DBUG_ON(ch->local_GP->put < initial_put);
+			DBUG_ON(ch_sn2->local_GP->put < initial_put);
 			break;
 		}
 
@@ -1657,7 +1882,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 	}
 
 	if (send_IPI)
-		xpc_IPI_send_msgrequest_sn2(ch);
+		xpc_send_channel_msgrequest_sn2(ch);
 }
 
 /*
@@ -1668,6 +1893,7 @@ static enum xp_retval
 xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 		     struct xpc_msg **address_of_msg)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg;
 	enum xp_retval ret;
 	s64 put;
@@ -1681,9 +1907,9 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 
 	while (1) {
 
-		put = ch->w_local_GP.put;
+		put = ch_sn2->w_local_GP.put;
 		rmb();	/* guarantee that .put loads before .get */
-		if (put - ch->w_remote_GP.get < ch->local_nentries) {
+		if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
 
 			/* There are available message entries. We need to try
 			 * to secure one for ourselves. We'll do this by trying
@@ -1691,7 +1917,8 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 			 * doesn't beat us to it. If they do, we'll have to
 			 * try again.
 			 */
-			if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
+			if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
+			    put) {
 				/* we got the entry referenced by put */
 				break;
 			}
@@ -1708,7 +1935,7 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 		 * GP values as if an IPI was sent by the other side.
 		 */
 		if (ret == xpTimeout)
-			xpc_IPI_send_local_msgrequest_sn2(ch);
+			xpc_send_channel_local_msgrequest_sn2(ch);
 
 		if (flags & XPC_NOWAIT)
 			return xpNoWait;
@@ -1810,13 +2037,13 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 
 	/*
 	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->put.
+	 * load of local_GP->put.
 	 */
 	mb();
 
 	/* see if the message is next in line to be sent, if so send it */
 
-	put = ch->local_GP->put;
+	put = ch->sn.sn2.local_GP->put;
 	if (put == msg_number)
 		xpc_send_msgs_sn2(ch, put);
 
@@ -1833,6 +2060,7 @@ out_1:
 static void
 xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg;
 	s64 get = initial_get + 1;
 	int send_IPI = 0;
@@ -1840,7 +2068,7 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 	while (1) {
 
 		while (1) {
-			if (get == ch->w_local_GP.get)
+			if (get == ch_sn2->w_local_GP.get)
 				break;
 
 			msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
@@ -1859,10 +2087,10 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 			break;
 		}
 
-		if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
+		if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
 		    initial_get) {
 			/* someone else beat us to it */
-			DBUG_ON(ch->local_GP->get <= initial_get);
+			DBUG_ON(ch_sn2->local_GP->get <= initial_get);
 			break;
 		}
 
@@ -1882,7 +2110,7 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 	}
 
 	if (send_IPI)
-		xpc_IPI_send_msgrequest_sn2(ch);
+		xpc_send_channel_msgrequest_sn2(ch);
 }
 
 static void
@@ -1902,7 +2130,7 @@ xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
 
 	/*
 	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->get.
+	 * load of local_GP->get.
 	 */
 	mb();
 
@@ -1910,7 +2138,7 @@ xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
 	 * See if this message is next in line to be acknowledged as having
 	 * been delivered.
 	 */
-	get = ch->local_GP->get;
+	get = ch->sn.sn2.local_GP->get;
 	if (get == msg_number)
 		xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
 }
@@ -1928,36 +2156,35 @@ xpc_init_sn2(void)
 	xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
 	xpc_check_remote_hb = xpc_check_remote_hb_sn2;
 
-	xpc_initiate_partition_activation =
-	    xpc_initiate_partition_activation_sn2;
+	xpc_request_partition_activation = xpc_request_partition_activation_sn2;
+	xpc_request_partition_reactivation =
+	    xpc_request_partition_reactivation_sn2;
+	xpc_request_partition_deactivation =
+	    xpc_request_partition_deactivation_sn2;
+	xpc_cancel_partition_deactivation_request =
+	    xpc_cancel_partition_deactivation_request_sn2;
+
 	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
 	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
 	xpc_get_IPI_flags = xpc_get_IPI_flags_sn2;
+	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
+	xpc_process_msg_IPI = xpc_process_msg_IPI_sn2;
+	xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
 
-	xpc_mark_partition_engaged = xpc_mark_partition_engaged_sn2;
-	xpc_mark_partition_disengaged = xpc_mark_partition_disengaged_sn2;
-	xpc_request_partition_disengage = xpc_request_partition_disengage_sn2;
-	xpc_cancel_partition_disengage_request =
-	    xpc_cancel_partition_disengage_request_sn2;
+	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
 	xpc_partition_engaged = xpc_partition_engaged_sn2;
-	xpc_partition_disengage_requested =
-	    xpc_partition_disengage_requested_sn2;
-	xpc_clear_partition_engaged = xpc_clear_partition_engaged_sn2;
-	xpc_clear_partition_disengage_request =
-	    xpc_clear_partition_disengage_request_sn2;
-
-	xpc_IPI_send_local_activate = xpc_IPI_send_local_activate_sn2;
-	xpc_IPI_send_activated = xpc_IPI_send_activated_sn2;
-	xpc_IPI_send_local_reactivate = xpc_IPI_send_local_reactivate_sn2;
-	xpc_IPI_send_disengage = xpc_IPI_send_disengage_sn2;
-
-	xpc_IPI_send_closerequest = xpc_IPI_send_closerequest_sn2;
-	xpc_IPI_send_closereply = xpc_IPI_send_closereply_sn2;
-	xpc_IPI_send_openrequest = xpc_IPI_send_openrequest_sn2;
-	xpc_IPI_send_openreply = xpc_IPI_send_openreply_sn2;
+	xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
+	xpc_indicate_partition_disengaged =
+	    xpc_indicate_partition_disengaged_sn2;
+	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
+
+	xpc_send_channel_closerequest = xpc_send_channel_closerequest_sn2;
+	xpc_send_channel_closereply = xpc_send_channel_closereply_sn2;
+	xpc_send_channel_openrequest = xpc_send_channel_openrequest_sn2;
+	xpc_send_channel_openreply = xpc_send_channel_openreply_sn2;
 
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 32c577b8d0d..c53b229cb04 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -63,8 +63,8 @@ xpc_heartbeat_exit_uv(void)
 }
 
 static void
-xpc_initiate_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
-				     u64 remote_rp_pa, int nasid)
+xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+				    u64 remote_rp_pa, int nasid)
 {
 	short partid = remote_rp->SAL_partid;
 	struct xpc_partition *part = &xpc_partitions[partid];
@@ -78,6 +78,12 @@ xpc_initiate_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 	xpc_IPI_send_local_activate_uv(part);
 }
 
+static void
+xpc_request_partition_reactivation_uv(struct xpc_partition *part)
+{
+	xpc_IPI_send_local_activate_uv(part);
+}
+
 /*
  * Setup the infrastructure necessary to support XPartition Communication
  * between the specified remote partition and the local one.
@@ -128,8 +134,9 @@ xpc_init_uv(void)
 	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
 	xpc_heartbeat_init = xpc_heartbeat_init_uv;
 	xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
-	xpc_initiate_partition_activation =
-	    xpc_initiate_partition_activation_uv;
+	xpc_request_partition_activation = xpc_request_partition_activation_uv;
+	xpc_request_partition_reactivation =
+	    xpc_request_partition_reactivation_uv;
 	xpc_setup_infrastructure = xpc_setup_infrastructure_uv;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_uv;
 	xpc_make_first_contact = xpc_make_first_contact_uv;
-- 
cgit v1.2.3


From 7fb5e59d63deda89a8eefdbd5b3c8d622076afd4 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:10 -0700
Subject: sgi-xp: separate chctl_flags from XPC's notify IRQ

Tie current IPI references to either XPC's notify IRQ or channel control
flags.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h         | 124 +++++++++-------
 drivers/misc/sgi-xp/xpc_channel.c | 135 +++++++++--------
 drivers/misc/sgi-xp/xpc_main.c    |  59 ++++----
 drivers/misc/sgi-xp/xpc_sn2.c     | 301 +++++++++++++++++++-------------------
 drivers/misc/sgi-xp/xpc_uv.c      |  10 +-
 5 files changed, 327 insertions(+), 302 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index b04cfbed958..26a1725f68a 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -186,9 +186,10 @@ struct xpc_vars_part_sn2 {
 	u64 openclose_args_pa;	/* physical address of open and close args */
 	u64 GPs_pa;		/* physical address of Get/Put values */
 
-	u64 IPI_amo_pa;		/* physical address of IPI AMO_t structure */
-	int IPI_nasid;		/* nasid of where to send IPIs */
-	int IPI_phys_cpuid;	/* physical CPU ID of where to send IPIs */
+	u64 chctl_amo_pa;	/* physical address of chctl flags' AMO_t */
+
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
 
 	u8 nchannels;		/* #of defined channels supported */
 
@@ -407,7 +408,7 @@ struct xpc_channel {
 	atomic_t n_on_msg_allocate_wq;	/* #on msg allocation wait queue */
 	wait_queue_head_t msg_allocate_wq;	/* msg allocation wait queue */
 
-	u8 delayed_IPI_flags;	/* IPI flags received, but delayed */
+	u8 delayed_chctl_flags;	/* chctl flags received, but delayed */
 				/* action until channel disconnected */
 
 	/* queue of msg senders who want to be notified when msg received */
@@ -469,6 +470,54 @@ struct xpc_channel {
 				0x00020000 /* disconnecting callout completed */
 #define	XPC_C_WDISCONNECT	0x00040000  /* waiting for channel disconnect */
 
+/*
+ * The channel control flags (chctl) union consists of a 64-bit variable which
+ * is divided up into eight bytes, ordered from right to left. Byte zero
+ * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
+ * can have one or more of the chctl flags set in it.
+ */
+
+union xpc_channel_ctl_flags {
+	u64 all_flags;
+	u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define	XPC_CHCTL_CLOSEREQUEST	0x01
+#define	XPC_CHCTL_CLOSEREPLY	0x02
+#define	XPC_CHCTL_OPENREQUEST	0x04
+#define	XPC_CHCTL_OPENREPLY	0x08
+#define	XPC_CHCTL_MSGREQUEST	0x10
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+			(XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+			 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY)
+#define XPC_MSG_CHCTL_FLAGS	XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
 /*
  * Manages channels on a partition basis. There is one of these structures
  * for each partition (a partition will never utilize the structure that
@@ -494,12 +543,12 @@ struct xpc_partition_sn2 {
 
 	u64 remote_openclose_args_pa;	/* phys addr of remote's args */
 
-	int remote_IPI_nasid;	/* nasid of where to send IPIs */
-	int remote_IPI_phys_cpuid;	/* phys CPU ID of where to send IPIs */
-	char IPI_owner[8];	/* IPI owner's name */
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
+	char notify_IRQ_owner[8];	/* notify IRQ's owner's name */
 
-	AMO_t *remote_IPI_amo_va;    /* address of remote IPI AMO_t structure */
-	AMO_t *local_IPI_amo_va;	/* address of IPI AMO_t structure */
+	AMO_t *remote_chctl_amo_va; /* address of remote chctl flags' AMO_t */
+	AMO_t *local_chctl_amo_va;	/* address of chctl flags' AMO_t */
 
 	struct timer_list dropped_notify_IRQ_timer;	/* dropped IRQ timer */
 };
@@ -536,7 +585,10 @@ struct xpc_partition {
 	atomic_t nchannels_engaged;  /* #of channels engaged with remote part */
 	struct xpc_channel *channels;	/* array of channel structures */
 
-	/* fields used to pass args when opening or closing a channel */
+	/* fields used for managing channel availability and activity */
+
+	union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+	spinlock_t chctl_lock;	/* chctl flags lock */
 
 	void *local_openclose_args_base;   /* base address of kmalloc'd space */
 	struct xpc_openclose_args *local_openclose_args;      /* local's args */
@@ -544,11 +596,6 @@ struct xpc_partition {
 	struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
 							  /* args */
 
-	/* IPI sending, receiving and handling related fields */
-
-	u64 local_IPI_amo;	/* IPI amo flags yet to be handled */
-	spinlock_t IPI_lock;	/* IPI handler lock */
-
 	/* channel manager related fields */
 
 	atomic_t channel_mgr_requests;	/* #of requests to activate chan mgr */
@@ -580,11 +627,12 @@ struct xpc_partition {
 #define XPC_P_TORNDOWN		0x03	/* infrastructure is torndown */
 
 /*
- * struct xpc_partition IPI_timer #of seconds to wait before checking for
- * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
- * after the IPI was received.
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval #of seconds before checking for dropped notify IRQs.
+ * These can occur whenever an IRQ's associated amo write doesn't complete
+ * until after the IRQ was received.
  */
-#define XPC_P_DROPPED_IPI_WAIT_INTERVAL	(0.25 * HZ)
+#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL	(0.25 * HZ)
 
 /* number of seconds to wait for other partitions to disengage */
 #define XPC_DISENGAGE_DEFAULT_TIMELIMIT		90
@@ -617,9 +665,9 @@ extern void (*xpc_offline_heartbeat) (void);
 extern void (*xpc_online_heartbeat) (void);
 extern void (*xpc_check_remote_hb) (void);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
-extern u64 (*xpc_get_IPI_flags) (struct xpc_partition *);
+extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
 extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
-extern void (*xpc_process_msg_IPI) (struct xpc_partition *, int);
+extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
 extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
 extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
 extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, u64,
@@ -638,14 +686,13 @@ extern int (*xpc_any_partition_engaged) (void);
 extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *);
 extern void (*xpc_assume_partition_disengaged) (short);
 
-extern void (*xpc_send_channel_closerequest) (struct xpc_channel *,
-					      unsigned long *);
-extern void (*xpc_send_channel_closereply) (struct xpc_channel *,
+extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *,
 					    unsigned long *);
-extern void (*xpc_send_channel_openrequest) (struct xpc_channel *,
-					     unsigned long *);
-extern void (*xpc_send_channel_openreply) (struct xpc_channel *,
+extern void (*xpc_send_chctl_closereply) (struct xpc_channel *,
+					  unsigned long *);
+extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
 					   unsigned long *);
+extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
 
 extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
 				       u8, xpc_notify_func, void *);
@@ -689,7 +736,7 @@ extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
 extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
 					       xpc_notify_func, void *);
 extern void xpc_initiate_received(short, int, void *);
-extern void xpc_process_channel_activity(struct xpc_partition *);
+extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
 extern void xpc_connected_callout(struct xpc_channel *);
 extern void xpc_deliver_msg(struct xpc_channel *);
 extern void xpc_disconnect_channel(const int, struct xpc_channel *,
@@ -799,25 +846,4 @@ xpc_part_ref(struct xpc_partition *part)
 		(_p)->reason_line = _line; \
 	}
 
-/*
- * The sending and receiving of IPIs includes the setting of an >>>AMO variable
- * to indicate the reason the IPI was sent. The 64-bit variable is divided
- * up into eight bytes, ordered from right to left. Byte zero pertains to
- * channel 0, byte one to channel 1, and so on. Each byte is described by
- * the following IPI flags.
- */
-
-#define	XPC_IPI_CLOSEREQUEST	0x01
-#define	XPC_IPI_CLOSEREPLY	0x02
-#define	XPC_IPI_OPENREQUEST	0x04
-#define	XPC_IPI_OPENREPLY	0x08
-#define	XPC_IPI_MSGREQUEST	0x10
-
-/* given an >>>AMO variable and a channel#, get its associated IPI flags */
-#define XPC_GET_IPI_FLAGS(_amo, _c)	((u8) (((_amo) >> ((_c) * 8)) & 0xff))
-#define XPC_SET_IPI_FLAGS(_amo, _c, _f)	(_amo) |= ((u64) (_f) << ((_c) * 8))
-
-#define	XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
-#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo)       ((_amo) & 0x1010101010101010UL)
-
 #endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 48b16136305..0d3c153d1d0 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -201,7 +201,7 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	if (!(ch->flags & XPC_C_OPENREPLY)) {
 		ch->flags |= XPC_C_OPENREPLY;
-		xpc_send_channel_openreply(ch, irq_flags);
+		xpc_send_chctl_openreply(ch, irq_flags);
 	}
 
 	if (!(ch->flags & XPC_C_ROPENREPLY))
@@ -307,7 +307,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
 			ch->flags |= XPC_C_CLOSEREPLY;
-			xpc_send_channel_closereply(ch, irq_flags);
+			xpc_send_chctl_closereply(ch, irq_flags);
 		}
 
 		if (!(ch->flags & XPC_C_RCLOSEREPLY))
@@ -344,15 +344,15 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	if (ch->flags & XPC_C_WDISCONNECT) {
 		/* we won't lose the CPU since we're holding ch->lock */
 		complete(&ch->wdisconnect_wait);
-	} else if (ch->delayed_IPI_flags) {
+	} else if (ch->delayed_chctl_flags) {
 		if (part->act_state != XPC_P_DEACTIVATING) {
-			/* time to take action on any delayed IPI flags */
-			spin_lock(&part->IPI_lock);
-			XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
-					  ch->delayed_IPI_flags);
-			spin_unlock(&part->IPI_lock);
+			/* time to take action on any delayed chctl flags */
+			spin_lock(&part->chctl_lock);
+			part->chctl.flags[ch->number] |=
+			    ch->delayed_chctl_flags;
+			spin_unlock(&part->chctl_lock);
 		}
-		ch->delayed_IPI_flags = 0;
+		ch->delayed_chctl_flags = 0;
 	}
 }
 
@@ -360,8 +360,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
  * Process a change in the channel's remote connection state.
  */
 static void
-xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
-			  u8 IPI_flags)
+xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
+				  u8 chctl_flags)
 {
 	unsigned long irq_flags;
 	struct xpc_openclose_args *args =
@@ -376,24 +376,24 @@ again:
 	if ((ch->flags & XPC_C_DISCONNECTED) &&
 	    (ch->flags & XPC_C_WDISCONNECT)) {
 		/*
-		 * Delay processing IPI flags until thread waiting disconnect
+		 * Delay processing chctl flags until thread waiting disconnect
 		 * has had a chance to see that the channel is disconnected.
 		 */
-		ch->delayed_IPI_flags |= IPI_flags;
+		ch->delayed_chctl_flags |= chctl_flags;
 		spin_unlock_irqrestore(&ch->lock, irq_flags);
 		return;
 	}
 
-	if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
+	if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
 
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
 			"from partid=%d, channel=%d\n", args->reason,
 			ch->partid, ch->number);
 
 		/*
 		 * If RCLOSEREQUEST is set, we're probably waiting for
 		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
-		 * with this RCLOSEREQUEST in the IPI_flags.
+		 * with this RCLOSEREQUEST in the chctl_flags.
 		 */
 
 		if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -402,8 +402,8 @@ again:
 			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
 			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
 
-			DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
-			IPI_flags &= ~XPC_IPI_CLOSEREPLY;
+			DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
+			chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
 			ch->flags |= XPC_C_RCLOSEREPLY;
 
 			/* both sides have finished disconnecting */
@@ -413,17 +413,15 @@ again:
 		}
 
 		if (ch->flags & XPC_C_DISCONNECTED) {
-			if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
-				if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
-						       ch_number) &
-				     XPC_IPI_OPENREQUEST)) {
-
-					DBUG_ON(ch->delayed_IPI_flags != 0);
-					spin_lock(&part->IPI_lock);
-					XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-							  ch_number,
-							  XPC_IPI_CLOSEREQUEST);
-					spin_unlock(&part->IPI_lock);
+			if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
+				if (part->chctl.flags[ch_number] &
+				    XPC_CHCTL_OPENREQUEST) {
+
+					DBUG_ON(ch->delayed_chctl_flags != 0);
+					spin_lock(&part->chctl_lock);
+					part->chctl.flags[ch_number] |=
+					    XPC_CHCTL_CLOSEREQUEST;
+					spin_unlock(&part->chctl_lock);
 				}
 				spin_unlock_irqrestore(&ch->lock, irq_flags);
 				return;
@@ -436,7 +434,7 @@ again:
 			ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
 		}
 
-		IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
+		chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY);
 
 		/*
 		 * The meaningful CLOSEREQUEST connection state fields are:
@@ -454,7 +452,7 @@ again:
 
 			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
 
-			DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+			DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
 		}
@@ -462,10 +460,10 @@ again:
 		xpc_process_disconnect(ch, &irq_flags);
 	}
 
-	if (IPI_flags & XPC_IPI_CLOSEREPLY) {
+	if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
 
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
-			" channel=%d\n", ch->partid, ch->number);
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
+			"%d, channel=%d\n", ch->partid, ch->number);
 
 		if (ch->flags & XPC_C_DISCONNECTED) {
 			DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
@@ -476,15 +474,14 @@ again:
 		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
 
 		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
-			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
-			     & XPC_IPI_CLOSEREQUEST)) {
-
-				DBUG_ON(ch->delayed_IPI_flags != 0);
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-						  ch_number,
-						  XPC_IPI_CLOSEREPLY);
-				spin_unlock(&part->IPI_lock);
+			if (part->chctl.flags[ch_number] &
+			    XPC_CHCTL_CLOSEREQUEST) {
+
+				DBUG_ON(ch->delayed_chctl_flags != 0);
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch_number] |=
+				    XPC_CHCTL_CLOSEREPLY;
+				spin_unlock(&part->chctl_lock);
 			}
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
@@ -498,9 +495,9 @@ again:
 		}
 	}
 
-	if (IPI_flags & XPC_IPI_OPENREQUEST) {
+	if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
 
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (msg_size=%d, "
 			"local_nentries=%d) received from partid=%d, "
 			"channel=%d\n", args->msg_size, args->local_nentries,
 			ch->partid, ch->number);
@@ -512,7 +509,7 @@ again:
 		}
 
 		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
-			ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
+			ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
 		}
@@ -554,13 +551,13 @@ again:
 		xpc_process_connect(ch, &irq_flags);
 	}
 
-	if (IPI_flags & XPC_IPI_OPENREPLY) {
+	if (chctl_flags & XPC_CHCTL_OPENREPLY) {
 
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
-			"local_nentries=%d, remote_nentries=%d) received from "
-			"partid=%d, channel=%d\n", args->local_msgqueue_pa,
-			args->local_nentries, args->remote_nentries,
-			ch->partid, ch->number);
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
+			"0x%lx, local_nentries=%d, remote_nentries=%d) "
+			"received from partid=%d, channel=%d\n",
+			args->local_msgqueue_pa, args->local_nentries,
+			args->remote_nentries, ch->partid, ch->number);
 
 		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -591,7 +588,7 @@ again:
 		ch->remote_msgqueue_pa = args->local_msgqueue_pa;
 
 		if (args->local_nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
 				"remote_nentries=%d, old remote_nentries=%d, "
 				"partid=%d, channel=%d\n",
 				args->local_nentries, ch->remote_nentries,
@@ -600,7 +597,7 @@ again:
 			ch->remote_nentries = args->local_nentries;
 		}
 		if (args->remote_nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
 				"local_nentries=%d, old local_nentries=%d, "
 				"partid=%d, channel=%d\n",
 				args->remote_nentries, ch->local_nentries,
@@ -690,7 +687,7 @@ xpc_connect_channel(struct xpc_channel *ch)
 	/* initiate the connection */
 
 	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
-	xpc_send_channel_openrequest(ch, &irq_flags);
+	xpc_send_chctl_openrequest(ch, &irq_flags);
 
 	xpc_process_connect(ch, &irq_flags);
 
@@ -700,15 +697,15 @@ xpc_connect_channel(struct xpc_channel *ch)
 }
 
 void
-xpc_process_channel_activity(struct xpc_partition *part)
+xpc_process_sent_chctl_flags(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
-	u64 IPI_amo, IPI_flags;
+	union xpc_channel_ctl_flags chctl;
 	struct xpc_channel *ch;
 	int ch_number;
 	u32 ch_flags;
 
-	IPI_amo = xpc_get_IPI_flags(part);
+	chctl.all_flags = xpc_get_chctl_all_flags(part);
 
 	/*
 	 * Initiate channel connections for registered channels.
@@ -721,14 +718,14 @@ xpc_process_channel_activity(struct xpc_partition *part)
 		ch = &part->channels[ch_number];
 
 		/*
-		 * Process any open or close related IPI flags, and then deal
+		 * Process any open or close related chctl flags, and then deal
 		 * with connecting or disconnecting the channel as required.
 		 */
 
-		IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
-
-		if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags))
-			xpc_process_openclose_IPI(part, ch_number, IPI_flags);
+		if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
+			xpc_process_openclose_chctl_flags(part, ch_number,
+							chctl.flags[ch_number]);
+		}
 
 		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
 
@@ -755,13 +752,13 @@ xpc_process_channel_activity(struct xpc_partition *part)
 		}
 
 		/*
-		 * Process any message related IPI flags, this may involve the
-		 * activation of kthreads to deliver any pending messages sent
-		 * from the other partition.
+		 * Process any message-related chctl flags; this may involve
+		 * the activation of kthreads to deliver any pending messages
+		 * sent from the other partition.
 		 */
 
-		if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags))
-			xpc_process_msg_IPI(part, ch_number);
+		if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			xpc_process_msg_chctl_flags(part, ch_number);
 	}
 }
 
@@ -937,7 +934,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 		       XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
 		       XPC_C_CONNECTING | XPC_C_CONNECTED);
 
-	xpc_send_channel_closerequest(ch, irq_flags);
+	xpc_send_chctl_closerequest(ch, irq_flags);
 
 	if (channel_was_connected)
 		ch->flags |= XPC_C_WASCONNECTED;
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 563aaf4a2ff..43f5b686ecf 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -25,18 +25,18 @@
  *
  *	Caveats:
  *
- *	  . We currently have no way to determine which nasid an IPI came
- *	    from. Thus, >>> xpc_IPI_send() does a remote AMO write followed by
- *	    an IPI. The AMO indicates where data is to be pulled from, so
- *	    after the IPI arrives, the remote partition checks the AMO word.
- *	    The IPI can actually arrive before the AMO however, so other code
- *	    must periodically check for this case. Also, remote AMO operations
- *	    do not reliably time out. Thus we do a remote PIO read solely to
- *	    know whether the remote partition is down and whether we should
- *	    stop sending IPIs to it. This remote PIO read operation is set up
- *	    in a special nofault region so SAL knows to ignore (and cleanup)
- *	    any errors due to the remote AMO write, PIO read, and/or PIO
- *	    write operations.
+ *	  . Currently on sn2, we have no way to determine which nasid an IRQ
+ *	    came from. Thus, xpc_send_IRQ_sn2() does a remote AMO write
+ *	    followed by an IPI. The AMO indicates where data is to be pulled
+ *	    from, so after the IPI arrives, the remote partition checks the AMO
+ *	    word. The IPI can actually arrive before the AMO however, so other
+ *	    code must periodically check for this case. Also, remote AMO
+ *	    operations do not reliably time out. Thus we do a remote PIO read
+ *	    solely to know whether the remote partition is down and whether we
+ *	    should stop sending IPIs to it. This remote PIO read operation is
+ *	    set up in a special nofault region so SAL knows to ignore (and
+ *	    cleanup) any errors due to the remote AMO write, PIO read, and/or
+ *	    PIO write operations.
  *
  *	    If/when new hardware solves this IPI problem, we should abandon
  *	    the current approach.
@@ -185,8 +185,8 @@ void (*xpc_check_remote_hb) (void);
 
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
-u64 (*xpc_get_IPI_flags) (struct xpc_partition *part);
-void (*xpc_process_msg_IPI) (struct xpc_partition *part, int ch_number);
+u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
+void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
 int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
 
@@ -206,14 +206,14 @@ int (*xpc_any_partition_engaged) (void);
 void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
 void (*xpc_assume_partition_disengaged) (short partid);
 
-void (*xpc_send_channel_closerequest) (struct xpc_channel *ch,
-				       unsigned long *irq_flags);
-void (*xpc_send_channel_closereply) (struct xpc_channel *ch,
+void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
 				     unsigned long *irq_flags);
-void (*xpc_send_channel_openrequest) (struct xpc_channel *ch,
-				      unsigned long *irq_flags);
-void (*xpc_send_channel_openreply) (struct xpc_channel *ch,
+void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
+				   unsigned long *irq_flags);
+void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
 				    unsigned long *irq_flags);
+void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
+				  unsigned long *irq_flags);
 
 enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
 				void *payload, u16 payload_size, u8 notify_type,
@@ -302,7 +302,7 @@ xpc_hb_checker(void *ignore)
 
 			/*
 			 * We need to periodically recheck to ensure no
-			 * IPI/AMO pairs have been missed.  That check
+			 * IRQ/AMO pairs have been missed.  That check
 			 * must always reset xpc_hb_check_timeout.
 			 */
 			force_IRQ = 1;
@@ -378,7 +378,7 @@ xpc_channel_mgr(struct xpc_partition *part)
 	       atomic_read(&part->nchannels_active) > 0 ||
 	       !xpc_partition_disengaged(part)) {
 
-		xpc_process_channel_activity(part);
+		xpc_process_sent_chctl_flags(part);
 
 		/*
 		 * Wait until we've been requested to activate kthreads or
@@ -396,7 +396,7 @@ xpc_channel_mgr(struct xpc_partition *part)
 		atomic_dec(&part->channel_mgr_requests);
 		(void)wait_event_interruptible(part->channel_mgr_wq,
 				(atomic_read(&part->channel_mgr_requests) > 0 ||
-				 part->local_IPI_amo != 0 ||
+				 part->chctl.all_flags != 0 ||
 				 (part->act_state == XPC_P_DEACTIVATING &&
 				 atomic_read(&part->nchannels_active) == 0 &&
 				 xpc_partition_disengaged(part))));
@@ -753,16 +753,15 @@ xpc_disconnect_wait(int ch_number)
 		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
 		wakeup_channel_mgr = 0;
 
-		if (ch->delayed_IPI_flags) {
+		if (ch->delayed_chctl_flags) {
 			if (part->act_state != XPC_P_DEACTIVATING) {
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-						  ch->number,
-						  ch->delayed_IPI_flags);
-				spin_unlock(&part->IPI_lock);
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch->number] |=
+				    ch->delayed_chctl_flags;
+				spin_unlock(&part->chctl_lock);
 				wakeup_channel_mgr = 1;
 			}
-			ch->delayed_IPI_flags = 0;
+			ch->delayed_chctl_flags = 0;
 		}
 
 		ch->flags &= ~XPC_C_WDISCONNECT;
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 69d74bd5689..0fef7d86a5a 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -104,20 +104,20 @@ xpc_disallow_IPI_ops_sn2(void)
 }
 
 /*
- * The following set of macros and functions are used for the sending and
- * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
- * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
- * the other that is associated with channel activity (SGI_XPC_NOTIFY).
+ * The following set of functions is used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
  */
 
 static u64
-xpc_IPI_receive_sn2(AMO_t *amo)
+xpc_receive_IRQ_amo_sn2(AMO_t *amo)
 {
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
 }
 
 static enum xp_retval
-xpc_IPI_send_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
+xpc_send_IRQ_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
 {
 	int ret = 0;
 	unsigned long irq_flags;
@@ -131,7 +131,7 @@ xpc_IPI_send_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and AMOs to it until the heartbeat times out.
 	 */
 	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
 						     xp_nofault_PIOR_target));
@@ -142,16 +142,16 @@ xpc_IPI_send_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
 }
 
 static AMO_t *
-xpc_IPI_init_sn2(int index)
+xpc_init_IRQ_amo_sn2(int index)
 {
 	AMO_t *amo = xpc_vars->amos_page + index;
 
-	(void)xpc_IPI_receive_sn2(amo);	/* clear AMO variable */
+	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear AMO variable */
 	return amo;
 }
 
 /*
- * IPIs associated with SGI_XPC_ACTIVATE IRQ.
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
  */
 
 /*
@@ -166,23 +166,23 @@ xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
 }
 
 /*
- * Flag the appropriate AMO variable and send an IPI to the specified node.
+ * Flag the appropriate AMO variable and send an IRQ to the specified node.
  */
 static void
-xpc_activate_IRQ_send_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
-		      int to_phys_cpuid)
+xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
+			  int to_phys_cpuid)
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
 	AMO_t *amos = (AMO_t *)__va(amos_page_pa +
 				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
 
-	(void)xpc_IPI_send_sn2(&amos[w_index], (1UL << b_index), to_nasid,
+	(void)xpc_send_IRQ_sn2(&amos[w_index], (1UL << b_index), to_nasid,
 			       to_phys_cpuid, SGI_XPC_ACTIVATE);
 }
 
 static void
-xpc_activate_IRQ_send_local_sn2(int from_nasid)
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
@@ -197,29 +197,29 @@ xpc_activate_IRQ_send_local_sn2(int from_nasid)
 }
 
 /*
- * IPIs associated with SGI_XPC_NOTIFY IRQ.
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
  */
 
 /*
- * Check to see if there is any channel activity to/from the specified
- * partition.
+ * Check to see if any chctl flags were sent from the specified partition.
  */
 static void
-xpc_check_for_channel_activity_sn2(struct xpc_partition *part)
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
 {
-	u64 IPI_amo;
+	union xpc_channel_ctl_flags chctl;
 	unsigned long irq_flags;
 
-	IPI_amo = xpc_IPI_receive_sn2(part->sn.sn2.local_IPI_amo_va);
-	if (IPI_amo == 0)
+	chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+						  local_chctl_amo_va);
+	if (chctl.all_flags == 0)
 		return;
 
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	part->local_IPI_amo |= IPI_amo;
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.all_flags |= chctl.all_flags;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
 
-	dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
-		XPC_PARTID(part), IPI_amo);
+	dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+		"0x%lx\n", XPC_PARTID(part), chctl.all_flags);
 
 	xpc_wakeup_channel_mgr(part);
 }
@@ -228,17 +228,17 @@ xpc_check_for_channel_activity_sn2(struct xpc_partition *part)
  * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
  * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
  * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not.  If it has, then wake up the
+ * whether a partition has sent an IRQ or not.  If it has, then wake up the
  * associated kthread to handle it.
  *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
  * running on other partitions.
  *
  * Noteworthy Arguments:
  *
  *	irq - Interrupt ReQuest number. NOT USED.
  *
- *	dev_id - partid of IPI's potential sender.
+ *	dev_id - partid of IRQ's potential sender.
  */
 static irqreturn_t
 xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
@@ -249,7 +249,7 @@ xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 
 	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity_sn2(part);
+		xpc_check_for_sent_chctl_flags_sn2(part);
 
 		xpc_part_deref(part);
 	}
@@ -257,45 +257,47 @@ xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
 }
 
 /*
- * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
+ * because the write to their associated AMO variable completed after the IRQ
  * was received.
  */
 static void
-xpc_dropped_notify_IRQ_check_sn2(struct xpc_partition *part)
+xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 
 	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity_sn2(part);
+		xpc_check_for_sent_chctl_flags_sn2(part);
 
 		part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
-		    XPC_P_DROPPED_IPI_WAIT_INTERVAL;
+		    XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
 		add_timer(&part_sn2->dropped_notify_IRQ_timer);
 		xpc_part_deref(part);
 	}
 }
 
 /*
- * Send an IPI to the remote partition that is associated with the
+ * Send a notify IRQ to the remote partition that is associated with the
  * specified channel.
  */
 static void
-xpc_notify_IRQ_send_sn2(struct xpc_channel *ch, u8 ipi_flag,
-			char *ipi_flag_string, unsigned long *irq_flags)
+xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			char *chctl_flag_string, unsigned long *irq_flags)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	union xpc_channel_ctl_flags chctl = { 0 };
 	enum xp_retval ret;
 
 	if (likely(part->act_state != XPC_P_DEACTIVATING)) {
-		ret = xpc_IPI_send_sn2(part_sn2->remote_IPI_amo_va,
-				       (u64)ipi_flag << (ch->number * 8),
-				       part_sn2->remote_IPI_nasid,
-				       part_sn2->remote_IPI_phys_cpuid,
+		chctl.flags[ch->number] = chctl_flag;
+		ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
+				       chctl.all_flags,
+				       part_sn2->notify_IRQ_nasid,
+				       part_sn2->notify_IRQ_phys_cpuid,
 				       SGI_XPC_NOTIFY);
 		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
-			ipi_flag_string, ch->partid, ch->number, ret);
+			chctl_flag_string, ch->partid, ch->number, ret);
 		if (unlikely(ret != xpSuccess)) {
 			if (irq_flags != NULL)
 				spin_unlock_irqrestore(&ch->lock, *irq_flags);
@@ -306,78 +308,78 @@ xpc_notify_IRQ_send_sn2(struct xpc_channel *ch, u8 ipi_flag,
 	}
 }
 
-#define XPC_NOTIFY_IRQ_SEND_SN2(_ch, _ipi_f, _irq_f) \
-		xpc_notify_IRQ_send_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
+		xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
 
 /*
  * Make it look like the remote partition, which is associated with the
- * specified channel, sent us an IPI. This faked IPI will be handled
- * by xpc_dropped_notify_IRQ_check_sn2().
+ * specified channel, sent us a notify IRQ. This faked IRQ will be handled
+ * by xpc_check_for_dropped_notify_IRQ_sn2().
  */
 static void
-xpc_notify_IRQ_send_local_sn2(struct xpc_channel *ch, u8 ipi_flag,
-			      char *ipi_flag_string)
+xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			      char *chctl_flag_string)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	union xpc_channel_ctl_flags chctl = { 0 };
 
-	FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_IPI_amo_va->variable),
-			 FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
+	chctl.flags[ch->number] = chctl_flag;
+	FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
+				variable), FETCHOP_OR, chctl.all_flags);
 	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
-		ipi_flag_string, ch->partid, ch->number);
+		chctl_flag_string, ch->partid, ch->number);
 }
 
-#define XPC_NOTIFY_IRQ_SEND_LOCAL_SN2(_ch, _ipi_f) \
-		xpc_notify_IRQ_send_local_sn2(_ch, _ipi_f, #_ipi_f)
+#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
+		xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
 
 static void
-xpc_send_channel_closerequest_sn2(struct xpc_channel *ch,
-				  unsigned long *irq_flags)
+xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
 	args->reason = ch->reason;
-	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
 }
 
 static void
-xpc_send_channel_closereply_sn2(struct xpc_channel *ch,
-				unsigned long *irq_flags)
+xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
-	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_CLOSEREPLY, irq_flags);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
 }
 
 static void
-xpc_send_channel_openrequest_sn2(struct xpc_channel *ch,
-				 unsigned long *irq_flags)
+xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
 	args->msg_size = ch->msg_size;
 	args->local_nentries = ch->local_nentries;
-	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_OPENREQUEST, irq_flags);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
 }
 
 static void
-xpc_send_channel_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->local_openclose_args;
 
 	args->remote_nentries = ch->remote_nentries;
 	args->local_nentries = ch->local_nentries;
 	args->local_msgqueue_pa = __pa(ch->local_msgqueue);
-	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_OPENREPLY, irq_flags);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
 }
 
 static void
-xpc_send_channel_msgrequest_sn2(struct xpc_channel *ch)
+xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
 {
-	XPC_NOTIFY_IRQ_SEND_SN2(ch, XPC_IPI_MSGREQUEST, NULL);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
 }
 
 static void
-xpc_send_channel_local_msgrequest_sn2(struct xpc_channel *ch)
+xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
 {
-	XPC_NOTIFY_IRQ_SEND_LOCAL_SN2(ch, XPC_IPI_MSGREQUEST);
+	XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
 }
 
 /*
@@ -402,7 +404,7 @@ xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and AMOs to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -429,7 +431,7 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and AMOs to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -441,7 +443,7 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 	 * Send activate IRQ to get other side to see that we've cleared our
 	 * bit in their engaged partitions AMO.
 	 */
-	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
 				  cnodeid_to_nasid(0),
 				  part_sn2->activate_IRQ_nasid,
 				  part_sn2->activate_IRQ_phys_cpuid);
@@ -595,11 +597,11 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	/* initialize the activate IRQ related AMO variables */
 	for (i = 0; i < xp_nasid_mask_words; i++)
-		(void)xpc_IPI_init_sn2(XPC_ACTIVATE_IRQ_AMOS + i);
+		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS + i);
 
 	/* initialize the engaged remote partitions related AMO variables */
-	(void)xpc_IPI_init_sn2(XPC_ENGAGED_PARTITIONS_AMO);
-	(void)xpc_IPI_init_sn2(XPC_DEACTIVATE_REQUEST_AMO);
+	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO);
+	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO);
 
 	return xpSuccess;
 }
@@ -729,13 +731,13 @@ static void
 xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
 				     u64 remote_rp_pa, int nasid)
 {
-	xpc_activate_IRQ_send_local_sn2(nasid);
+	xpc_send_local_activate_IRQ_sn2(nasid);
 }
 
 static void
 xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
 {
-	xpc_activate_IRQ_send_local_sn2(part->sn.sn2.activate_IRQ_nasid);
+	xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
 }
 
 static void
@@ -755,7 +757,7 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and AMOs to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -767,7 +769,7 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 	 * Send activate IRQ to get other side to see that we've set our
 	 * bit in their deactivate request AMO.
 	 */
-	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
 				  cnodeid_to_nasid(0),
 				  part_sn2->activate_IRQ_nasid,
 				  part_sn2->activate_IRQ_phys_cpuid);
@@ -789,7 +791,7 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and AMOs to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -861,11 +863,11 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 }
 
 /*
- * Prior code has determined the nasid which generated an IPI.  Inspect
- * that nasid to determine if its partition needs to be activated or
- * deactivated.
+ * Prior code has determined the nasid which generated an activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
  *
- * A partition is consider "awaiting activation" if our partition
+ * A partition is considered "awaiting activation" if our partition
  * flags indicate it is not active and it has a heartbeat.  A
  * partition is considered "awaiting deactivation" if our partition
  * flags indicate it is active but it has no heartbeat or it is not
@@ -997,7 +999,7 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 		if (xpc_exiting)
 			break;
 
-		nasid_mask = xpc_IPI_receive_sn2(&act_amos[word]);
+		nasid_mask = xpc_receive_IRQ_amo_sn2(&act_amos[word]);
 		if (nasid_mask == 0) {
 			/* no IRQs from nasids in this variable */
 			continue;
@@ -1117,20 +1119,20 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 
 	part_sn2->remote_openclose_args_pa = 0;
 
-	part_sn2->local_IPI_amo_va = xpc_IPI_init_sn2(partid);
-	part->local_IPI_amo = 0;
-	spin_lock_init(&part->IPI_lock);
+	part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
+	part->chctl.all_flags = 0;
+	spin_lock_init(&part->chctl_lock);
 
-	part_sn2->remote_IPI_nasid = 0;
-	part_sn2->remote_IPI_phys_cpuid = 0;
-	part_sn2->remote_IPI_amo_va = NULL;
+	part_sn2->notify_IRQ_nasid = 0;
+	part_sn2->notify_IRQ_phys_cpuid = 0;
+	part_sn2->remote_chctl_amo_va = NULL;
 
 	atomic_set(&part->channel_mgr_requests, 1);
 	init_waitqueue_head(&part->channel_mgr_wq);
 
-	sprintf(part_sn2->IPI_owner, "xpc%02d", partid);
+	sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
 	ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
-			  IRQF_SHARED, part_sn2->IPI_owner,
+			  IRQF_SHARED, part_sn2->notify_IRQ_owner,
 			  (void *)(u64)partid);
 	if (ret != 0) {
 		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
@@ -1139,13 +1141,13 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		goto out_5;
 	}
 
-	/* Setup a timer to check for dropped IPIs */
+	/* Setup a timer to check for dropped notify IRQs */
 	timer = &part_sn2->dropped_notify_IRQ_timer;
 	init_timer(timer);
 	timer->function =
-	    (void (*)(unsigned long))xpc_dropped_notify_IRQ_check_sn2;
+	    (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
 	timer->data = (unsigned long)part;
-	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT_INTERVAL;
+	timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
 	add_timer(timer);
 
 	part->nchannels = XPC_MAX_NCHANNELS;
@@ -1196,10 +1198,10 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	xpc_vars_part[partid].GPs_pa = __pa(part_sn2->local_GPs);
 	xpc_vars_part[partid].openclose_args_pa =
 	    __pa(part->local_openclose_args);
-	xpc_vars_part[partid].IPI_amo_pa = __pa(part_sn2->local_IPI_amo_va);
+	xpc_vars_part[partid].chctl_amo_pa = __pa(part_sn2->local_chctl_amo_va);
 	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
-	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
+	xpc_vars_part[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part[partid].notify_IRQ_phys_cpuid = cpu_physical_id(cpuid);
 	xpc_vars_part[partid].nchannels = part->nchannels;
 	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
 
@@ -1239,7 +1241,7 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 	 * processes by marking it as no longer setup. Then we make it
 	 * inaccessible to remote processes by clearing the XPC per partition
 	 * specific variable's magic # (which indicates that these variables
-	 * are no longer valid) and by ignoring all XPC notify IPIs sent to
+	 * are no longer valid) and by ignoring all XPC notify IRQs sent to
 	 * this partition.
 	 */
 
@@ -1275,7 +1277,7 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 	part_sn2->local_GPs = NULL;
 	kfree(part->channels);
 	part->channels = NULL;
-	part_sn2->local_IPI_amo_va = NULL;
+	part_sn2->local_chctl_amo_va = NULL;
 }
 
 /*
@@ -1370,7 +1372,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 		if (pulled_entry->GPs_pa == 0 ||
 		    pulled_entry->openclose_args_pa == 0 ||
-		    pulled_entry->IPI_amo_pa == 0) {
+		    pulled_entry->chctl_amo_pa == 0) {
 
 			dev_err(xpc_chan, "partition %d's XPC vars_part for "
 				"partition %d are not valid\n", partid,
@@ -1383,10 +1385,11 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 		part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
 		part_sn2->remote_openclose_args_pa =
 		    pulled_entry->openclose_args_pa;
-		part_sn2->remote_IPI_amo_va =
-		    (AMO_t *)__va(pulled_entry->IPI_amo_pa);
-		part_sn2->remote_IPI_nasid = pulled_entry->IPI_nasid;
-		part_sn2->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
+		part_sn2->remote_chctl_amo_va =
+		    (AMO_t *)__va(pulled_entry->chctl_amo_pa);
+		part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+		part_sn2->notify_IRQ_phys_cpuid =
+		    pulled_entry->notify_IRQ_phys_cpuid;
 
 		if (part->nchannels > pulled_entry->nchannels)
 			part->nchannels = pulled_entry->nchannels;
@@ -1437,7 +1440,7 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 	 * Send activate IRQ to get other side to activate if they've not
 	 * already begun to do so.
 	 */
-	xpc_activate_IRQ_send_sn2(part_sn2->remote_amos_page_pa,
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
 				  cnodeid_to_nasid(0),
 				  part_sn2->activate_IRQ_nasid,
 				  part_sn2->activate_IRQ_phys_cpuid);
@@ -1462,28 +1465,28 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 }
 
 /*
- * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
+ * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
  */
 static u64
-xpc_get_IPI_flags_sn2(struct xpc_partition *part)
+xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
-	u64 IPI_amo;
+	union xpc_channel_ctl_flags chctl;
 	enum xp_retval ret;
 
 	/*
-	 * See if there are any IPI flags to be handled.
+	 * See if there are any chctl flags to be handled.
 	 */
 
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	IPI_amo = part->local_IPI_amo;
-	if (IPI_amo != 0)
-		part->local_IPI_amo = 0;
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
 
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
 
-	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
+	if (xpc_any_openclose_chctl_flags_set(&chctl)) {
 		ret = xpc_pull_remote_cachelines_sn2(part, part->
 						     remote_openclose_args,
 						     (void *)part_sn2->
@@ -1496,12 +1499,12 @@ xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 				"partition %d, ret=%d\n", XPC_PARTID(part),
 				ret);
 
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
 		}
 	}
 
-	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
+	if (xpc_any_msg_chctl_flags_set(&chctl)) {
 		ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
 						(void *)part_sn2->remote_GPs_pa,
 						     XPC_GP_SIZE);
@@ -1511,12 +1514,12 @@ xpc_get_IPI_flags_sn2(struct xpc_partition *part)
 			dev_dbg(xpc_chan, "failed to pull GPs from partition "
 				"%d, ret=%d\n", XPC_PARTID(part), ret);
 
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
 		}
 	}
 
-	return IPI_amo;
+	return chctl.all_flags;
 }
 
 /*
@@ -1610,7 +1613,7 @@ xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
 }
 
 static void
-xpc_process_msg_IPI_sn2(struct xpc_partition *part, int ch_number)
+xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
 {
 	struct xpc_channel *ch = &part->channels[ch_number];
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
@@ -1827,8 +1830,8 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 
 /*
  * Now we actually send the messages that are ready to be sent by advancing
- * the local message queue's Put value and then send an IPI to the recipient
- * partition.
+ * the local message queue's Put value and then send a chctl msgrequest to the
+ * recipient partition.
  */
 static void
 xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
@@ -1836,7 +1839,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg;
 	s64 put = initial_put + 1;
-	int send_IPI = 0;
+	int send_msgrequest = 0;
 
 	while (1) {
 
@@ -1871,7 +1874,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 		dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
 			"channel=%d\n", put, ch->partid, ch->number);
 
-		send_IPI = 1;
+		send_msgrequest = 1;
 
 		/*
 		 * We need to ensure that the message referenced by
@@ -1881,8 +1884,8 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 		initial_put = put;
 	}
 
-	if (send_IPI)
-		xpc_send_channel_msgrequest_sn2(ch);
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
 }
 
 /*
@@ -1929,13 +1932,13 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 		 * There aren't any available msg entries at this time.
 		 *
 		 * In waiting for a message entry to become available,
-		 * we set a timeout in case the other side is not
-		 * sending completion IPIs. This lets us fake an IPI
-		 * that will cause the IPI handler to fetch the latest
-		 * GP values as if an IPI was sent by the other side.
+		 * we set a timeout in case the other side is not sending
+		 * completion interrupts. This lets us fake a notify IRQ
+		 * that will cause the notify IRQ handler to fetch the latest
+		 * GP values as if an interrupt was sent by the other side.
 		 */
 		if (ret == xpTimeout)
-			xpc_send_channel_local_msgrequest_sn2(ch);
+			xpc_send_chctl_local_msgrequest_sn2(ch);
 
 		if (flags & XPC_NOWAIT)
 			return xpNoWait;
@@ -1962,8 +1965,8 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 
 /*
  * Common code that does the actual sending of the message by advancing the
- * local message queue's Put value and sends an IPI to the partition the
- * message is being sent to.
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
  */
 static enum xp_retval
 xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
@@ -2055,7 +2058,7 @@ out_1:
 /*
  * Now we actually acknowledge the messages that have been delivered and ack'd
  * by advancing the cached remote message queue's Get value and if requested
- * send an IPI to the message sender's partition.
+ * send a chctl msgrequest to the message sender's partition.
  */
 static void
 xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
@@ -2063,7 +2066,7 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg;
 	s64 get = initial_get + 1;
-	int send_IPI = 0;
+	int send_msgrequest = 0;
 
 	while (1) {
 
@@ -2099,7 +2102,7 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
 			"channel=%d\n", get, ch->partid, ch->number);
 
-		send_IPI = (msg_flags & XPC_M_INTERRUPT);
+		send_msgrequest = (msg_flags & XPC_M_INTERRUPT);
 
 		/*
 		 * We need to ensure that the message referenced by
@@ -2109,8 +2112,8 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 		initial_get = get;
 	}
 
-	if (send_IPI)
-		xpc_send_channel_msgrequest_sn2(ch);
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
 }
 
 static void
@@ -2168,9 +2171,9 @@ xpc_init_sn2(void)
 	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
-	xpc_get_IPI_flags = xpc_get_IPI_flags_sn2;
+	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
 	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
-	xpc_process_msg_IPI = xpc_process_msg_IPI_sn2;
+	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
 	xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
 
@@ -2181,10 +2184,10 @@ xpc_init_sn2(void)
 	    xpc_indicate_partition_disengaged_sn2;
 	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
 
-	xpc_send_channel_closerequest = xpc_send_channel_closerequest_sn2;
-	xpc_send_channel_closereply = xpc_send_channel_closereply_sn2;
-	xpc_send_channel_openrequest = xpc_send_channel_openrequest_sn2;
-	xpc_send_channel_openreply = xpc_send_channel_openreply_sn2;
+	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
+	xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
+	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
+	xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
 
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c53b229cb04..1401b0f45dc 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -26,7 +26,7 @@ static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
 static void *xpc_activate_mq;
 
 static void
-xpc_IPI_send_local_activate_uv(struct xpc_partition *part)
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part)
 {
 	/*
 	 * >>> make our side think that the remote parition sent an activate
@@ -75,13 +75,13 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
  * >>>	part->sn.uv.activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
  */
 
-	xpc_IPI_send_local_activate_uv(part);
+	xpc_send_local_activate_IRQ_uv(part);
 }
 
 static void
 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
 {
-	xpc_IPI_send_local_activate_uv(part);
+	xpc_send_local_activate_IRQ_uv(part);
 }
 
 /*
@@ -114,7 +114,7 @@ xpc_make_first_contact_uv(struct xpc_partition *part)
 }
 
 static u64
-xpc_get_IPI_flags_uv(struct xpc_partition *part)
+xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
 {
 	/* >>> this function needs fleshing out */
 	return 0UL;
@@ -140,7 +140,7 @@ xpc_init_uv(void)
 	xpc_setup_infrastructure = xpc_setup_infrastructure_uv;
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_uv;
 	xpc_make_first_contact = xpc_make_first_contact_uv;
-	xpc_get_IPI_flags = xpc_get_IPI_flags_uv;
+	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_uv;
 }
 
-- 
cgit v1.2.3


From c39838ce21ca8e05857ed7f4be5d289011561905 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:11 -0700
Subject: sgi-xp: replace AMO_t typedef by struct amo

Replace the AMO_t typedef by a direct reference to 'struct amo'.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp_sn2.c   |   2 +-
 drivers/misc/sgi-xp/xpc.h      |  24 +++----
 drivers/misc/sgi-xp/xpc_main.c |  16 ++---
 drivers/misc/sgi-xp/xpc_sn2.c  | 139 +++++++++++++++++++++--------------------
 4 files changed, 93 insertions(+), 88 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index 3d553fa73f4..1fcfdebca2c 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
  * If the PIO read times out, the MCA handler will consume the error and
  * return to a kernel-provided instruction to indicate an error. This PIO read
  * exists because it is guaranteed to timeout if the destination is down
- * (AMO operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
  * which unfortunately we have to work around).
  */
 static enum xp_retval
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 26a1725f68a..da2680892df 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -38,8 +38,8 @@
 /*
  * The next macros define word or bit representations for given
  * C-brick nasid in either the SAL provided bit array representing
- * nasids in the partition/machine or the AMO_t array used for
- * inter-partition initiation communications.
+ * nasids in the partition/machine or the array of amo structures used
+ * for inter-partition initiation communications.
  *
  * For SN2 machines, C-Bricks are alway even numbered NASIDs.  As
  * such, some space will be saved by insisting that nasid information
@@ -144,8 +144,8 @@ struct xpc_vars_sn2 {
 	int activate_IRQ_nasid;
 	int activate_IRQ_phys_cpuid;
 	u64 vars_part_pa;
-	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
-	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
+	u64 amos_page_pa;	/* paddr of page of amos from MSPEC driver */
+	struct amo *amos_page;	/* vaddr of page of amos from MSPEC driver */
 };
 
 #define XPC_V_VERSION _XPC_VERSION(3, 1)    /* version 3.1 of the cross vars */
@@ -153,17 +153,17 @@ struct xpc_vars_sn2 {
 /*
  * The following pertains to ia64-sn2 only.
  *
- * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
  * pages are located in the lowest granule. The lowest granule uses 4k pages
  * for cached references and an alternate TLB handler to never provide a
  * cacheable mapping for the entire region. This will prevent speculative
  * reading of cached copies of our lines from being issued which will cause
  * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * AMO variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
- * NOTIFY IRQs, 128 AMO variables (based on XP_NASID_MASK_WORDS) to identify
- * the senders of ACTIVATE IRQs, 1 AMO variable to identify which remote
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
  * partitions (i.e., XPCs) consider themselves currently engaged with the
- * local XPC and 1 AMO variable to request partition deactivation.
+ * local XPC and 1 amo variable to request partition deactivation.
  */
 #define XPC_NOTIFY_IRQ_AMOS	0
 #define XPC_ACTIVATE_IRQ_AMOS	(XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
@@ -186,7 +186,7 @@ struct xpc_vars_part_sn2 {
 	u64 openclose_args_pa;	/* physical address of open and close args */
 	u64 GPs_pa;		/* physical address of Get/Put values */
 
-	u64 chctl_amo_pa;	/* physical address of chctl flags' AMO_t */
+	u64 chctl_amo_pa;	/* physical address of chctl flags' amo */
 
 	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
 	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
@@ -547,8 +547,8 @@ struct xpc_partition_sn2 {
 	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
 	char notify_IRQ_owner[8];	/* notify IRQ's owner's name */
 
-	AMO_t *remote_chctl_amo_va; /* address of remote chctl flags' AMO_t */
-	AMO_t *local_chctl_amo_va;	/* address of chctl flags' AMO_t */
+	struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+	struct amo *local_chctl_amo_va;	/* address of chctl flags' amo */
 
 	struct timer_list dropped_notify_IRQ_timer;	/* dropped IRQ timer */
 };
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 43f5b686ecf..2934b447300 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -26,16 +26,16 @@
  *	Caveats:
  *
  *	  . Currently on sn2, we have no way to determine which nasid an IRQ
- *	    came from. Thus, xpc_send_IRQ_sn2() does a remote AMO write
- *	    followed by an IPI. The AMO indicates where data is to be pulled
- *	    from, so after the IPI arrives, the remote partition checks the AMO
- *	    word. The IPI can actually arrive before the AMO however, so other
- *	    code must periodically check for this case. Also, remote AMO
+ *	    came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ *	    followed by an IPI. The amo indicates where data is to be pulled
+ *	    from, so after the IPI arrives, the remote partition checks the amo
+ *	    word. The IPI can actually arrive before the amo however, so other
+ *	    code must periodically check for this case. Also, remote amo
  *	    operations do not reliably time out. Thus we do a remote PIO read
  *	    solely to know whether the remote partition is down and whether we
  *	    should stop sending IPIs to it. This remote PIO read operation is
  *	    set up in a special nofault region so SAL knows to ignore (and
- *	    cleanup) any errors due to the remote AMO write, PIO read, and/or
+ *	    cleanup) any errors due to the remote amo write, PIO read, and/or
  *	    PIO write operations.
  *
  *	    If/when new hardware solves this IPI problem, we should abandon
@@ -302,7 +302,7 @@ xpc_hb_checker(void *ignore)
 
 			/*
 			 * We need to periodically recheck to ensure no
-			 * IRQ/AMO pairs have been missed.  That check
+			 * IRQ/amo pairs have been missed.  That check
 			 * must always reset xpc_hb_check_timeout.
 			 */
 			force_IRQ = 1;
@@ -1034,7 +1034,7 @@ xpc_init(void)
 	if (is_shub()) {
 		/*
 		 * The ia64-sn2 architecture supports at most 64 partitions.
-		 * And the inability to unregister remote AMOs restricts us
+		 * And the inability to unregister remote amos restricts us
 		 * further to only support exactly 64 partitions on this
 		 * architecture, no less.
 		 */
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 0fef7d86a5a..01dd40ec2a8 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -111,13 +111,14 @@ xpc_disallow_IPI_ops_sn2(void)
  */
 
 static u64
-xpc_receive_IRQ_amo_sn2(AMO_t *amo)
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
 {
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
 }
 
 static enum xp_retval
-xpc_send_IRQ_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+		 int vector)
 {
 	int ret = 0;
 	unsigned long irq_flags;
@@ -131,7 +132,7 @@ xpc_send_IRQ_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IRQs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and amos to it until the heartbeat times out.
 	 */
 	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
 						     xp_nofault_PIOR_target));
@@ -141,12 +142,12 @@ xpc_send_IRQ_sn2(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
 	return ((ret == 0) ? xpSuccess : xpPioReadError);
 }
 
-static AMO_t *
+static struct amo *
 xpc_init_IRQ_amo_sn2(int index)
 {
-	AMO_t *amo = xpc_vars->amos_page + index;
+	struct amo *amo = xpc_vars->amos_page + index;
 
-	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear AMO variable */
+	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear amo variable */
 	return amo;
 }
 
@@ -166,7 +167,7 @@ xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
 }
 
 /*
- * Flag the appropriate AMO variable and send an IRQ to the specified node.
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
  */
 static void
 xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
@@ -174,8 +175,9 @@ xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
-	AMO_t *amos = (AMO_t *)__va(amos_page_pa +
-				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
+	struct amo *amos = (struct amo *)__va(amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS *
+					      sizeof(struct amo)));
 
 	(void)xpc_send_IRQ_sn2(&amos[w_index], (1UL << b_index), to_nasid,
 			       to_phys_cpuid, SGI_XPC_ACTIVATE);
@@ -186,8 +188,9 @@ xpc_send_local_activate_IRQ_sn2(int from_nasid)
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
-	AMO_t *amos = (AMO_t *)__va(xpc_vars->amos_page_pa +
-				    (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
+	struct amo *amos = (struct amo *)__va(xpc_vars->amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS *
+					      sizeof(struct amo)));
 
 	/* fake the sending and receipt of an activate IRQ from remote nasid */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amos[w_index].variable), FETCHOP_OR,
@@ -227,7 +230,7 @@ xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
 /*
  * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
  * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
+ * than one partition, we use an amo structure per partition to indicate
  * whether a partition has sent an IRQ or not.  If it has, then wake up the
  * associated kthread to handle it.
  *
@@ -391,20 +394,20 @@ static void
 xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->sn.sn2.remote_amos_page_pa +
-				   (XPC_ENGAGED_PARTITIONS_AMO *
-				    sizeof(AMO_t)));
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO *
+					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
 
-	/* set bit corresponding to our partid in remote partition's AMO */
+	/* set bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
 			 (1UL << sn_partition_id));
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IRQs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and amos to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -418,20 +421,20 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part_sn2->remote_amos_page_pa +
-				   (XPC_ENGAGED_PARTITIONS_AMO *
-				    sizeof(AMO_t)));
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO *
+					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
 
-	/* clear bit corresponding to our partid in remote partition's AMO */
+	/* clear bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
 			 ~(1UL << sn_partition_id));
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IRQs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and amos to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -441,7 +444,7 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 
 	/*
 	 * Send activate IRQ to get other side to see that we've cleared our
-	 * bit in their engaged partitions AMO.
+	 * bit in their engaged partitions amo.
 	 */
 	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
 				  cnodeid_to_nasid(0),
@@ -452,9 +455,9 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 static int
 xpc_partition_engaged_sn2(short partid)
 {
-	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* our partition's AMO variable ANDed with partid mask */
+	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
 		(1UL << partid)) != 0;
 }
@@ -462,18 +465,18 @@ xpc_partition_engaged_sn2(short partid)
 static int
 xpc_any_partition_engaged_sn2(void)
 {
-	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* our partition's AMO variable */
+	/* our partition's amo variable */
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
 }
 
 static void
 xpc_assume_partition_disengaged_sn2(short partid)
 {
-	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
 
-	/* clear bit(s) based on partid mask in our partition's AMO */
+	/* clear bit(s) based on partid mask in our partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
 			 ~(1UL << partid));
 }
@@ -482,10 +485,10 @@ xpc_assume_partition_disengaged_sn2(short partid)
 static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
 
 /*
- * Change protections to allow AMO operations on non-Shub 1.1 systems.
+ * Change protections to allow amo operations on non-Shub 1.1 systems.
  */
 static enum xp_retval
-xpc_allow_AMO_ops_sn2(AMO_t *amos_page)
+xpc_allow_amo_ops_sn2(struct amo *amos_page)
 {
 	u64 nasid_array = 0;
 	int ret;
@@ -493,7 +496,7 @@ xpc_allow_AMO_ops_sn2(AMO_t *amos_page)
 	/*
 	 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
 	 * collides with memory operations. On those systems we call
-	 * xpc_allow_AMO_ops_shub_wars_1_1_sn2() instead.
+	 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
 	 */
 	if (!enable_shub_wars_1_1()) {
 		ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE,
@@ -506,10 +509,10 @@ xpc_allow_AMO_ops_sn2(AMO_t *amos_page)
 }
 
 /*
- * Change protections to allow AMO operations on Shub 1.1 systems.
+ * Change protections to allow amo operations on Shub 1.1 systems.
  */
 static void
-xpc_allow_AMO_ops_shub_wars_1_1_sn2(void)
+xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
 {
 	int node;
 	int nasid;
@@ -536,7 +539,7 @@ xpc_allow_AMO_ops_shub_wars_1_1_sn2(void)
 static enum xp_retval
 xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 {
-	AMO_t *amos_page;
+	struct amo *amos_page;
 	int i;
 	int ret;
 
@@ -549,32 +552,32 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 						     XPC_RP_VARS_SIZE);
 
 	/*
-	 * Before clearing xpc_vars, see if a page of AMOs had been previously
+	 * Before clearing xpc_vars, see if a page of amos had been previously
 	 * allocated. If not we'll need to allocate one and set permissions
-	 * so that cross-partition AMOs are allowed.
+	 * so that cross-partition amos are allowed.
 	 *
-	 * The allocated AMO page needs MCA reporting to remain disabled after
+	 * The allocated amo page needs MCA reporting to remain disabled after
 	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
 	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
 	 * which is pointed to by the reserved page, and re-use that saved copy
-	 * on subsequent loads of XPC. This AMO page is never freed, and its
+	 * on subsequent loads of XPC. This amo page is never freed, and its
 	 * memory protections are never restricted.
 	 */
 	amos_page = xpc_vars->amos_page;
 	if (amos_page == NULL) {
-		amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
+		amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
 		if (amos_page == NULL) {
-			dev_err(xpc_part, "can't allocate page of AMOs\n");
+			dev_err(xpc_part, "can't allocate page of amos\n");
 			return xpNoMemory;
 		}
 
 		/*
-		 * Open up AMO-R/W to cpu.  This is done on Shub 1.1 systems
-		 * when xpc_allow_AMO_ops_shub_wars_1_1_sn2() is called.
+		 * Open up amo-R/W to cpu.  This is done on Shub 1.1 systems
+		 * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
 		 */
-		ret = xpc_allow_AMO_ops_sn2(amos_page);
+		ret = xpc_allow_amo_ops_sn2(amos_page);
 		if (ret != xpSuccess) {
-			dev_err(xpc_part, "can't allow AMO operations\n");
+			dev_err(xpc_part, "can't allow amo operations\n");
 			uncached_free_page(__IA64_UNCACHED_OFFSET |
 					   TO_PHYS((u64)amos_page), 1);
 			return ret;
@@ -595,11 +598,11 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part_sn2) *
 	       xp_max_npartitions);
 
-	/* initialize the activate IRQ related AMO variables */
+	/* initialize the activate IRQ related amo variables */
 	for (i = 0; i < xp_nasid_mask_words; i++)
 		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS + i);
 
-	/* initialize the engaged remote partitions related AMO variables */
+	/* initialize the engaged remote partitions related amo variables */
 	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO);
 	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO);
 
@@ -745,19 +748,20 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part_sn2->remote_amos_page_pa +
-				  (XPC_DEACTIVATE_REQUEST_AMO * sizeof(AMO_t)));
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO *
+					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
 
-	/* set bit corresponding to our partid in remote partition's AMO */
+	/* set bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
 			 (1UL << sn_partition_id));
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IRQs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and amos to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -767,7 +771,7 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 
 	/*
 	 * Send activate IRQ to get other side to see that we've set our
-	 * bit in their deactivate request AMO.
+	 * bit in their deactivate request amo.
 	 */
 	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
 				  cnodeid_to_nasid(0),
@@ -779,19 +783,20 @@ static void
 xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
-	AMO_t *amo = (AMO_t *)__va(part->sn.sn2.remote_amos_page_pa +
-				  (XPC_DEACTIVATE_REQUEST_AMO * sizeof(AMO_t)));
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO *
+					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
 
-	/* clear bit corresponding to our partid in remote partition's AMO */
+	/* clear bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
 			 ~(1UL << sn_partition_id));
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
 	 * didn't, we'd never know that the other partition is down and would
-	 * keep sending IRQs and AMOs to it until the heartbeat times out.
+	 * keep sending IRQs and amos to it until the heartbeat times out.
 	 */
 	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
 							       variable),
@@ -803,9 +808,9 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 static int
 xpc_partition_deactivation_requested_sn2(short partid)
 {
-	AMO_t *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO;
 
-	/* our partition's AMO variable ANDed with partid mask */
+	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
 		(1UL << partid)) != 0;
 }
@@ -976,7 +981,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 }
 
 /*
- * Loop through the activation AMO variables and process any bits
+ * Loop through the activation amo variables and process any bits
  * which are set.  Each bit indicates a nasid sending a partition
  * activation or deactivation request.
  *
@@ -989,11 +994,11 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 	u64 nasid_mask;
 	u64 nasid;		/* remote nasid */
 	int n_IRQs_detected = 0;
-	AMO_t *act_amos;
+	struct amo *act_amos;
 
 	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
 
-	/* scan through act AMO variable looking for non-zero entries */
+	/* scan through act amo variable looking for non-zero entries */
 	for (word = 0; word < xp_nasid_mask_words; word++) {
 
 		if (xpc_exiting)
@@ -1005,7 +1010,7 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 			continue;
 		}
 
-		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
+		dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", word,
 			nasid_mask);
 
 		/*
@@ -1038,7 +1043,7 @@ xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
 
 	n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
 	if (n_IRQs_detected < n_IRQs_expected) {
-		/* retry once to help avoid missing AMO */
+		/* retry once to help avoid missing amo */
 		(void)xpc_identify_activate_IRQ_sender_sn2();
 	}
 }
@@ -1386,7 +1391,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 		part_sn2->remote_openclose_args_pa =
 		    pulled_entry->openclose_args_pa;
 		part_sn2->remote_chctl_amo_va =
-		    (AMO_t *)__va(pulled_entry->chctl_amo_pa);
+		    (struct amo *)__va(pulled_entry->chctl_amo_pa);
 		part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
 		part_sn2->notify_IRQ_phys_cpuid =
 		    pulled_entry->notify_IRQ_phys_cpuid;
@@ -1417,7 +1422,7 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 	enum xp_retval ret;
 
 	/*
-	 * Register the remote partition's AMOs with SAL so it can handle
+	 * Register the remote partition's amos with SAL so it can handle
 	 * and cleanup errors within that address range should the remote
 	 * partition go down. We don't unregister this range because it is
 	 * difficult to tell when outstanding writes to the remote partition
@@ -2192,9 +2197,9 @@ xpc_init_sn2(void)
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
 
-	/* open up protections for IPI and [potentially] AMO operations */
+	/* open up protections for IPI and [potentially] amo operations */
 	xpc_allow_IPI_ops_sn2();
-	xpc_allow_AMO_ops_shub_wars_1_1_sn2();
+	xpc_allow_amo_ops_shub_wars_1_1_sn2();
 
 	/*
 	 * This is safe to do before the xpc_hb_checker thread has started
-- 
cgit v1.2.3


From 185c3a1b4bb4353529257f97caaeaac6c695e77d Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:11 -0700
Subject: sgi-xp: isolate allocation of XPC's msgqueues to sn2 only

Move the allocation of XPC's msgqueues to xpc_sn2.c.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h         |   3 +-
 drivers/misc/sgi-xp/xpc_channel.c | 197 ++----------------------------------
 drivers/misc/sgi-xp/xpc_main.c    |   2 +
 drivers/misc/sgi-xp/xpc_sn2.c     | 205 +++++++++++++++++++++++++++++++++++---
 4 files changed, 204 insertions(+), 203 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index da2680892df..defd0888118 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -666,6 +666,8 @@ extern void (*xpc_online_heartbeat) (void);
 extern void (*xpc_check_remote_hb) (void);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
 extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
+extern enum xp_retval (*xpc_allocate_msgqueues) (struct xpc_channel *);
+extern void (*xpc_free_msgqueues) (struct xpc_channel *);
 extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
 extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
 extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
@@ -728,7 +730,6 @@ extern void xpc_deactivate_partition(const int, struct xpc_partition *,
 extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
 
 /* found in xpc_channel.c */
-extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern void xpc_initiate_connect(int);
 extern void xpc_initiate_disconnect(int);
 extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 0d3c153d1d0..1c73423665b 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -24,145 +24,6 @@
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
-/*
- * Guarantee that the kzalloc'd memory is cacheline aligned.
- */
-void *
-xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kzalloc will give us cachline aligned memory by default */
-	*base = kzalloc(size, flags);
-	if (*base == NULL)
-		return NULL;
-
-	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
-		return *base;
-
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kzalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL)
-		return NULL;
-
-	return (void *)L1_CACHE_ALIGN((u64)*base);
-}
-
-/*
- * Allocate the local message queue and the notify queue.
- */
-static enum xp_retval
-xpc_allocate_local_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-								   GFP_KERNEL,
-						      &ch->local_msgqueue_base);
-		if (ch->local_msgqueue == NULL)
-			continue;
-
-		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
-		if (ch->notify_queue == NULL) {
-			kfree(ch->local_msgqueue_base);
-			ch->local_msgqueue = NULL;
-			continue;
-		}
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->local_nentries, ch->partid, ch->number);
-
-			ch->local_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
-		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpNoMemory;
-}
-
-/*
- * Allocate the cached remote message queue.
- */
-static enum xp_retval
-xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-	DBUG_ON(ch->remote_nentries <= 0);
-
-	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-								    GFP_KERNEL,
-						     &ch->remote_msgqueue_base);
-		if (ch->remote_msgqueue == NULL)
-			continue;
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->remote_nentries, ch->partid, ch->number);
-
-			ch->remote_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
-		"partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpNoMemory;
-}
-
-/*
- * Allocate message queues and other stuff associated with a channel.
- *
- * Note: Assumes all of the channel sizes are filled in.
- */
-static enum xp_retval
-xpc_allocate_msgqueues(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	enum xp_retval ret;
-
-	DBUG_ON(ch->flags & XPC_C_SETUP);
-
-	ret = xpc_allocate_local_msgqueue(ch);
-	if (ret != xpSuccess)
-		return ret;
-
-	ret = xpc_allocate_remote_msgqueue(ch);
-	if (ret != xpSuccess) {
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-		return ret;
-	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-	ch->flags |= XPC_C_SETUP;
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	return xpSuccess;
-}
-
 /*
  * Process a connect message from a remote partition.
  *
@@ -191,10 +52,11 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 		if (ret != xpSuccess)
 			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
 
+		ch->flags |= XPC_C_SETUP;
+
 		if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
 			return;
 
-		DBUG_ON(!(ch->flags & XPC_C_SETUP));
 		DBUG_ON(ch->local_msgqueue == NULL);
 		DBUG_ON(ch->remote_msgqueue == NULL);
 	}
@@ -219,55 +81,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 	spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
-/*
- * Free up message queues and other stuff that were allocated for the specified
- * channel.
- *
- * Note: ch->reason and ch->reason_line are left set for debugging purposes,
- * they're cleared when XPC_C_DISCONNECTED is cleared.
- */
-static void
-xpc_free_msgqueues(struct xpc_channel *ch)
-{
-	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
-
-	ch->remote_msgqueue_pa = 0;
-	ch->func = NULL;
-	ch->key = NULL;
-	ch->msg_size = 0;
-	ch->local_nentries = 0;
-	ch->remote_nentries = 0;
-	ch->kthreads_assigned_limit = 0;
-	ch->kthreads_idle_limit = 0;
-
-	ch_sn2->local_GP->get = 0;
-	ch_sn2->local_GP->put = 0;
-	ch_sn2->remote_GP.get = 0;
-	ch_sn2->remote_GP.put = 0;
-	ch_sn2->w_local_GP.get = 0;
-	ch_sn2->w_local_GP.put = 0;
-	ch_sn2->w_remote_GP.get = 0;
-	ch_sn2->w_remote_GP.put = 0;
-	ch_sn2->next_msg_to_pull = 0;
-
-	if (ch->flags & XPC_C_SETUP) {
-		ch->flags &= ~XPC_C_SETUP;
-
-		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
-			ch->flags, ch->partid, ch->number);
-
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->remote_msgqueue_base);
-		ch->remote_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-	}
-}
-
 /*
  * spin_lock_irqsave() is expected to be held on entry.
  */
@@ -331,7 +144,11 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	/* it's now safe to free the channel's message queues */
 	xpc_free_msgqueues(ch);
 
-	/* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+	/*
+	 * Mark the channel disconnected and clear all other flags, including
+	 * XPC_C_SETUP (because of call to xpc_free_msgqueues()) but not
+	 * including XPC_C_WDISCONNECT (if it was set).
+	 */
 	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
 
 	atomic_dec(&part->nchannels_active);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 2934b447300..b5f3c5e59db 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -186,6 +186,8 @@ void (*xpc_check_remote_hb) (void);
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
 u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
+enum xp_retval (*xpc_allocate_msgqueues) (struct xpc_channel *ch);
+void (*xpc_free_msgqueues) (struct xpc_channel *ch);
 void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
 int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 01dd40ec2a8..e5dc8c44c6f 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -1048,6 +1048,30 @@ xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
 	}
 }
 
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned_sn2(size_t size, gfp_t flags, void **base)
+{
+	/* see if kzalloc will give us cacheline aligned memory by default */
+	*base = kzalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kzalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
 /*
  * Setup the infrastructure necessary to support XPartition Communication
  * between the specified remote partition and the local one.
@@ -1078,10 +1102,9 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 
 	/* allocate all the required GET/PUT values */
 
-	part_sn2->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							    GFP_KERNEL,
-							    &part_sn2->
-							    local_GPs_base);
+	part_sn2->local_GPs =
+	    xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
+					      &part_sn2->local_GPs_base);
 	if (part_sn2->local_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for local get/put "
 			"values\n");
@@ -1089,10 +1112,9 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		goto out_1;
 	}
 
-	part_sn2->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-							     GFP_KERNEL,
-							     &part_sn2->
-							     remote_GPs_base);
+	part_sn2->remote_GPs =
+	    xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
+					      &part_sn2->remote_GPs_base);
 	if (part_sn2->remote_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for remote get/put "
 			"values\n");
@@ -1105,8 +1127,9 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	/* allocate all the required open and close args */
 
 	part->local_openclose_args =
-	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					  &part->local_openclose_args_base);
+	    xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
+					      GFP_KERNEL,
+					      &part->local_openclose_args_base);
 	if (part->local_openclose_args == NULL) {
 		dev_err(xpc_chan, "can't get memory for local connect args\n");
 		retval = xpNoMemory;
@@ -1114,8 +1137,9 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	}
 
 	part->remote_openclose_args =
-	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					  &part->remote_openclose_args_base);
+	    xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
+					      GFP_KERNEL,
+					     &part->remote_openclose_args_base);
 	if (part->remote_openclose_args == NULL) {
 		dev_err(xpc_chan, "can't get memory for remote connect args\n");
 		retval = xpNoMemory;
@@ -1527,6 +1551,161 @@ xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
 	return chctl.all_flags;
 }
 
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xp_retval
+xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
+{
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->msg_size;
+		ch->local_msgqueue =
+		    xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
+						      &ch->local_msgqueue_base);
+		if (ch->local_msgqueue == NULL)
+			continue;
+
+		nbytes = nentries * sizeof(struct xpc_notify);
+		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+		if (ch->notify_queue == NULL) {
+			kfree(ch->local_msgqueue_base);
+			ch->local_msgqueue = NULL;
+			continue;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->local_nentries, ch->partid, ch->number);
+
+			ch->local_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xp_retval
+xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
+{
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	DBUG_ON(ch->remote_nentries <= 0);
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->msg_size;
+		ch->remote_msgqueue =
+		    xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
+						     &ch->remote_msgqueue_base);
+		if (ch->remote_msgqueue == NULL)
+			continue;
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->remote_nentries, ch->partid, ch->number);
+
+			ch->remote_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+		"partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xp_retval
+xpc_allocate_msgqueues_sn2(struct xpc_channel *ch)
+{
+	enum xp_retval ret;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ret = xpc_allocate_local_msgqueue_sn2(ch);
+	if (ret == xpSuccess) {
+
+		ret = xpc_allocate_remote_msgqueue_sn2(ch);
+		if (ret != xpSuccess) {
+			kfree(ch->local_msgqueue_base);
+			ch->local_msgqueue = NULL;
+			kfree(ch->notify_queue);
+			ch->notify_queue = NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ *
+ * Note: ch->reason and ch->reason_line are left set for debugging purposes,
+ * they're cleared when XPC_C_DISCONNECTED is cleared.
+ */
+static void
+xpc_free_msgqueues_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
+	ch->remote_msgqueue_pa = 0;
+	ch->func = NULL;
+	ch->key = NULL;
+	ch->msg_size = 0;
+	ch->local_nentries = 0;
+	ch->remote_nentries = 0;
+	ch->kthreads_assigned_limit = 0;
+	ch->kthreads_idle_limit = 0;
+
+	ch_sn2->local_GP->get = 0;
+	ch_sn2->local_GP->put = 0;
+	ch_sn2->remote_GP.get = 0;
+	ch_sn2->remote_GP.put = 0;
+	ch_sn2->w_local_GP.get = 0;
+	ch_sn2->w_local_GP.put = 0;
+	ch_sn2->w_remote_GP.get = 0;
+	ch_sn2->w_remote_GP.put = 0;
+	ch_sn2->next_msg_to_pull = 0;
+
+	if (ch->flags & XPC_C_SETUP) {
+		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+			ch->flags, ch->partid, ch->number);
+
+		kfree(ch->local_msgqueue_base);
+		ch->local_msgqueue = NULL;
+		kfree(ch->remote_msgqueue_base);
+		ch->remote_msgqueue = NULL;
+		kfree(ch->notify_queue);
+		ch->notify_queue = NULL;
+	}
+}
+
 /*
  * Notify those who wanted to be notified upon delivery of their message.
  */
@@ -2177,6 +2356,8 @@ xpc_init_sn2(void)
 	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
 	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
+	xpc_allocate_msgqueues = xpc_allocate_msgqueues_sn2;
+	xpc_free_msgqueues = xpc_free_msgqueues_sn2;
 	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
 	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
 	xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
-- 
cgit v1.2.3


From a7b4d509205db5e9cd3ffc77b306d7b10fe6a34d Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:12 -0700
Subject: sgi-xp: enable XPNET to handle more than 64 partitions

Enable XPNET to support more than 64 partitions.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpnet.c | 213 +++++++++++++++++++-------------------------
 1 file changed, 94 insertions(+), 119 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index f9356ba7315..c5f59a6dae5 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -21,7 +21,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
@@ -32,8 +31,6 @@
 #include <linux/mii.h>
 #include <linux/smp.h>
 #include <linux/string.h>
-#include <asm/sn/io.h>
-#include <asm/sn/sn_sal.h>
 #include <asm/atomic.h>
 #include "xp.h"
 
@@ -104,7 +101,6 @@ struct xpnet_message {
  * then be released.
  */
 struct xpnet_pending_msg {
-	struct list_head free_list;
 	struct sk_buff *skb;
 	atomic_t use_count;
 };
@@ -120,7 +116,7 @@ struct net_device *xpnet_device;
  * When we are notified of other partitions activating, we add them to
  * our bitmask of partitions to which we broadcast.
  */
-static u64 xpnet_broadcast_partitions;
+static unsigned long *xpnet_broadcast_partitions;
 /* protect above */
 static DEFINE_SPINLOCK(xpnet_broadcast_lock);
 
@@ -140,16 +136,13 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock);
 #define XPNET_DEF_MTU (0x8000UL)
 
 /*
- * The partition id is encapsulated in the MAC address.  The following
- * define locates the octet the partid is in.
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
  */
-#define XPNET_PARTID_OCTET	1
-#define XPNET_LICENSE_OCTET	2
+#define XPNET_PARTID_OCTET	2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
 
-/*
- * Define the XPNET debug device structure that is to be used with dev_dbg(),
- * dev_err(), dev_warn(), and dev_info().
- */
 struct device_driver xpnet_dbg_name = {
 	.name = "xpnet"
 };
@@ -231,7 +224,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 					  (void *)msg->buf_pa, msg->size);
 
 		ret = xp_remote_memcpy((void *)((u64)skb->data &
-				                ~(L1_CACHE_BYTES - 1)),
+						~(L1_CACHE_BYTES - 1)),
 				       (void *)msg->buf_pa, msg->size);
 
 		if (ret != xpSuccess) {
@@ -283,8 +276,6 @@ static void
 xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
 			  void *data, void *key)
 {
-	long bp;
-
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(channel != XPC_NET_CHANNEL);
 
@@ -297,31 +288,28 @@ xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
 
 	case xpConnected:	/* connection completed to a partition */
 		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions |= 1UL << (partid - 1);
-		bp = xpnet_broadcast_partitions;
+		__set_bit(partid, xpnet_broadcast_partitions);
 		spin_unlock_bh(&xpnet_broadcast_lock);
 
 		netif_carrier_on(xpnet_device);
 
-		dev_dbg(xpnet, "%s connection created to partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
+		dev_dbg(xpnet, "%s connected to partition %d\n",
+			xpnet_device->name, partid);
 		break;
 
 	default:
 		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions &= ~(1UL << (partid - 1));
-		bp = xpnet_broadcast_partitions;
+		__clear_bit(partid, xpnet_broadcast_partitions);
 		spin_unlock_bh(&xpnet_broadcast_lock);
 
-		if (bp == 0)
+		if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
+				 xp_max_npartitions)) {
 			netif_carrier_off(xpnet_device);
+		}
 
-		dev_dbg(xpnet, "%s disconnected from partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
+		dev_dbg(xpnet, "%s disconnected from partition %d\n",
+			xpnet_device->name, partid);
 		break;
-
 	}
 }
 
@@ -424,36 +412,72 @@ xpnet_send_completed(enum xp_retval reason, short partid, int channel,
 	}
 }
 
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+	   u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+	u8 msg_buffer[XPNET_MSG_SIZE];
+	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+	enum xp_retval ret;
+
+	msg->embedded_bytes = embedded_bytes;
+	if (unlikely(embedded_bytes != 0)) {
+		msg->version = XPNET_VERSION_EMBED;
+		dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+			&msg->data, skb->data, (size_t)embedded_bytes);
+		skb_copy_from_linear_data(skb, &msg->data,
+					  (size_t)embedded_bytes);
+	} else {
+		msg->version = XPNET_VERSION;
+	}
+	msg->magic = XPNET_MAGIC;
+	msg->size = end_addr - start_addr;
+	msg->leadin_ignore = (u64)skb->data - start_addr;
+	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+	msg->buf_pa = __pa(start_addr);
+
+	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+		KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
+		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	atomic_inc(&queued_msg->use_count);
+
+	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+			      XPNET_MSG_SIZE, xpnet_send_completed, queued_msg);
+	if (unlikely(ret != xpSuccess))
+		atomic_dec(&queued_msg->use_count);
+}
+
 /*
  * Network layer has formatted a packet (skb) and is ready to place it
  * "on the wire".  Prepare and send an xpnet_message to all partitions
  * which have connected with us and are targets of this packet.
  *
  * MAC-NOTE:  For the XPNET driver, the MAC address contains the
- * destination partition_id.  If the destination partition id word
- * is 0xff, this packet is to broadcast to all partitions.
+ * destination partid.  If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
  */
 static int
 xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xpnet_pending_msg *queued_msg;
-	enum xp_retval ret;
-	u8 msg_buffer[XPNET_MSG_SIZE];
-	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer[0];
 	u64 start_addr, end_addr;
-	long dp;
-	u8 second_mac_octet;
 	short dest_partid;
-	struct xpnet_dev_private *priv;
-	u16 embedded_bytes;
-
-	priv = (struct xpnet_dev_private *)dev->priv;
+	struct xpnet_dev_private *priv = (struct xpnet_dev_private *)dev->priv;
+	u16 embedded_bytes = 0;
 
 	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
 		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
 		skb->len);
 
+	if (skb->data[0] == 0x33) {
+		dev_kfree_skb(skb);
+		return 0;	/* nothing needed to be done */
+	}
+
 	/*
 	 * The xpnet_pending_msg tracks how many outstanding
 	 * xpc_send_notifies are relying on this skb.  When none
@@ -465,7 +489,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			 "packet\n", sizeof(struct xpnet_pending_msg));
 
 		priv->stats.tx_errors++;
-
 		return -ENOMEM;
 	}
 
@@ -474,7 +497,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
 
 	/* calculate how many bytes to embed in the XPC message */
-	embedded_bytes = 0;
 	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
 		/* skb->data does fit so embed */
 		embedded_bytes = skb->len;
@@ -490,78 +512,28 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	atomic_set(&queued_msg->use_count, 1);
 	queued_msg->skb = skb;
 
-	second_mac_octet = skb->data[XPNET_PARTID_OCTET];
-	if (second_mac_octet == 0xff) {
+	if (skb->data[0] == 0xff) {
 		/* we are being asked to broadcast to all partitions */
-		dp = xpnet_broadcast_partitions;
-	} else if (second_mac_octet != 0) {
-		dp = xpnet_broadcast_partitions &
-		    (1UL << (second_mac_octet - 1));
-	} else {
-		/* 0 is an invalid partid.  Ignore */
-		dp = 0;
-	}
-	dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
-
-	/*
-	 * If we wanted to allow promiscuous mode to work like an
-	 * unswitched network, this would be a good point to OR in a
-	 * mask of partitions which should be receiving all packets.
-	 */
-
-	/*
-	 * Main send loop.
-	 */
-	for (dest_partid = 0; dp && dest_partid < xp_max_npartitions;
-	     dest_partid++) {
+		for_each_bit(dest_partid, xpnet_broadcast_partitions,
+			     xp_max_npartitions) {
 
-		if (!(dp & (1UL << (dest_partid - 1)))) {
-			/* not destined for this partition */
-			continue;
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
 		}
+	} else {
+		dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
+		dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
 
-		/* remove this partition from the destinations mask */
-		dp &= ~(1UL << (dest_partid - 1));
-
-		/* found a partition to send to */
+		if (dest_partid >= 0 &&
+		    dest_partid < xp_max_npartitions &&
+		    test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
 
-		msg->embedded_bytes = embedded_bytes;
-		if (unlikely(embedded_bytes != 0)) {
-			msg->version = XPNET_VERSION_EMBED;
-			dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
-				&msg->data, skb->data, (size_t)embedded_bytes);
-			skb_copy_from_linear_data(skb, &msg->data,
-						  (size_t)embedded_bytes);
-		} else {
-			msg->version = XPNET_VERSION;
-		}
-		msg->magic = XPNET_MAGIC;
-		msg->size = end_addr - start_addr;
-		msg->leadin_ignore = (u64)skb->data - start_addr;
-		msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
-		msg->buf_pa = __pa(start_addr);
-
-		dev_dbg(xpnet, "sending XPC message to %d:%d\n"
-			KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
-			"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
-			dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
-			msg->leadin_ignore, msg->tailout_ignore);
-
-		atomic_inc(&queued_msg->use_count);
-
-		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT,
-				      &msg, sizeof(msg) + embedded_bytes - 1,
-				      xpnet_send_completed, queued_msg);
-		if (unlikely(ret != xpSuccess)) {
-			atomic_dec(&queued_msg->use_count);
-			continue;
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
 		}
 	}
 
 	if (atomic_dec_return(&queued_msg->use_count) == 0) {
-		dev_dbg(xpnet, "no partitions to receive packet destined for "
-			"%d\n", dest_partid);
-
 		dev_kfree_skb(skb);
 		kfree(queued_msg);
 	}
@@ -589,23 +561,28 @@ xpnet_dev_tx_timeout(struct net_device *dev)
 static int __init
 xpnet_init(void)
 {
-	int i;
-	u32 license_num;
-	int result = -ENOMEM;
+	int result;
 
-	if (!ia64_platform_is("sn2"))
+	if (!is_shub() && !is_uv())
 		return -ENODEV;
 
 	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
 
+	xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
+					     sizeof(long), GFP_KERNEL);
+	if (xpnet_broadcast_partitions == NULL)
+		return -ENOMEM;
+
 	/*
 	 * use ether_setup() to init the majority of our device
 	 * structure and then override the necessary pieces.
 	 */
 	xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
 				    XPNET_DEVICE_NAME, ether_setup);
-	if (xpnet_device == NULL)
+	if (xpnet_device == NULL) {
+		kfree(xpnet_broadcast_partitions);
 		return -ENOMEM;
+	}
 
 	netif_carrier_off(xpnet_device);
 
@@ -623,14 +600,10 @@ xpnet_init(void)
 	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
 	 * to collide with any vendor's officially issued MAC.
 	 */
-	xpnet_device->dev_addr[0] = 0xfe;
-	xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
-	license_num = sn_partition_serial_number_val();
-	for (i = 3; i >= 0; i--) {
-		xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
-		    license_num & 0xff;
-		license_num = license_num >> 8;
-	}
+	xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */
+
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = sn_partition_id;
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (sn_partition_id >> 8);
 
 	/*
 	 * ether_setup() sets this to a multicast device.  We are
@@ -646,8 +619,10 @@ xpnet_init(void)
 	xpnet_device->features = NETIF_F_NO_CSUM;
 
 	result = register_netdev(xpnet_device);
-	if (result != 0)
+	if (result != 0) {
 		free_netdev(xpnet_device);
+		kfree(xpnet_broadcast_partitions);
+	}
 
 	return result;
 }
@@ -661,8 +636,8 @@ xpnet_exit(void)
 		 xpnet_device[0].name);
 
 	unregister_netdev(xpnet_device);
-
 	free_netdev(xpnet_device);
+	kfree(xpnet_broadcast_partitions);
 }
 
 module_exit(xpnet_exit);
-- 
cgit v1.2.3


From ee6665e3b6e1283c30ae240732af1345bc02154e Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:13 -0700
Subject: sgi-xp: isolate remote copy buffer to sn2 only

Make the remote copy buffer an sn2 only item.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            | 10 -----
 drivers/misc/sgi-xp/xpc.h           | 39 ++++------------
 drivers/misc/sgi-xp/xpc_main.c      | 31 ++++---------
 drivers/misc/sgi-xp/xpc_partition.c | 31 +++++--------
 drivers/misc/sgi-xp/xpc_sn2.c       | 88 +++++++++++++++++++++++++++++--------
 5 files changed, 97 insertions(+), 102 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 43bf2470850..955b5b91323 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -60,16 +60,6 @@
 #define XP_MAX_NPARTITIONS_SN2	64
 #define XP_MAX_NPARTITIONS_UV	256
 
-/*
- * Define the number of u64s required to represent all the C-brick nasids
- * as a bitmap.  The cross-partition kernel modules deal only with
- * C-brick nasids, thus the need for bitmaps which don't account for
- * odd-numbered (non C-brick) nasids.
- */
-#define XP_MAX_PHYSNODE_ID	(MAX_NUMALINK_NODES / 2)
-#define XP_NASID_MASK_BYTES	((XP_MAX_PHYSNODE_ID + 7) / 8)
-#define XP_NASID_MASK_WORDS	((XP_MAX_PHYSNODE_ID + 63) / 64)
-
 /*
  * XPC establishes channel connections between the local partition and any
  * other partition that is currently up. Over these channels, kernel-level
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index defd0888118..2111723553b 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -150,26 +150,6 @@ struct xpc_vars_sn2 {
 
 #define XPC_V_VERSION _XPC_VERSION(3, 1)    /* version 3.1 of the cross vars */
 
-/*
- * The following pertains to ia64-sn2 only.
- *
- * Memory for XPC's amo variables is allocated by the MSPEC driver. These
- * pages are located in the lowest granule. The lowest granule uses 4k pages
- * for cached references and an alternate TLB handler to never provide a
- * cacheable mapping for the entire region. This will prevent speculative
- * reading of cached copies of our lines from being issued which will cause
- * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
- * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS) to identify
- * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
- * partitions (i.e., XPCs) consider themselves currently engaged with the
- * local XPC and 1 amo variable to request partition deactivation.
- */
-#define XPC_NOTIFY_IRQ_AMOS	0
-#define XPC_ACTIVATE_IRQ_AMOS	(XPC_NOTIFY_IRQ_AMOS + XP_MAX_NPARTITIONS_SN2)
-#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DEACTIVATE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
-
 /*
  * The following structure describes the per partition specific variables.
  *
@@ -214,9 +194,10 @@ struct xpc_vars_part_sn2 {
 #define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
 
 #define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *)(XPC_RP_MACH_NASIDS(_rp) + \
-				    xp_nasid_mask_words))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xpc_nasid_mask_words)
+#define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *) \
+				 (XPC_RP_MACH_NASIDS(_rp) + \
+				  xpc_nasid_mask_words))
 
 /*
  * Functions registered by add_timer() or called by kernel_thread() only
@@ -225,11 +206,11 @@ struct xpc_vars_part_sn2 {
  * the passed argument.
  */
 #define XPC_PACK_ARGS(_arg1, _arg2) \
-			((((u64) _arg1) & 0xffffffff) | \
-			((((u64) _arg2) & 0xffffffff) << 32))
+			((((u64)_arg1) & 0xffffffff) | \
+			((((u64)_arg2) & 0xffffffff) << 32))
 
-#define XPC_UNPACK_ARG1(_args)	(((u64) _args) & 0xffffffff)
-#define XPC_UNPACK_ARG2(_args)	((((u64) _args) >> 32) & 0xffffffff)
+#define XPC_UNPACK_ARG1(_args)	(((u64)_args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args)	((((u64)_args) >> 32) & 0xffffffff)
 
 /*
  * Define a Get/Put value pair (pointers) used with a message queue.
@@ -710,12 +691,10 @@ extern void xpc_exit_uv(void);
 
 /* found in xpc_partition.c */
 extern int xpc_exiting;
-extern int xp_nasid_mask_words;
+extern int xpc_nasid_mask_words;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern u64 *xpc_mach_nasids;
 extern struct xpc_partition *xpc_partitions;
-extern char *xpc_remote_copy_buffer;
-extern void *xpc_remote_copy_buffer_base;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
 extern int xpc_identify_activate_IRQ_sender(void);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index b5f3c5e59db..36dfccea524 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -877,7 +877,6 @@ xpc_do_exit(enum xp_retval reason)
 		unregister_sysctl_table(xpc_sysctl);
 
 	kfree(xpc_partitions);
-	kfree(xpc_remote_copy_buffer_base);
 
 	if (is_shub())
 		xpc_exit_sn2();
@@ -1031,7 +1030,9 @@ xpc_init(void)
 	short partid;
 	struct xpc_partition *part;
 	struct task_struct *kthread;
-	size_t buf_size;
+
+	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
+	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
 
 	if (is_shub()) {
 		/*
@@ -1054,26 +1055,12 @@ xpc_init(void)
 		return -ENODEV;
 	}
 
-	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
-	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
-
-	buf_size = max(XPC_RP_VARS_SIZE,
-		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
-	xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
-							       GFP_KERNEL,
-						  &xpc_remote_copy_buffer_base);
-	if (xpc_remote_copy_buffer == NULL) {
-		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
-		ret = -ENOMEM;
-		goto out_1;
-	}
-
 	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
 				 xp_max_npartitions, GFP_KERNEL);
 	if (xpc_partitions == NULL) {
 		dev_err(xpc_part, "can't get memory for partition structure\n");
 		ret = -ENOMEM;
-		goto out_2;
+		goto out_1;
 	}
 
 	/*
@@ -1115,7 +1102,7 @@ xpc_init(void)
 	if (xpc_rsvd_page == NULL) {
 		dev_err(xpc_part, "can't setup our reserved page\n");
 		ret = -EBUSY;
-		goto out_3;
+		goto out_2;
 	}
 
 	/* add ourselves to the reboot_notifier_list */
@@ -1136,7 +1123,7 @@ xpc_init(void)
 	if (IS_ERR(kthread)) {
 		dev_err(xpc_part, "failed while forking hb check thread\n");
 		ret = -EBUSY;
-		goto out_4;
+		goto out_3;
 	}
 
 	/*
@@ -1164,18 +1151,16 @@ xpc_init(void)
 	return 0;
 
 	/* initialization was not successful */
-out_4:
+out_3:
 	/* indicate to others that our reserved page is uninitialized */
 	xpc_rsvd_page->stamp = 0;
 
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
-out_3:
+out_2:
 	if (xpc_sysctl)
 		unregister_sysctl_table(xpc_sysctl);
 	kfree(xpc_partitions);
-out_2:
-	kfree(xpc_remote_copy_buffer_base);
 out_1:
 	if (is_shub())
 		xpc_exit_sn2();
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index c769ab8f74e..9f104450478 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -34,20 +34,11 @@ struct xpc_rsvd_page *xpc_rsvd_page;
 static u64 *xpc_part_nasids;
 u64 *xpc_mach_nasids;
 
-/* >>> next two variables should be 'xpc_' if they remain here */
-static int xp_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
-int xp_nasid_mask_words;	/* actual size in words of nasid mask */
+static int xpc_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
+int xpc_nasid_mask_words;	/* actual size in words of nasid mask */
 
 struct xpc_partition *xpc_partitions;
 
-/*
- * Generic buffer used to store a local copy of portions of a remote
- * partition's reserved page (either its header and part_nasids mask,
- * or its vars).
- */
-char *xpc_remote_copy_buffer;
-void *xpc_remote_copy_buffer_base;
-
 /*
  * Guarantee that the kmalloc'd memory is cacheline aligned.
  */
@@ -176,9 +167,9 @@ xpc_setup_rsvd_page(void)
 		/* SAL_version 1 didn't set the nasids_size field */
 		rp->SAL_nasids_size = 128;
 	}
-	xp_sizeof_nasid_mask = rp->SAL_nasids_size;
-	xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
-					   BYTES_PER_WORD);
+	xpc_sizeof_nasid_mask = rp->SAL_nasids_size;
+	xpc_nasid_mask_words = DIV_ROUND_UP(xpc_sizeof_nasid_mask,
+					    BYTES_PER_WORD);
 
 	/* setup the pointers to the various items in the reserved page */
 	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
@@ -222,14 +213,14 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 	/* pull over the reserved page header and part_nasids mask */
 	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
-			       XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
+			       XPC_RP_HEADER_SIZE + xpc_sizeof_nasid_mask);
 	if (ret != xpSuccess)
 		return ret;
 
 	if (discovered_nasids != NULL) {
 		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
 
-		for (i = 0; i < xp_nasid_mask_words; i++)
+		for (i = 0; i < xpc_nasid_mask_words; i++)
 			discovered_nasids[i] |= remote_part_nasids[i];
 	}
 
@@ -414,12 +405,12 @@ xpc_discovery(void)
 	enum xp_retval ret;
 
 	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
-						  xp_sizeof_nasid_mask,
+						  xpc_sizeof_nasid_mask,
 						  GFP_KERNEL, &remote_rp_base);
 	if (remote_rp == NULL)
 		return;
 
-	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
+	discovered_nasids = kzalloc(sizeof(u64) * xpc_nasid_mask_words,
 				    GFP_KERNEL);
 	if (discovered_nasids == NULL) {
 		kfree(remote_rp_base);
@@ -521,10 +512,10 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 	if (part->remote_rp_pa == 0)
 		return xpPartitionDown;
 
-	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
+	memset(nasid_mask, 0, xpc_sizeof_nasid_mask);
 
 	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 
 	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
-				xp_sizeof_nasid_mask);
+				xpc_sizeof_nasid_mask);
 }
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index e5dc8c44c6f..9c0c29a2ac8 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -19,6 +19,43 @@
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap.  The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XPC_MAX_PHYSNODES_SN2	(MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES_SN2	((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
+#define XP_NASID_MASK_WORDS_SN2	((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
+
+/*
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2		0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2	(XPC_NOTIFY_IRQ_AMOS_SN2 + \
+					 XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2	(XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+					 XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2	(XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static char *xpc_remote_copy_buffer_sn2;
+static void *xpc_remote_copy_buffer_base_sn2;
+
 static struct xpc_vars_sn2 *xpc_vars;	/* >>> Add _sn2 suffix? */
 static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */
 
@@ -176,7 +213,7 @@ xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
 	struct amo *amos = (struct amo *)__va(amos_page_pa +
-					      (XPC_ACTIVATE_IRQ_AMOS *
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
 
 	(void)xpc_send_IRQ_sn2(&amos[w_index], (1UL << b_index), to_nasid,
@@ -189,7 +226,7 @@ xpc_send_local_activate_IRQ_sn2(int from_nasid)
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
 	struct amo *amos = (struct amo *)__va(xpc_vars->amos_page_pa +
-					      (XPC_ACTIVATE_IRQ_AMOS *
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
 
 	/* fake the sending and receipt of an activate IRQ from remote nasid */
@@ -395,7 +432,7 @@ xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
 	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
-					     (XPC_ENGAGED_PARTITIONS_AMO *
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
 					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
@@ -422,7 +459,7 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
 	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
-					     (XPC_ENGAGED_PARTITIONS_AMO *
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
 					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
@@ -455,7 +492,7 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 static int
 xpc_partition_engaged_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
@@ -465,7 +502,7 @@ xpc_partition_engaged_sn2(short partid)
 static int
 xpc_any_partition_engaged_sn2(void)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* our partition's amo variable */
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
@@ -474,7 +511,7 @@ xpc_any_partition_engaged_sn2(void)
 static void
 xpc_assume_partition_disengaged_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* clear bit(s) based on partid mask in our partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
@@ -599,12 +636,12 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	       xp_max_npartitions);
 
 	/* initialize the activate IRQ related amo variables */
-	for (i = 0; i < xp_nasid_mask_words; i++)
-		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS + i);
+	for (i = 0; i < xpc_nasid_mask_words; i++)
+		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
 
 	/* initialize the engaged remote partitions related amo variables */
-	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO);
-	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO);
+	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
+	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
 
 	return xpSuccess;
 }
@@ -657,7 +694,7 @@ xpc_check_remote_hb_sn2(void)
 	short partid;
 	enum xp_retval ret;
 
-	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
 
 	for (partid = 0; partid < xp_max_npartitions; partid++) {
 
@@ -749,7 +786,7 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	unsigned long irq_flags;
 	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
-					     (XPC_DEACTIVATE_REQUEST_AMO *
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
 					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
@@ -784,7 +821,7 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 {
 	unsigned long irq_flags;
 	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
-					     (XPC_DEACTIVATE_REQUEST_AMO *
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
 					     sizeof(struct amo)));
 
 	local_irq_save(irq_flags);
@@ -808,7 +845,7 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 static int
 xpc_partition_deactivation_requested_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO;
+	struct amo *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO_SN2;
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
@@ -898,7 +935,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 
 	/* pull over the reserved page structure */
 
-	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
+	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
 
 	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
 	if (ret != xpSuccess) {
@@ -917,7 +954,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 
 	/* pull over the cross partition variables */
 
-	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
 
 	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
 	if (ret != xpSuccess) {
@@ -996,10 +1033,10 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 	int n_IRQs_detected = 0;
 	struct amo *act_amos;
 
-	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
+	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
 
 	/* scan through act amo variable looking for non-zero entries */
-	for (word = 0; word < xp_nasid_mask_words; word++) {
+	for (word = 0; word < xpc_nasid_mask_words; word++) {
 
 		if (xpc_exiting)
 			break;
@@ -2334,6 +2371,7 @@ int
 xpc_init_sn2(void)
 {
 	int ret;
+	size_t buf_size;
 
 	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
@@ -2378,6 +2416,16 @@ xpc_init_sn2(void)
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
 
+	buf_size = max(XPC_RP_VARS_SIZE,
+		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
+	xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
+								   GFP_KERNEL,
+					      &xpc_remote_copy_buffer_base_sn2);
+	if (xpc_remote_copy_buffer_sn2 == NULL) {
+		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
+		return -ENOMEM;
+	}
+
 	/* open up protections for IPI and [potentially] amo operations */
 	xpc_allow_IPI_ops_sn2();
 	xpc_allow_amo_ops_shub_wars_1_1_sn2();
@@ -2394,6 +2442,7 @@ xpc_init_sn2(void)
 		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
 			"errno=%d\n", -ret);
 		xpc_disallow_IPI_ops_sn2();
+		kfree(xpc_remote_copy_buffer_base_sn2);
 	}
 	return ret;
 }
@@ -2403,4 +2452,5 @@ xpc_exit_sn2(void)
 {
 	free_irq(SGI_XPC_ACTIVATE, NULL);
 	xpc_disallow_IPI_ops_sn2();
+	kfree(xpc_remote_copy_buffer_base_sn2);
 }
-- 
cgit v1.2.3


From 8e85c23ef04fe0d8414e0b1dc04543095282a27a Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:13 -0700
Subject: sgi-xp: add _sn2 suffix to a few variables

Add an '_sn2' suffix to some variables found in xpc_sn2.c.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc_sn2.c | 124 ++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 59 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 9c0c29a2ac8..63fe59a5bfa 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -56,15 +56,15 @@
 static char *xpc_remote_copy_buffer_sn2;
 static void *xpc_remote_copy_buffer_base_sn2;
 
-static struct xpc_vars_sn2 *xpc_vars;	/* >>> Add _sn2 suffix? */
-static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
 
 /* SH_IPI_ACCESS shub register value on startup */
-static u64 xpc_sh1_IPI_access;
-static u64 xpc_sh2_IPI_access0;
-static u64 xpc_sh2_IPI_access1;
-static u64 xpc_sh2_IPI_access2;
-static u64 xpc_sh2_IPI_access3;
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
 
 /*
  * Change protections to allow IPI operations.
@@ -77,13 +77,13 @@ xpc_allow_IPI_ops_sn2(void)
 
 	/* >>> The following should get moved into SAL. */
 	if (is_shub2()) {
-		xpc_sh2_IPI_access0 =
+		xpc_sh2_IPI_access0_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
-		xpc_sh2_IPI_access1 =
+		xpc_sh2_IPI_access1_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
-		xpc_sh2_IPI_access2 =
+		xpc_sh2_IPI_access2_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
-		xpc_sh2_IPI_access3 =
+		xpc_sh2_IPI_access3_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
 
 		for_each_online_node(node) {
@@ -98,7 +98,7 @@ xpc_allow_IPI_ops_sn2(void)
 			      -1UL);
 		}
 	} else {
-		xpc_sh1_IPI_access =
+		xpc_sh1_IPI_access_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
 
 		for_each_online_node(node) {
@@ -123,19 +123,19 @@ xpc_disallow_IPI_ops_sn2(void)
 		for_each_online_node(node) {
 			nasid = cnodeid_to_nasid(node);
 			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-			      xpc_sh2_IPI_access0);
+			      xpc_sh2_IPI_access0_sn2);
 			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-			      xpc_sh2_IPI_access1);
+			      xpc_sh2_IPI_access1_sn2);
 			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-			      xpc_sh2_IPI_access2);
+			      xpc_sh2_IPI_access2_sn2);
 			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-			      xpc_sh2_IPI_access3);
+			      xpc_sh2_IPI_access3_sn2);
 		}
 	} else {
 		for_each_online_node(node) {
 			nasid = cnodeid_to_nasid(node);
 			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-			      xpc_sh1_IPI_access);
+			      xpc_sh1_IPI_access_sn2);
 		}
 	}
 }
@@ -182,7 +182,7 @@ xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
 static struct amo *
 xpc_init_IRQ_amo_sn2(int index)
 {
-	struct amo *amo = xpc_vars->amos_page + index;
+	struct amo *amo = xpc_vars_sn2->amos_page + index;
 
 	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear amo variable */
 	return amo;
@@ -225,7 +225,7 @@ xpc_send_local_activate_IRQ_sn2(int from_nasid)
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
-	struct amo *amos = (struct amo *)__va(xpc_vars->amos_page_pa +
+	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
 					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
 
@@ -492,7 +492,8 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 static int
 xpc_partition_engaged_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
@@ -502,7 +503,8 @@ xpc_partition_engaged_sn2(short partid)
 static int
 xpc_any_partition_engaged_sn2(void)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* our partition's amo variable */
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
@@ -511,7 +513,8 @@ xpc_any_partition_engaged_sn2(void)
 static void
 xpc_assume_partition_disengaged_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO_SN2;
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
 
 	/* clear bit(s) based on partid mask in our partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
@@ -580,27 +583,27 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	int i;
 	int ret;
 
-	xpc_vars = XPC_RP_VARS(rp);
+	xpc_vars_sn2 = XPC_RP_VARS(rp);
 
-	rp->sn.vars_pa = __pa(xpc_vars);
+	rp->sn.vars_pa = __pa(xpc_vars_sn2);
 
 	/* vars_part array follows immediately after vars */
-	xpc_vars_part = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
-						     XPC_RP_VARS_SIZE);
+	xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+							 XPC_RP_VARS_SIZE);
 
 	/*
-	 * Before clearing xpc_vars, see if a page of amos had been previously
-	 * allocated. If not we'll need to allocate one and set permissions
-	 * so that cross-partition amos are allowed.
+	 * Before clearing xpc_vars_sn2, see if a page of amos had been
+	 * previously allocated. If not we'll need to allocate one and set
+	 * permissions so that cross-partition amos are allowed.
 	 *
 	 * The allocated amo page needs MCA reporting to remain disabled after
 	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
-	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
+	 * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
 	 * which is pointed to by the reserved page, and re-use that saved copy
 	 * on subsequent loads of XPC. This amo page is never freed, and its
 	 * memory protections are never restricted.
 	 */
-	amos_page = xpc_vars->amos_page;
+	amos_page = xpc_vars_sn2->amos_page;
 	if (amos_page == NULL) {
 		amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
 		if (amos_page == NULL) {
@@ -621,18 +624,18 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 		}
 	}
 
-	/* clear xpc_vars */
-	memset(xpc_vars, 0, sizeof(struct xpc_vars_sn2));
+	/* clear xpc_vars_sn2 */
+	memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
 
-	xpc_vars->version = XPC_V_VERSION;
-	xpc_vars->activate_IRQ_nasid = cpuid_to_nasid(0);
-	xpc_vars->activate_IRQ_phys_cpuid = cpu_physical_id(0);
-	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
-	xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
-	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */
+	xpc_vars_sn2->version = XPC_V_VERSION;
+	xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
+	xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
+	xpc_vars_sn2->vars_part_pa = __pa(xpc_vars_part_sn2);
+	xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
+	xpc_vars_sn2->amos_page = amos_page;	/* save for next load of XPC */
 
-	/* clear xpc_vars_part */
-	memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part_sn2) *
+	/* clear xpc_vars_part_sn2 */
+	memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
 	       xp_max_npartitions);
 
 	/* initialize the activate IRQ related amo variables */
@@ -649,30 +652,30 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 static void
 xpc_increment_heartbeat_sn2(void)
 {
-	xpc_vars->heartbeat++;
+	xpc_vars_sn2->heartbeat++;
 }
 
 static void
 xpc_offline_heartbeat_sn2(void)
 {
 	xpc_increment_heartbeat_sn2();
-	xpc_vars->heartbeat_offline = 1;
+	xpc_vars_sn2->heartbeat_offline = 1;
 }
 
 static void
 xpc_online_heartbeat_sn2(void)
 {
 	xpc_increment_heartbeat_sn2();
-	xpc_vars->heartbeat_offline = 0;
+	xpc_vars_sn2->heartbeat_offline = 0;
 }
 
 static void
 xpc_heartbeat_init_sn2(void)
 {
-	DBUG_ON(xpc_vars == NULL);
+	DBUG_ON(xpc_vars_sn2 == NULL);
 
-	bitmap_zero(xpc_vars->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
-	xpc_heartbeating_to_mask = &xpc_vars->heartbeating_to_mask[0];
+	bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0];
 	xpc_online_heartbeat_sn2();
 }
 
@@ -845,7 +848,8 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 static int
 xpc_partition_deactivation_requested_sn2(short partid)
 {
-	struct amo *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO_SN2;
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_DEACTIVATE_REQUEST_AMO_SN2;
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
@@ -1033,7 +1037,7 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 	int n_IRQs_detected = 0;
 	struct amo *act_amos;
 
-	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
+	act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
 
 	/* scan through act amo variable looking for non-zero entries */
 	for (word = 0; word < xpc_nasid_mask_words; word++) {
@@ -1261,15 +1265,17 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	 * The setting of the magic # indicates that these per partition
 	 * specific variables are ready to be used.
 	 */
-	xpc_vars_part[partid].GPs_pa = __pa(part_sn2->local_GPs);
-	xpc_vars_part[partid].openclose_args_pa =
+	xpc_vars_part_sn2[partid].GPs_pa = __pa(part_sn2->local_GPs);
+	xpc_vars_part_sn2[partid].openclose_args_pa =
 	    __pa(part->local_openclose_args);
-	xpc_vars_part[partid].chctl_amo_pa = __pa(part_sn2->local_chctl_amo_va);
+	xpc_vars_part_sn2[partid].chctl_amo_pa =
+	    __pa(part_sn2->local_chctl_amo_va);
 	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
-	xpc_vars_part[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
-	xpc_vars_part[partid].notify_IRQ_phys_cpuid = cpu_physical_id(cpuid);
-	xpc_vars_part[partid].nchannels = part->nchannels;
-	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
+	xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
+	    cpu_physical_id(cpuid);
+	xpc_vars_part_sn2[partid].nchannels = part->nchannels;
+	xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1;
 
 	return xpSuccess;
 
@@ -1316,7 +1322,7 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 	DBUG_ON(part->setup_state != XPC_P_SETUP);
 	part->setup_state = XPC_P_WTEARDOWN;
 
-	xpc_vars_part[partid].magic = 0;
+	xpc_vars_part_sn2[partid].magic = 0;
 
 	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
 
@@ -1432,7 +1438,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 		return xpRetry;
 	}
 
-	if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
+	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1) {
 
 		/* validate the variables */
 
@@ -1462,7 +1468,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 		/* let the other side know that we've pulled their variables */
 
-		xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
+		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2;
 	}
 
 	if (pulled_entry->magic == XPC_VP_MAGIC1)
-- 
cgit v1.2.3


From ea57f80c8c0e59cfc5095f7e856ce7c8e6ac2984 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:14 -0700
Subject: sgi-xp: eliminate '>>>' in comments

Comments in /drivers/misc/sgi-xp have been using '>>>' as a means to draw
attention to something that needs to be done or considered.  To avoid
colliding with git rejects, '>>>' will now be replaced by '!!!' to
indicate something to do, and by '???' to indicate something to be
considered.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            | 11 +++--------
 drivers/misc/sgi-xp/xp_sn2.c        | 10 +++++-----
 drivers/misc/sgi-xp/xp_uv.c         |  2 +-
 drivers/misc/sgi-xp/xpc.h           | 14 +++++++++-----
 drivers/misc/sgi-xp/xpc_channel.c   |  2 +-
 drivers/misc/sgi-xp/xpc_partition.c |  2 +-
 drivers/misc/sgi-xp/xpc_sn2.c       |  8 ++++----
 drivers/misc/sgi-xp/xpc_uv.c        | 32 ++++++++++++++++----------------
 drivers/misc/sgi-xp/xpnet.c         |  6 +++---
 9 files changed, 43 insertions(+), 44 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 955b5b91323..0ca81f16646 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -21,7 +21,7 @@
 #include <asm/sn/arch.h>
 #endif
 
-/* >>> Add this #define to some linux header file some day. */
+/* ??? Add this #define to some linux header file some day? */
 #define BYTES_PER_WORD	sizeof(void *)
 
 #ifdef USE_DBUG_ON
@@ -65,18 +65,13 @@
  * other partition that is currently up. Over these channels, kernel-level
  * `users' can communicate with their counterparts on the other partitions.
  *
->>> The following described limitation of a max of eight channels possible
->>> pertains only to ia64-sn2. THIS ISN'T TRUE SINCE I'M PLANNING TO JUST
->>> TIE INTO THE EXISTING MECHANISM ONCE THE CHANNEL MESSAGES ARE RECEIVED.
->>> THE 128-BYTE CACHELINE PERFORMANCE ISSUE IS TIED TO IA64-SN2.
- *
  * If the need for additional channels arises, one can simply increase
  * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
  * exceeds the absolute MAXIMUM number of channels possible (eight), then one
  * will need to make changes to the XPC code to accommodate for this.
  *
- * The absolute maximum number of channels possible is currently limited to
- * eight for performance reasons. The internal cross partition structures
+ * The absolute maximum number of channels possible is limited to eight for
+ * performance reasons on sn2 hardware. The internal cross partition structures
  * require sixteen bytes per channel, and eight allows all of this
  * interface-shared info to fit in one 128-byte cacheline.
  */
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index 1fcfdebca2c..baabc1cb3fe 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -87,11 +87,11 @@ xp_remote_memcpy_sn2(void *vdst, const void *psrc, size_t len)
 {
 	bte_result_t ret;
 	u64 pdst = ia64_tpa(vdst);
-	/* >>> What are the rules governing the src and dst addresses passed in?
-	 * >>> Currently we're assuming that dst is a virtual address and src
-	 * >>> is a physical address, is this appropriate? Can we allow them to
-	 * >>> be whatever and we make the change here without damaging the
-	 * >>> addresses?
+	/* ??? What are the rules governing the src and dst addresses passed in?
+	 * ??? Currently we're assuming that dst is a virtual address and src
+	 * ??? is a physical address, is this appropriate? Can we allow them to
+	 * ??? be whatever and we make the change here without damaging the
+	 * ??? addresses?
 	 */
 
 	/*
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index dca519fdef9..382b1b6bcc0 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -18,7 +18,7 @@
 static enum xp_retval
 xp_remote_memcpy_uv(void *vdst, const void *psrc, size_t len)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return xpUnsupported;
 }
 
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 2111723553b..0f516c3e3e6 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -276,9 +276,12 @@ struct xpc_notify {
  * There is an array of these structures for each remote partition. It is
  * allocated at the time a partition becomes active. The array contains one
  * of these structures for each potential channel connection to that partition.
+ */
+
+/*
+ * The following is sn2 only.
  *
->>> sn2 only!!!
- * Each of these structures manages two message queues (circular buffers).
+ * Each channel structure manages two message queues (circular buffers).
  * They are allocated at the time a channel connection is made. One of
  * these message queues (local_msgqueue) holds the locally created messages
  * that are destined for the remote partition. The other of these message
@@ -345,6 +348,7 @@ struct xpc_notify {
  *	new messages, by the clearing of the message flags of the acknowledged
  *	messages.
  */
+
 struct xpc_channel_sn2 {
 
 	/* various flavors of local and remote Get/Put values */
@@ -359,7 +363,7 @@ struct xpc_channel_sn2 {
 };
 
 struct xpc_channel_uv {
-	/* >>> code is coming */
+	/* !!! code is coming */
 };
 
 struct xpc_channel {
@@ -500,7 +504,7 @@ xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
 }
 
 /*
- * Manages channels on a partition basis. There is one of these structures
+ * Manage channels on a partition basis. There is one of these structures
  * for each partition (a partition will never utilize the structure that
  * represents itself).
  */
@@ -535,7 +539,7 @@ struct xpc_partition_sn2 {
 };
 
 struct xpc_partition_uv {
-	/* >>> code is coming */
+	/* !!! code is coming */
 };
 
 struct xpc_partition {
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 1c73423665b..f1afc0a7c33 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -129,7 +129,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	/* wake those waiting for notify completion */
 	if (atomic_read(&ch->n_to_notify) > 0) {
-		/* >>> we do callout while holding ch->lock */
+		/* we do callout while holding ch->lock, callout can't block */
 		xpc_notify_senders_of_disconnect(ch);
 	}
 
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 9f104450478..73a92957b80 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -91,7 +91,7 @@ xpc_get_rsvd_page_pa(int nasid)
 		if (status != SALRET_MORE_PASSES)
 			break;
 
-		/* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
+		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
 		if (L1_CACHE_ALIGN(len) > buf_len) {
 			kfree(buf_base);
 			buf_len = L1_CACHE_ALIGN(len);
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 63fe59a5bfa..e42c3038203 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -75,7 +75,7 @@ xpc_allow_IPI_ops_sn2(void)
 	int node;
 	int nasid;
 
-	/* >>> The following should get moved into SAL. */
+	/* !!! The following should get moved into SAL. */
 	if (is_shub2()) {
 		xpc_sh2_IPI_access0_sn2 =
 		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
@@ -118,7 +118,7 @@ xpc_disallow_IPI_ops_sn2(void)
 	int node;
 	int nasid;
 
-	/* >>> The following should get moved into SAL. */
+	/* !!! The following should get moved into SAL. */
 	if (is_shub2()) {
 		for_each_online_node(node) {
 			nasid = cnodeid_to_nasid(node);
@@ -1360,7 +1360,7 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
  * dst must be a cacheline aligned virtual address on this partition.
  * cnt must be cacheline sized
  */
-/* >>> Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
 static enum xp_retval
 xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
 			       const void *src, size_t cnt)
@@ -2242,7 +2242,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 		notify->key = key;
 		notify->type = notify_type;
 
-		/* >>> is a mb() needed here? */
+		/* ??? Is a mb() needed here? */
 
 		if (ch->flags & XPC_C_DISCONNECTING) {
 			/*
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 1401b0f45dc..2aec1dfbb3d 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -15,8 +15,8 @@
 
 #include <linux/kernel.h>
 
-/* >>> #include <gru/grukservices.h> */
-/* >>> uv_gpa() is defined in <gru/grukservices.h> */
+/* !!! #include <gru/grukservices.h> */
+/* !!! uv_gpa() is defined in <gru/grukservices.h> */
 #define uv_gpa(_a)		((unsigned long)_a)
 
 #include "xpc.h"
@@ -29,16 +29,16 @@ static void
 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part)
 {
 	/*
-	 * >>> make our side think that the remote parition sent an activate
-	 * >>> message our way. Also do what the activate IRQ handler would
-	 * >>> do had one really been sent.
+	 * !!! Make our side think that the remote parition sent an activate
+	 * !!! message our way. Also do what the activate IRQ handler would
+	 * !!! do had one really been sent.
 	 */
 }
 
 static enum xp_retval
 xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
 {
-	/* >>> need to have established xpc_activate_mq earlier */
+	/* !!! need to have established xpc_activate_mq earlier */
 	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq);
 	return xpSuccess;
 }
@@ -46,7 +46,7 @@ xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
 static void
 xpc_increment_heartbeat_uv(void)
 {
-	/* >>> send heartbeat msg to xpc_heartbeating_to_mask partids */
+	/* !!! send heartbeat msg to xpc_heartbeating_to_mask partids */
 }
 
 static void
@@ -59,7 +59,7 @@ xpc_heartbeat_init_uv(void)
 static void
 xpc_heartbeat_exit_uv(void)
 {
-	/* >>> send heartbeat_offline msg to xpc_heartbeating_to_mask partids */
+	/* !!! send heartbeat_offline msg to xpc_heartbeating_to_mask partids */
 }
 
 static void
@@ -70,9 +70,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 	struct xpc_partition *part = &xpc_partitions[partid];
 
 /*
- * >>> setup part structure with the bits of info we can glean from the rp
- * >>>	part->remote_rp_pa = remote_rp_pa;
- * >>>	part->sn.uv.activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
+ * !!! Setup part structure with the bits of info we can glean from the rp:
+ * !!!	part->remote_rp_pa = remote_rp_pa;
+ * !!!	part->sn.uv.activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
  */
 
 	xpc_send_local_activate_IRQ_uv(part);
@@ -91,7 +91,7 @@ xpc_request_partition_reactivation_uv(struct xpc_partition *part)
 static enum xp_retval
 xpc_setup_infrastructure_uv(struct xpc_partition *part)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return xpUnsupported;
 }
 
@@ -102,28 +102,28 @@ xpc_setup_infrastructure_uv(struct xpc_partition *part)
 static void
 xpc_teardown_infrastructure_uv(struct xpc_partition *part)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return;
 }
 
 static enum xp_retval
 xpc_make_first_contact_uv(struct xpc_partition *part)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return xpUnsupported;
 }
 
 static u64
 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return 0UL;
 }
 
 static struct xpc_msg *
 xpc_get_deliverable_msg_uv(struct xpc_channel *ch)
 {
-	/* >>> this function needs fleshing out */
+	/* !!! this function needs fleshing out */
 	return NULL;
 }
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index c5f59a6dae5..07c89c4e2c2 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -229,9 +229,9 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 
 		if (ret != xpSuccess) {
 			/*
-			 * >>> Need better way of cleaning skb.  Currently skb
-			 * >>> appears in_use and we can't just call
-			 * >>> dev_kfree_skb.
+			 * !!! Need better way of cleaning skb.  Currently skb
+			 * !!! appears in_use and we can't just call
+			 * !!! dev_kfree_skb.
 			 */
 			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
 				"returned error=0x%x\n", (void *)
-- 
cgit v1.2.3


From 04de741885bc7565a28150e82c56a56e544440e6 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:14 -0700
Subject: sgi-xp: use standard bitops macros and functions

Change sgi-xp to use the standard bitops macros and functions instead of
trying to invent its own mechanism.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            |  3 --
 drivers/misc/sgi-xp/xpc.h           | 43 ++++++++--------------
 drivers/misc/sgi-xp/xpc_partition.c | 43 +++++++++++-----------
 drivers/misc/sgi-xp/xpc_sn2.c       | 73 ++++++++++++++++++++-----------------
 4 files changed, 76 insertions(+), 86 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 0ca81f16646..3054fae8b02 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -21,9 +21,6 @@
 #include <asm/sn/arch.h>
 #endif
 
-/* ??? Add this #define to some linux header file some day? */
-#define BYTES_PER_WORD	sizeof(void *)
-
 #ifdef USE_DBUG_ON
 #define DBUG_ON(condition)	BUG_ON(condition)
 #else
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 0f516c3e3e6..0907934cdd8 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -35,23 +35,7 @@
 #define XPC_VERSION_MAJOR(_v)		((_v) >> 4)
 #define XPC_VERSION_MINOR(_v)		((_v) & 0xf)
 
-/*
- * The next macros define word or bit representations for given
- * C-brick nasid in either the SAL provided bit array representing
- * nasids in the partition/machine or the array of amo structures used
- * for inter-partition initiation communications.
- *
- * For SN2 machines, C-Bricks are alway even numbered NASIDs.  As
- * such, some space will be saved by insisting that nasid information
- * passed from SAL always be packed for C-Bricks and the
- * cross-partition interrupts use the same packing scheme.
- */
-#define XPC_NASID_W_INDEX(_n)	(((_n) / 64) / 2)
-#define XPC_NASID_B_INDEX(_n)	(((_n) / 2) & (64 - 1))
-#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
-				    (1UL << XPC_NASID_B_INDEX(_n)))
-#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
-
+/* define frequency of the heartbeat and frequency how often it's checked */
 #define XPC_HB_DEFAULT_INTERVAL		5	/* incr HB every x secs */
 #define XPC_HB_CHECK_DEFAULT_INTERVAL	20	/* check HB every x secs */
 
@@ -86,11 +70,13 @@
  *     the actual nasids in the entire machine (mach_nasids). We're only
  *     interested in the even numbered nasids (which contain the processors
  *     and/or memory), so we only need half as many bits to represent the
- *     nasids. The part_nasids mask is located starting at the first cacheline
- *     following the reserved page header. The mach_nasids mask follows right
- *     after the part_nasids mask. The size in bytes of each mask is reflected
- *     by the reserved page header field 'SAL_nasids_size'. (Local partition's
- *     mask pointers are xpc_part_nasids and xpc_mach_nasids.)
+ *     nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
+ *     to either divide or multiply by 2. The part_nasids mask is located
+ *     starting at the first cacheline following the reserved page header. The
+ *     mach_nasids mask follows right after the part_nasids mask. The size in
+ *     bytes of each mask is reflected by the reserved page header field
+ *     'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
+ *     and xpc_mach_nasids.)
  *
  *   vars	(ia64-sn2 only)
  *   vars part	(ia64-sn2 only)
@@ -194,10 +180,11 @@ struct xpc_vars_part_sn2 {
 #define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
 
 #define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xpc_nasid_mask_words)
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+				 xpc_nasid_mask_nlongs)
 #define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *) \
 				 (XPC_RP_MACH_NASIDS(_rp) + \
-				  xpc_nasid_mask_words))
+				  xpc_nasid_mask_nlongs))
 
 /*
  * Functions registered by add_timer() or called by kernel_thread() only
@@ -695,9 +682,9 @@ extern void xpc_exit_uv(void);
 
 /* found in xpc_partition.c */
 extern int xpc_exiting;
-extern int xpc_nasid_mask_words;
+extern int xpc_nasid_mask_nlongs;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
-extern u64 *xpc_mach_nasids;
+extern unsigned long *xpc_mach_nasids;
 extern struct xpc_partition *xpc_partitions;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
@@ -706,8 +693,8 @@ extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
 extern void xpc_discovery(void);
-extern enum xp_retval xpc_get_remote_rp(int, u64 *, struct xpc_rsvd_page *,
-					u64 *);
+extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
+					struct xpc_rsvd_page *, u64 *);
 extern void xpc_deactivate_partition(const int, struct xpc_partition *,
 				     enum xp_retval);
 extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 73a92957b80..ca6784f5597 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -31,11 +31,11 @@ int xpc_exiting;
 
 /* this partition's reserved page pointers */
 struct xpc_rsvd_page *xpc_rsvd_page;
-static u64 *xpc_part_nasids;
-u64 *xpc_mach_nasids;
+static unsigned long *xpc_part_nasids;
+unsigned long *xpc_mach_nasids;
 
-static int xpc_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
-int xpc_nasid_mask_words;	/* actual size in words of nasid mask */
+static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
+int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */
 
 struct xpc_partition *xpc_partitions;
 
@@ -167,9 +167,9 @@ xpc_setup_rsvd_page(void)
 		/* SAL_version 1 didn't set the nasids_size field */
 		rp->SAL_nasids_size = 128;
 	}
-	xpc_sizeof_nasid_mask = rp->SAL_nasids_size;
-	xpc_nasid_mask_words = DIV_ROUND_UP(xpc_sizeof_nasid_mask,
-					    BYTES_PER_WORD);
+	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
+	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
+					      BITS_PER_BYTE);
 
 	/* setup the pointers to the various items in the reserved page */
 	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
@@ -199,10 +199,10 @@ xpc_setup_rsvd_page(void)
  * part_nasids mask.
  */
 enum xp_retval
-xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 {
-	int i;
+	int l;
 	enum xp_retval ret;
 
 	/* get the reserved page's physical address */
@@ -213,15 +213,16 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 	/* pull over the reserved page header and part_nasids mask */
 	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
-			       XPC_RP_HEADER_SIZE + xpc_sizeof_nasid_mask);
+			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
 	if (ret != xpSuccess)
 		return ret;
 
 	if (discovered_nasids != NULL) {
-		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
+		unsigned long *remote_part_nasids =
+		    XPC_RP_PART_NASIDS(remote_rp);
 
-		for (i = 0; i < xpc_nasid_mask_words; i++)
-			discovered_nasids[i] |= remote_part_nasids[i];
+		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
+			discovered_nasids[l] |= remote_part_nasids[l];
 	}
 
 	/* see if the reserved page has been set up by XPC */
@@ -401,16 +402,16 @@ xpc_discovery(void)
 	int max_regions;
 	int nasid;
 	struct xpc_rsvd_page *rp;
-	u64 *discovered_nasids;
+	unsigned long *discovered_nasids;
 	enum xp_retval ret;
 
 	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
-						  xpc_sizeof_nasid_mask,
+						  xpc_nasid_mask_nbytes,
 						  GFP_KERNEL, &remote_rp_base);
 	if (remote_rp == NULL)
 		return;
 
-	discovered_nasids = kzalloc(sizeof(u64) * xpc_nasid_mask_words,
+	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
 				    GFP_KERNEL);
 	if (discovered_nasids == NULL) {
 		kfree(remote_rp_base);
@@ -453,21 +454,21 @@ xpc_discovery(void)
 
 			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
 
-			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
+			if (test_bit(nasid / 2, xpc_part_nasids)) {
 				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
 					"part of the local partition; skipping "
 					"region\n", nasid);
 				break;
 			}
 
-			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
+			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
 				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
 					"not on Numa-Link network at reset\n",
 					nasid);
 				continue;
 			}
 
-			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
+			if (test_bit(nasid / 2, discovered_nasids)) {
 				dev_dbg(xpc_part, "Nasid %d is part of a "
 					"partition which was previously "
 					"discovered\n", nasid);
@@ -512,10 +513,10 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 	if (part->remote_rp_pa == 0)
 		return xpPartitionDown;
 
-	memset(nasid_mask, 0, xpc_sizeof_nasid_mask);
+	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
 
 	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 
 	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
-				xpc_sizeof_nasid_mask);
+				xpc_nasid_mask_nbytes);
 }
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index e42c3038203..f82889f6015 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -210,28 +210,26 @@ static void
 xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
 			  int to_phys_cpuid)
 {
-	int w_index = XPC_NASID_W_INDEX(from_nasid);
-	int b_index = XPC_NASID_B_INDEX(from_nasid);
 	struct amo *amos = (struct amo *)__va(amos_page_pa +
 					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
 
-	(void)xpc_send_IRQ_sn2(&amos[w_index], (1UL << b_index), to_nasid,
+	(void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+			       BIT_MASK(from_nasid / 2), to_nasid,
 			       to_phys_cpuid, SGI_XPC_ACTIVATE);
 }
 
 static void
 xpc_send_local_activate_IRQ_sn2(int from_nasid)
 {
-	int w_index = XPC_NASID_W_INDEX(from_nasid);
-	int b_index = XPC_NASID_B_INDEX(from_nasid);
 	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
 					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
 
 	/* fake the sending and receipt of an activate IRQ from remote nasid */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amos[w_index].variable), FETCHOP_OR,
-			 (1UL << b_index));
+	FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+			 FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
 	atomic_inc(&xpc_activate_IRQ_rcvd);
 	wake_up_interruptible(&xpc_activate_IRQ_wq);
 }
@@ -439,7 +437,8 @@ xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
 
 	/* set bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
-			 (1UL << sn_partition_id));
+			 BIT(sn_partition_id));
+
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
@@ -466,7 +465,8 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 
 	/* clear bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << sn_partition_id));
+			 ~BIT(sn_partition_id));
+
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
@@ -497,7 +497,7 @@ xpc_partition_engaged_sn2(short partid)
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		(1UL << partid)) != 0;
+		BIT(partid)) != 0;
 }
 
 static int
@@ -518,7 +518,7 @@ xpc_assume_partition_disengaged_sn2(short partid)
 
 	/* clear bit(s) based on partid mask in our partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << partid));
+			 ~BIT(partid));
 }
 
 /* original protection values for each node */
@@ -639,7 +639,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	       xp_max_npartitions);
 
 	/* initialize the activate IRQ related amo variables */
-	for (i = 0; i < xpc_nasid_mask_words; i++)
+	for (i = 0; i < xpc_nasid_mask_nlongs; i++)
 		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
 
 	/* initialize the engaged remote partitions related amo variables */
@@ -796,7 +796,8 @@ xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
 
 	/* set bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
-			 (1UL << sn_partition_id));
+			 BIT(sn_partition_id));
+
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
@@ -831,7 +832,8 @@ xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
 
 	/* clear bit corresponding to our partid in remote partition's amo */
 	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~(1UL << sn_partition_id));
+			 ~BIT(sn_partition_id));
+
 	/*
 	 * We must always use the nofault function regardless of whether we
 	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
@@ -853,7 +855,7 @@ xpc_partition_deactivation_requested_sn2(short partid)
 
 	/* our partition's amo variable ANDed with partid mask */
 	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
-		(1UL << partid)) != 0;
+		BIT(partid)) != 0;
 }
 
 /*
@@ -1031,28 +1033,31 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 int
 xpc_identify_activate_IRQ_sender_sn2(void)
 {
-	int word, bit;
-	u64 nasid_mask;
+	int l;
+	int b;
+	unsigned long nasid_mask_long;
 	u64 nasid;		/* remote nasid */
 	int n_IRQs_detected = 0;
 	struct amo *act_amos;
 
 	act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
 
-	/* scan through act amo variable looking for non-zero entries */
-	for (word = 0; word < xpc_nasid_mask_words; word++) {
+	/* scan through activate amo variables looking for non-zero entries */
+	for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
 
 		if (xpc_exiting)
 			break;
 
-		nasid_mask = xpc_receive_IRQ_amo_sn2(&act_amos[word]);
-		if (nasid_mask == 0) {
-			/* no IRQs from nasids in this variable */
+		nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
+
+		b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
+		if (b >= BITS_PER_LONG) {
+			/* no IRQs from nasids in this amo variable */
 			continue;
 		}
 
-		dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", word,
-			nasid_mask);
+		dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
+			nasid_mask_long);
 
 		/*
 		 * If this nasid has been added to the machine since
@@ -1060,19 +1065,19 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 		 * remote nasid in our reserved pages machine mask.
 		 * This is used in the event of module reload.
 		 */
-		xpc_mach_nasids[word] |= nasid_mask;
+		xpc_mach_nasids[l] |= nasid_mask_long;
 
 		/* locate the nasid(s) which sent interrupts */
 
-		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
-			if (nasid_mask & (1UL << bit)) {
-				n_IRQs_detected++;
-				nasid = XPC_NASID_FROM_W_B(word, bit);
-				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
-					nasid);
-				xpc_identify_activate_IRQ_req_sn2(nasid);
-			}
-		}
+		do {
+			n_IRQs_detected++;
+			nasid = (l * BITS_PER_LONG + b) * 2;
+			dev_dbg(xpc_part, "interrupt from nasid %ld\n", nasid);
+			xpc_identify_activate_IRQ_req_sn2(nasid);
+
+			b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
+					  b + 1);
+		} while (b < BITS_PER_LONG);
 	}
 	return n_IRQs_detected;
 }
-- 
cgit v1.2.3


From 81fe7883d2c8a80a7145ad22f8cd8514d05412b9 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:15 -0700
Subject: sgi-xp: add 'jiffies' to reserved page's timestamp name

Rename XPC's reserved page's timestamp member to reflect the units of time
involved.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           |  6 +++---
 drivers/misc/sgi-xp/xpc_main.c      |  8 ++++----
 drivers/misc/sgi-xp/xpc_partition.c | 14 +++++++-------
 drivers/misc/sgi-xp/xpc_sn2.c       | 26 ++++++++++++++------------
 4 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 0907934cdd8..e194d3140f6 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -87,7 +87,7 @@
  *     which are partition specific (vars part). These are setup by XPC.
  *     (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
  *
- * Note: Until 'stamp' is set non-zero, the partition XPC code has not been
+ * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
  *       initialized.
  */
 struct xpc_rsvd_page {
@@ -101,7 +101,7 @@ struct xpc_rsvd_page {
 		u64 vars_pa;	/* physical address of struct xpc_vars */
 		u64 activate_mq_gpa;	/* global phys address of activate_mq */
 	} sn;
-	unsigned long stamp;	/* time when reserved page was setup by XPC */
+	unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
 	u64 pad2[10];		/* align to last u64 in 2nd 64-byte cacheline */
 	u64 SAL_nasids_size;	/* SAL: size of each nasid mask in bytes */
 };
@@ -534,7 +534,7 @@ struct xpc_partition {
 	/* XPC HB infrastructure */
 
 	u8 remote_rp_version;	/* version# of partition's rsvd pg */
-	unsigned long remote_rp_stamp; /* time when rsvd pg was initialized */
+	unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
 	u64 remote_rp_pa;	/* phys addr of partition's rsvd pg */
 	u64 last_heartbeat;	/* HB at last read */
 	u32 activate_IRQ_rcvd;	/* IRQs since activation */
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 36dfccea524..e7ff9e1670f 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -862,8 +862,8 @@ xpc_do_exit(enum xp_retval reason)
 	DBUG_ON(xpc_any_partition_engaged());
 	DBUG_ON(xpc_any_hbs_allowed() != 0);
 
-	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->stamp = 0;
+	/* a zero timestamp indicates our rsvd page is not initialized */
+	xpc_rsvd_page->ts_jiffies = 0;
 
 	if (reason == xpUnloading) {
 		(void)unregister_die_notifier(&xpc_die_notifier);
@@ -1152,8 +1152,8 @@ xpc_init(void)
 
 	/* initialization was not successful */
 out_3:
-	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->stamp = 0;
+	/* a zero timestamp indicates our rsvd page is not initialized */
+	xpc_rsvd_page->ts_jiffies = 0;
 
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index ca6784f5597..70d4a00c972 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -133,7 +133,7 @@ xpc_setup_rsvd_page(void)
 {
 	struct xpc_rsvd_page *rp;
 	u64 rp_pa;
-	unsigned long new_stamp;
+	unsigned long new_ts_jiffies;
 
 	/* get the local reserved page's address */
 
@@ -183,10 +183,10 @@ xpc_setup_rsvd_page(void)
 	 * This signifies to the remote partition that our reserved
 	 * page is initialized.
 	 */
-	new_stamp = jiffies;
-	if (new_stamp == 0 || new_stamp == rp->stamp)
-		new_stamp++;
-	rp->stamp = new_stamp;
+	new_ts_jiffies = jiffies;
+	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
+		new_ts_jiffies++;
+	rp->ts_jiffies = new_ts_jiffies;
 
 	return rp;
 }
@@ -225,8 +225,8 @@ xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 			discovered_nasids[l] |= remote_part_nasids[l];
 	}
 
-	/* see if the reserved page has been set up by XPC */
-	if (remote_rp->stamp == 0)
+	/* zero timestamp indicates the reserved page has not been setup */
+	if (remote_rp->ts_jiffies == 0)
 		return xpRsvdPageNotSet;
 
 	if (XPC_VERSION_MAJOR(remote_rp->version) !=
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index f82889f6015..4b5f69edf0d 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -863,8 +863,8 @@ xpc_partition_deactivation_requested_sn2(short partid)
  */
 static void
 xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
-			      unsigned long *remote_rp_stamp, u64 remote_rp_pa,
-			      u64 remote_vars_pa,
+			      unsigned long *remote_rp_ts_jiffies,
+			      u64 remote_rp_pa, u64 remote_vars_pa,
 			      struct xpc_vars_sn2 *remote_vars)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
@@ -873,9 +873,9 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
 		part->remote_rp_version);
 
-	part->remote_rp_stamp = *remote_rp_stamp;
-	dev_dbg(xpc_part, "  remote_rp_stamp = 0x%016lx\n",
-		part->remote_rp_stamp);
+	part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+	dev_dbg(xpc_part, "  remote_rp_ts_jiffies = 0x%016lx\n",
+		part->remote_rp_ts_jiffies);
 
 	part->remote_rp_pa = remote_rp_pa;
 	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
@@ -933,7 +933,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 	u64 remote_vars_pa;
 	int remote_rp_version;
 	int reactivate = 0;
-	unsigned long remote_rp_stamp = 0;
+	unsigned long remote_rp_ts_jiffies = 0;
 	short partid;
 	struct xpc_partition *part;
 	struct xpc_partition_sn2 *part_sn2;
@@ -952,7 +952,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 
 	remote_vars_pa = remote_rp->sn.vars_pa;
 	remote_rp_version = remote_rp->version;
-	remote_rp_stamp = remote_rp->stamp;
+	remote_rp_ts_jiffies = remote_rp->ts_jiffies;
 
 	partid = remote_rp->SAL_partid;
 	part = &xpc_partitions[partid];
@@ -981,8 +981,9 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 	    part->act_state == XPC_P_INACTIVE) {
 
 		xpc_update_partition_info_sn2(part, remote_rp_version,
-					      &remote_rp_stamp, remote_rp_pa,
-					      remote_vars_pa, remote_vars);
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
 
 		if (xpc_partition_deactivation_requested_sn2(partid)) {
 			/*
@@ -999,7 +1000,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 	DBUG_ON(part->remote_rp_version == 0);
 	DBUG_ON(part_sn2->remote_vars_version == 0);
 
-	if (remote_rp_stamp != part->remote_rp_stamp) {
+	if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
 
 		/* the other side rebooted */
 
@@ -1007,8 +1008,9 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 		DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
 
 		xpc_update_partition_info_sn2(part, remote_rp_version,
-					      &remote_rp_stamp, remote_rp_pa,
-					      remote_vars_pa, remote_vars);
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
 		reactivate = 1;
 	}
 
-- 
cgit v1.2.3


From 261f3b4979db88d29fc86aad9f76fbc0c2c6d21a Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:16 -0700
Subject: sgi-xp: enable building of XPC/XPNET on x86_64

Get XPC/XPNET to build on x86_64.  Trying to modprobe them on a system that
is neither UV nor sn2 will result in -ENODEV.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig                |  2 +-
 drivers/misc/sgi-xp/Makefile        | 14 ++++++++---
 drivers/misc/sgi-xp/xp.h            | 32 +++++++++++++++---------
 drivers/misc/sgi-xp/xp_main.c       | 10 +++++++-
 drivers/misc/sgi-xp/xp_sn2.c        | 10 ++++++++
 drivers/misc/sgi-xp/xpc.h           | 34 ++++++++++++-------------
 drivers/misc/sgi-xp/xpc_channel.c   | 18 +++++---------
 drivers/misc/sgi-xp/xpc_main.c      | 49 +++++++++++++++++++------------------
 drivers/misc/sgi-xp/xpc_partition.c | 47 +++++++++++++++--------------------
 drivers/misc/sgi-xp/xpc_sn2.c       | 30 +++++++++++++++++++----
 drivers/misc/sgi-xp/xpc_uv.c        |  7 ++----
 drivers/misc/sgi-xp/xpnet.c         | 28 +++++++++------------
 12 files changed, 155 insertions(+), 126 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 4b288f43ca8..fa50e9ede0e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -426,7 +426,7 @@ config ENCLOSURE_SERVICES
 
 config SGI_XP
 	tristate "Support communication between SGI SSIs"
-	depends on IA64_GENERIC || IA64_SGI_SN2
+	depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	---help---
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
index b3eeff31ebf..35ce2857807 100644
--- a/drivers/misc/sgi-xp/Makefile
+++ b/drivers/misc/sgi-xp/Makefile
@@ -3,11 +3,17 @@
 #
 
 obj-$(CONFIG_SGI_XP)		+= xp.o
-xp-y				:= xp_main.o xp_uv.o
-xp-$(CONFIG_IA64)		+= xp_sn2.o xp_nofault.o
+xp-y				:= xp_main.o
+xp-$(CONFIG_IA64_SGI_SN2)	+= xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_GENERIC)	+= xp_sn2.o xp_nofault.o xp_uv.o
+xp-$(CONFIG_IA64_SGI_UV)	+= xp_uv.o
+xp-$(CONFIG_X86_64)		+= xp_uv.o
 
 obj-$(CONFIG_SGI_XP)		+= xpc.o
-xpc-y				:= xpc_main.o xpc_uv.o xpc_channel.o xpc_partition.o
-xpc-$(CONFIG_IA64)		+= xpc_sn2.o
+xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64_SGI_SN2)	+= xpc_sn2.o
+xpc-$(CONFIG_IA64_GENERIC)	+= xpc_sn2.o xpc_uv.o
+xpc-$(CONFIG_IA64_SGI_UV) 	+= xpc_uv.o
+xpc-$(CONFIG_X86_64)		+= xpc_uv.o
 
 obj-$(CONFIG_SGI_XP)		+= xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 3054fae8b02..01bf1a2cd8e 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -13,18 +13,17 @@
 #ifndef _DRIVERS_MISC_SGIXP_XP_H
 #define _DRIVERS_MISC_SGIXP_XP_H
 
-#include <linux/cache.h>
-#include <linux/hardirq.h>
 #include <linux/mutex.h>
-#include <asm/sn/types.h>
+
 #ifdef CONFIG_IA64
-#include <asm/sn/arch.h>
+#include <asm/system.h>
+#include <asm/sn/arch.h>	/* defines is_shub1() and is_shub2() */
+#define is_shub()	ia64_platform_is("sn2")
+#define is_uv()		ia64_platform_is("uv")
 #endif
-
-#ifdef USE_DBUG_ON
-#define DBUG_ON(condition)	BUG_ON(condition)
-#else
-#define DBUG_ON(condition)
+#ifdef CONFIG_X86_64
+#include <asm/genapic.h>
+#define is_uv()		is_uv_system()
 #endif
 
 #ifndef is_shub1
@@ -36,13 +35,19 @@
 #endif
 
 #ifndef is_shub
-#define is_shub()	(is_shub1() || is_shub2())
+#define is_shub()	0
 #endif
 
 #ifndef is_uv
 #define is_uv()		0
 #endif
 
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition)	BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
 /*
  * Define the maximum number of partitions the system can possibly support.
  * It is based on the maximum number of hardware partitionable regions. The
@@ -200,7 +205,9 @@ enum xp_retval {
 	xpPayloadTooBig,	/* 55: payload too large for message slot */
 
 	xpUnsupported,		/* 56: unsupported functionality or resource */
-	xpUnknownReason		/* 57: unknown reason - must be last in enum */
+	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
+
+	xpUnknownReason		/* 58: unknown reason - must be last in enum */
 };
 
 /*
@@ -339,8 +346,11 @@ xpc_partid_to_nasids(short partid, void *nasids)
 }
 
 extern short xp_max_npartitions;
+extern short xp_partition_id;
+extern u8 xp_region_size;
 
 extern enum xp_retval (*xp_remote_memcpy) (void *, const void *, size_t);
+extern int (*xp_cpu_to_nasid) (int);
 
 extern u64 xp_nofault_PIOR_target;
 extern int xp_nofault_PIOR(void *);
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 9c0ce2f15ff..c34b23fe498 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -14,7 +14,6 @@
  *
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include "xp.h"
@@ -36,9 +35,18 @@ struct device *xp = &xp_dbg_subname;
 short xp_max_npartitions;
 EXPORT_SYMBOL_GPL(xp_max_npartitions);
 
+short xp_partition_id;
+EXPORT_SYMBOL_GPL(xp_partition_id);
+
+u8 xp_region_size;
+EXPORT_SYMBOL_GPL(xp_region_size);
+
 enum xp_retval (*xp_remote_memcpy) (void *dst, const void *src, size_t len);
 EXPORT_SYMBOL_GPL(xp_remote_memcpy);
 
+int (*xp_cpu_to_nasid) (int cpuid);
+EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
+
 /*
  * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
  * users of XPC.
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index baabc1cb3fe..c6a1ede7d6e 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -12,6 +12,7 @@
  *      Architecture specific implementation of common functions.
  */
 
+#include <linux/module.h>
 #include <linux/device.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
@@ -116,14 +117,23 @@ xp_remote_memcpy_sn2(void *vdst, const void *psrc, size_t len)
 	return xpBteCopyError;
 }
 
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+	return cpuid_to_nasid(cpuid);
+}
+
 enum xp_retval
 xp_init_sn2(void)
 {
 	BUG_ON(!is_shub());
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
 
 	xp_remote_memcpy = xp_remote_memcpy_sn2;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
 
 	return xp_register_nofault_code_sn2();
 }
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index e194d3140f6..96408fcf5a1 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -13,17 +13,10 @@
 #ifndef _DRIVERS_MISC_SGIXP_XPC_H
 #define _DRIVERS_MISC_SGIXP_XPC_H
 
-#include <linux/interrupt.h>
-#include <linux/sysctl.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
+#include <linux/wait.h>
 #include <linux/completion.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/mspec.h>
-#include <asm/sn/shub_mmr.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
 #include "xp.h"
 
 /*
@@ -179,7 +172,8 @@ struct xpc_vars_part_sn2 {
 #define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
 #define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
 
-#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+				 XPC_RP_HEADER_SIZE))
 #define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
 				 xpc_nasid_mask_nlongs)
 #define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *) \
@@ -202,13 +196,13 @@ struct xpc_vars_part_sn2 {
 /*
  * Define a Get/Put value pair (pointers) used with a message queue.
  */
-struct xpc_gp {
+struct xpc_gp_sn2 {
 	s64 get;		/* Get value */
 	s64 put;		/* Put value */
 };
 
 #define XPC_GP_SIZE \
-		L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_MAX_NCHANNELS)
+		L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
 
 /*
  * Define a structure that contains arguments associated with opening and
@@ -340,10 +334,10 @@ struct xpc_channel_sn2 {
 
 	/* various flavors of local and remote Get/Put values */
 
-	struct xpc_gp *local_GP;	/* local Get/Put values */
-	struct xpc_gp remote_GP;	/* remote Get/Put values */
-	struct xpc_gp w_local_GP;	/* working local Get/Put values */
-	struct xpc_gp w_remote_GP;	/* working remote Get/Put values */
+	struct xpc_gp_sn2 *local_GP;	/* local Get/Put values */
+	struct xpc_gp_sn2 remote_GP;	/* remote Get/Put values */
+	struct xpc_gp_sn2 w_local_GP;	/* working local Get/Put values */
+	struct xpc_gp_sn2 w_remote_GP;	/* working remote Get/Put values */
 	s64 next_msg_to_pull;	/* Put value of next msg to pull */
 
 	struct mutex msg_to_pull_mutex;	/* next msg to pull serialization */
@@ -506,9 +500,9 @@ struct xpc_partition_sn2 {
 	u8 remote_vars_version;	/* version# of partition's vars */
 
 	void *local_GPs_base;	/* base address of kmalloc'd space */
-	struct xpc_gp *local_GPs;	/* local Get/Put values */
+	struct xpc_gp_sn2 *local_GPs;	/* local Get/Put values */
 	void *remote_GPs_base;	/* base address of kmalloc'd space */
-	struct xpc_gp *remote_GPs;	/* copy of remote partition's local */
+	struct xpc_gp_sn2 *remote_GPs;	/* copy of remote partition's local */
 					/* Get/Put values */
 	u64 remote_GPs_pa;	/* phys address of remote partition's local */
 				/* Get/Put values */
@@ -629,6 +623,8 @@ extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
+extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (u64, u64 *, u64 *,
+							 size_t *);
 extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
 extern void (*xpc_heartbeat_init) (void);
 extern void (*xpc_heartbeat_exit) (void);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index f1afc0a7c33..0615efbe007 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -14,14 +14,7 @@
  *
  */
 
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/completion.h>
-#include <asm/sn/sn_sal.h>
+#include <linux/device.h>
 #include "xpc.h"
 
 /*
@@ -373,8 +366,9 @@ again:
 		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
 			"0x%lx, local_nentries=%d, remote_nentries=%d) "
 			"received from partid=%d, channel=%d\n",
-			args->local_msgqueue_pa, args->local_nentries,
-			args->remote_nentries, ch->partid, ch->number);
+			(unsigned long)args->local_msgqueue_pa,
+			args->local_nentries, args->remote_nentries,
+			ch->partid, ch->number);
 
 		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -940,7 +934,7 @@ xpc_deliver_msg(struct xpc_channel *ch)
 		if (ch->func != NULL) {
 			dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
 				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *)msg, msg->number, ch->partid,
+				msg, (signed long)msg->number, ch->partid,
 				ch->number);
 
 			/* deliver the message to its intended recipient */
@@ -949,7 +943,7 @@ xpc_deliver_msg(struct xpc_channel *ch)
 
 			dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
 				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *)msg, msg->number, ch->partid,
+				msg, (signed long)msg->number, ch->partid,
 				ch->number);
 		}
 
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index e7ff9e1670f..f7478cc3572 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -43,19 +43,13 @@
  *
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
-#include <linux/completion.h>
 #include <linux/kdebug.h>
 #include <linux/kthread.h>
-#include <linux/uaccess.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
 /* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -175,6 +169,8 @@ static struct notifier_block xpc_die_notifier = {
 	.notifier_call = xpc_system_die,
 };
 
+enum xp_retval (*xpc_get_partition_rsvd_page_pa) (u64 buf, u64 *cookie,
+						  u64 *paddr, size_t *len);
 enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
 void (*xpc_heartbeat_init) (void);
 void (*xpc_heartbeat_exit) (void);
@@ -920,7 +916,8 @@ xpc_die_deactivate(void)
 	struct xpc_partition *part;
 	short partid;
 	int any_engaged;
-	long time, printmsg_time, disengage_timeout;
+	long keep_waiting;
+	long wait_to_print;
 
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
 	xpc_exiting = 1;
@@ -937,16 +934,17 @@ xpc_die_deactivate(void)
 		}
 	}
 
-	time = rtc_time();
-	printmsg_time = time +
-	    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
-	disengage_timeout = time +
-	    (xpc_disengage_timelimit * sn_rtc_cycles_per_second);
-
 	/*
 	 * Though we requested that all other partitions deactivate from us,
-	 * we only wait until they've all disengaged.
+	 * we only wait until they've all disengaged or we've reached the
+	 * defined timelimit.
+	 *
+	 * Given that one iteration through the following while-loop takes
+	 * approximately 200 microseconds, calculate the #of loops to take
+	 * before bailing and the #of loops before printing a waiting message.
 	 */
+	keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+	wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
 
 	while (1) {
 		any_engaged = xpc_any_partition_engaged();
@@ -955,8 +953,7 @@ xpc_die_deactivate(void)
 			break;
 		}
 
-		time = rtc_time();
-		if (time >= disengage_timeout) {
+		if (!keep_waiting--) {
 			for (partid = 0; partid < xp_max_npartitions;
 			     partid++) {
 				if (xpc_partition_engaged(partid)) {
@@ -968,15 +965,15 @@ xpc_die_deactivate(void)
 			break;
 		}
 
-		if (time >= printmsg_time) {
+		if (!wait_to_print--) {
 			dev_info(xpc_part, "waiting for remote partitions to "
 				 "deactivate, timeout in %ld seconds\n",
-				 (disengage_timeout - time) /
-				 sn_rtc_cycles_per_second);
-			printmsg_time = time +
-			    (XPC_DEACTIVATE_PRINTMSG_INTERVAL *
-			     sn_rtc_cycles_per_second);
+				 keep_waiting / (1000 * 5));
+			wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+			    1000 * 5;
 		}
+
+		udelay(200);
 	}
 }
 
@@ -991,6 +988,7 @@ xpc_die_deactivate(void)
 static int
 xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 {
+#ifdef CONFIG_IA64		/* !!! temporary kludge */
 	switch (event) {
 	case DIE_MACHINE_RESTART:
 	case DIE_MACHINE_HALT:
@@ -1019,6 +1017,9 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 		xpc_online_heartbeat();
 		break;
 	}
+#else
+	xpc_die_deactivate();
+#endif
 
 	return NOTIFY_DONE;
 }
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 70d4a00c972..f84d6641020 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -15,15 +15,8 @@
  *
  */
 
-#include <linux/kernel.h>
-#include <linux/sysctl.h>
-#include <linux/cache.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/nodepda.h>
-#include <asm/sn/addrs.h>
+#include <linux/device.h>
+#include <linux/hardirq.h>
 #include "xpc.h"
 
 /* XPC is exiting flag */
@@ -71,24 +64,23 @@ static u64
 xpc_get_rsvd_page_pa(int nasid)
 {
 	enum xp_retval ret;
-	s64 status;
 	u64 cookie = 0;
 	u64 rp_pa = nasid;	/* seed with nasid */
-	u64 len = 0;
+	size_t len = 0;
 	u64 buf = buf;
 	u64 buf_len = 0;
 	void *buf_base = NULL;
 
 	while (1) {
 
-		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
-						       &len);
+		ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa,
+						     &len);
 
-		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
-			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
-			status, cookie, rp_pa, len);
+		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+			"address=0x%016lx, len=0x%016lx\n", ret,
+			(unsigned long)cookie, (unsigned long)rp_pa, len);
 
-		if (status != SALRET_MORE_PASSES)
+		if (ret != xpNeedMoreInfo)
 			break;
 
 		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
@@ -100,8 +92,9 @@ xpc_get_rsvd_page_pa(int nasid)
 								 &buf_base);
 			if (buf_base == NULL) {
 				dev_err(xpc_part, "unable to kmalloc "
-					"len=0x%016lx\n", buf_len);
-				status = SALRET_ERROR;
+					"len=0x%016lx\n",
+					(unsigned long)buf_len);
+				ret = xpNoMemory;
 				break;
 			}
 		}
@@ -109,17 +102,17 @@ xpc_get_rsvd_page_pa(int nasid)
 		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
 		if (ret != xpSuccess) {
 			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
-			status = SALRET_ERROR;
 			break;
 		}
 	}
 
 	kfree(buf_base);
 
-	if (status != SALRET_OK)
+	if (ret != xpSuccess)
 		rp_pa = 0;
 
-	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n",
+		(unsigned long)rp_pa);
 	return rp_pa;
 }
 
@@ -138,7 +131,7 @@ xpc_setup_rsvd_page(void)
 	/* get the local reserved page's address */
 
 	preempt_disable();
-	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
 	preempt_enable();
 	if (rp_pa == 0) {
 		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
@@ -150,7 +143,7 @@ xpc_setup_rsvd_page(void)
 		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
 		rp->SAL_partid &= 0xff;
 	}
-	BUG_ON(rp->SAL_partid != sn_partition_id);
+	BUG_ON(rp->SAL_partid != xp_partition_id);
 
 	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
 		dev_err(xpc_part, "the reserved page's partid of %d is outside "
@@ -237,11 +230,11 @@ xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 	/* check that both remote and local partids are valid for each side */
 	if (remote_rp->SAL_partid < 0 ||
 	    remote_rp->SAL_partid >= xp_max_npartitions ||
-	    remote_rp->max_npartitions <= sn_partition_id) {
+	    remote_rp->max_npartitions <= xp_partition_id) {
 		return xpInvalidPartid;
 	}
 
-	if (remote_rp->SAL_partid == sn_partition_id)
+	if (remote_rp->SAL_partid == xp_partition_id)
 		return xpLocalPartid;
 
 	return xpSuccess;
@@ -426,7 +419,7 @@ xpc_discovery(void)
 	 * protection is in regards to memory, IOI and IPI.
 	 */
 	max_regions = 64;
-	region_size = sn_region_size;
+	region_size = xp_region_size;
 
 	switch (region_size) {
 	case 128:
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 4b5f69edf0d..fde870aebcb 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -13,9 +13,9 @@
  *
  */
 
-#include <linux/kernel.h>
 #include <linux/delay.h>
 #include <asm/uncached.h>
+#include <asm/sn/mspec.h>
 #include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
@@ -176,7 +176,7 @@ xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
 
 	local_irq_restore(irq_flags);
 
-	return ((ret == 0) ? xpSuccess : xpPioReadError);
+	return (ret == 0) ? xpSuccess : xpPioReadError;
 }
 
 static struct amo *
@@ -284,7 +284,7 @@ xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
 	short partid = (short)(u64)dev_id;
 	struct xpc_partition *part = &xpc_partitions[partid];
 
-	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
 
 	if (xpc_part_ref(part)) {
 		xpc_check_for_sent_chctl_flags_sn2(part);
@@ -576,6 +576,25 @@ xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
 	}
 }
 
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_sn2(u64 buf, u64 *cookie, u64 *paddr,
+				   size_t *len)
+{
+	s64 status;
+	enum xp_retval ret;
+
+	status = sn_partition_reserved_page_pa(buf, cookie, paddr, len);
+	if (status == SALRET_OK)
+		ret = xpSuccess;
+	else if (status == SALRET_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpSalError;
+
+	return ret;
+}
+
+
 static enum xp_retval
 xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 {
@@ -636,7 +655,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	/* clear xpc_vars_part_sn2 */
 	memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
-	       xp_max_npartitions);
+	       XP_MAX_NPARTITIONS_SN2);
 
 	/* initialize the activate IRQ related amo variables */
 	for (i = 0; i < xpc_nasid_mask_nlongs; i++)
@@ -699,7 +718,7 @@ xpc_check_remote_hb_sn2(void)
 
 	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
 
-	for (partid = 0; partid < xp_max_npartitions; partid++) {
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_SN2; partid++) {
 
 		if (xpc_exiting)
 			break;
@@ -2386,6 +2405,7 @@ xpc_init_sn2(void)
 	int ret;
 	size_t buf_size;
 
+	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
 	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
 	xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 2aec1dfbb3d..232867aa692 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -14,11 +14,8 @@
  */
 
 #include <linux/kernel.h>
-
-/* !!! #include <gru/grukservices.h> */
-/* !!! uv_gpa() is defined in <gru/grukservices.h> */
-#define uv_gpa(_a)		((unsigned long)_a)
-
+#include <asm/uv/uv_hub.h>
+#include "../sgi-gru/grukservices.h"
 #include "xpc.h"
 
 static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 07c89c4e2c2..49385f44170 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -21,17 +21,8 @@
  */
 
 #include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <asm/atomic.h>
 #include "xp.h"
 
 /*
@@ -175,8 +166,9 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 
 		return;
 	}
-	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
-		msg->leadin_ignore, msg->tailout_ignore);
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n",
+		(unsigned long)msg->buf_pa, msg->size, msg->leadin_ignore,
+		msg->tailout_ignore);
 
 	/* reserve an extra cache line */
 	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
@@ -320,8 +312,10 @@ xpnet_dev_open(struct net_device *dev)
 
 	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
 		"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
-		XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
-		XPNET_MAX_IDLE_KTHREADS);
+		(unsigned long)XPNET_MSG_SIZE,
+		(unsigned long)XPNET_MSG_NENTRIES,
+		(unsigned long)XPNET_MAX_KTHREADS,
+		(unsigned long)XPNET_MAX_IDLE_KTHREADS);
 
 	ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
 			  XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
@@ -439,8 +433,8 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
 		KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
 		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
-		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
-		msg->leadin_ignore, msg->tailout_ignore);
+		dest_partid, XPC_NET_CHANNEL, (unsigned long)msg->buf_pa,
+		msg->size, msg->leadin_ignore, msg->tailout_ignore);
 
 	atomic_inc(&queued_msg->use_count);
 
@@ -602,8 +596,8 @@ xpnet_init(void)
 	 */
 	xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */
 
-	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = sn_partition_id;
-	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (sn_partition_id >> 8);
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
 
 	/*
 	 * ether_setup() sets this to a multicast device.  We are
-- 
cgit v1.2.3


From a812dcc3a298eef650c381e094e2cf41a4ecc9ad Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:16 -0700
Subject: sgi-xp: add usage of GRU driver by xp_remote_memcpy()

Add UV support to xp_remote_memcpy(), which involves interfacing with the
GRU driver.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            |  8 +++--
 drivers/misc/sgi-xp/xp_main.c       |  6 +++-
 drivers/misc/sgi-xp/xp_sn2.c        | 50 +++++++++++++--------------
 drivers/misc/sgi-xp/xp_uv.c         | 27 +++++++++++++--
 drivers/misc/sgi-xp/xpc.h           | 44 ++++++++++++------------
 drivers/misc/sgi-xp/xpc_channel.c   |  5 ++-
 drivers/misc/sgi-xp/xpc_main.c      |  8 +++--
 drivers/misc/sgi-xp/xpc_partition.c | 37 ++++++++++----------
 drivers/misc/sgi-xp/xpc_sn2.c       | 68 ++++++++++++++++++++-----------------
 drivers/misc/sgi-xp/xpc_uv.c        |  2 +-
 drivers/misc/sgi-xp/xpnet.c         | 26 ++++++--------
 11 files changed, 154 insertions(+), 127 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 01bf1a2cd8e..45d0a08c2dd 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -207,7 +207,9 @@ enum xp_retval {
 	xpUnsupported,		/* 56: unsupported functionality or resource */
 	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
 
-	xpUnknownReason		/* 58: unknown reason - must be last in enum */
+	xpGruCopyError,		/* 58: gru_copy_gpa() returned error */
+
+	xpUnknownReason		/* 59: unknown reason - must be last in enum */
 };
 
 /*
@@ -349,7 +351,9 @@ extern short xp_max_npartitions;
 extern short xp_partition_id;
 extern u8 xp_region_size;
 
-extern enum xp_retval (*xp_remote_memcpy) (void *, const void *, size_t);
+extern unsigned long (*xp_pa) (void *);
+extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
+		       size_t);
 extern int (*xp_cpu_to_nasid) (int);
 
 extern u64 xp_nofault_PIOR_target;
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index c34b23fe498..f86ad3af26b 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -41,7 +41,11 @@ EXPORT_SYMBOL_GPL(xp_partition_id);
 u8 xp_region_size;
 EXPORT_SYMBOL_GPL(xp_region_size);
 
-enum xp_retval (*xp_remote_memcpy) (void *dst, const void *src, size_t len);
+unsigned long (*xp_pa) (void *addr);
+EXPORT_SYMBOL_GPL(xp_pa);
+
+enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
+				    const unsigned long src_gpa, size_t len);
 EXPORT_SYMBOL_GPL(xp_remote_memcpy);
 
 int (*xp_cpu_to_nasid) (int cpuid);
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index c6a1ede7d6e..1440134caf3 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -63,7 +63,7 @@ xp_register_nofault_code_sn2(void)
 	return xpSuccess;
 }
 
-void
+static void
 xp_unregister_nofault_code_sn2(void)
 {
 	u64 func_addr = *(u64 *)xp_nofault_PIOR;
@@ -74,45 +74,42 @@ xp_unregister_nofault_code_sn2(void)
 				       err_func_addr, 1, 0);
 }
 
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_sn2(void *addr)
+{
+	return __pa(addr);
+}
+
 /*
  * Wrapper for bte_copy().
  *
- *	vdst - virtual address of the destination of the transfer.
- *	psrc - physical address of the source of the transfer.
+ *	dst_pa - physical address of the destination of the transfer.
+ *	src_pa - physical address of the source of the transfer.
  *	len - number of bytes to transfer from source to destination.
  *
  * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
  */
 static enum xp_retval
-xp_remote_memcpy_sn2(void *vdst, const void *psrc, size_t len)
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+		     size_t len)
 {
 	bte_result_t ret;
-	u64 pdst = ia64_tpa(vdst);
-	/* ??? What are the rules governing the src and dst addresses passed in?
-	 * ??? Currently we're assuming that dst is a virtual address and src
-	 * ??? is a physical address, is this appropriate? Can we allow them to
-	 * ??? be whatever and we make the change here without damaging the
-	 * ??? addresses?
-	 */
 
-	/*
-	 * Ensure that the physically mapped memory is contiguous.
-	 *
-	 * We do this by ensuring that the memory is from region 7 only.
-	 * If the need should arise to use memory from one of the other
-	 * regions, then modify the BUG_ON() statement to ensure that the
-	 * memory from that region is always physically contiguous.
-	 */
-	BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
-
-	ret = bte_copy((u64)psrc, pdst, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+	ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 	if (ret == BTE_SUCCESS)
 		return xpSuccess;
 
-	if (is_shub2())
-		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x\n", ret);
-	else
-		dev_err(xp, "bte_copy() failed, error=%d\n", ret);
+	if (is_shub2()) {
+		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+			"0x%016lx src_pa=0x%016lx len=%ld\n", ret, dst_pa,
+			src_pa, len);
+	} else {
+		dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+			"src_pa=0x%016lx len=%ld\n", ret, dst_pa, src_pa, len);
+	}
 
 	return xpBteCopyError;
 }
@@ -132,6 +129,7 @@ xp_init_sn2(void)
 	xp_partition_id = sn_partition_id;
 	xp_region_size = sn_region_size;
 
+	xp_pa = xp_pa_sn2;
 	xp_remote_memcpy = xp_remote_memcpy_sn2;
 	xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
 
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index 382b1b6bcc0..44f2c2b58c2 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -13,13 +13,33 @@
  *
  */
 
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#include "../sgi-gru/grukservices.h"
 #include "xp.h"
 
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+	return uv_gpa(addr);
+}
+
 static enum xp_retval
-xp_remote_memcpy_uv(void *vdst, const void *psrc, size_t len)
+xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
+		    size_t len)
 {
-	/* !!! this function needs fleshing out */
-	return xpUnsupported;
+	int ret;
+
+	ret = gru_copy_gpa(dst_gpa, src_gpa, len);
+	if (ret == 0)
+		return xpSuccess;
+
+	dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+		"len=%ld\n", dst_gpa, src_gpa, len);
+	return xpGruCopyError;
 }
 
 enum xp_retval
@@ -29,6 +49,7 @@ xp_init_uv(void)
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
 
+	xp_pa = xp_pa_uv;
 	xp_remote_memcpy = xp_remote_memcpy_uv;
 
 	return xpSuccess;
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 96408fcf5a1..49e26993345 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -91,8 +91,8 @@ struct xpc_rsvd_page {
 	u8 version;
 	u8 pad1[3];		/* align to next u64 in 1st 64-byte cacheline */
 	union {
-		u64 vars_pa;	/* physical address of struct xpc_vars */
-		u64 activate_mq_gpa;	/* global phys address of activate_mq */
+		unsigned long vars_pa;	/* phys address of struct xpc_vars */
+		unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */
 	} sn;
 	unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
 	u64 pad2[10];		/* align to last u64 in 2nd 64-byte cacheline */
@@ -122,8 +122,8 @@ struct xpc_vars_sn2 {
 	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
 	int activate_IRQ_nasid;
 	int activate_IRQ_phys_cpuid;
-	u64 vars_part_pa;
-	u64 amos_page_pa;	/* paddr of page of amos from MSPEC driver */
+	unsigned long vars_part_pa;
+	unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
 	struct amo *amos_page;	/* vaddr of page of amos from MSPEC driver */
 };
 
@@ -142,10 +142,10 @@ struct xpc_vars_sn2 {
 struct xpc_vars_part_sn2 {
 	u64 magic;
 
-	u64 openclose_args_pa;	/* physical address of open and close args */
-	u64 GPs_pa;		/* physical address of Get/Put values */
+	unsigned long openclose_args_pa; /* phys addr of open and close args */
+	unsigned long GPs_pa;	/* physical address of Get/Put values */
 
-	u64 chctl_amo_pa;	/* physical address of chctl flags' amo */
+	unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
 
 	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
 	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
@@ -213,7 +213,7 @@ struct xpc_openclose_args {
 	u16 msg_size;		/* sizeof each message entry */
 	u16 remote_nentries;	/* #of message entries in remote msg queue */
 	u16 local_nentries;	/* #of message entries in local msg queue */
-	u64 local_msgqueue_pa;	/* physical address of local message queue */
+	unsigned long local_msgqueue_pa; /* phys addr of local message queue */
 };
 
 #define XPC_OPENCLOSE_ARGS_SIZE \
@@ -366,8 +366,8 @@ struct xpc_channel {
 	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
 	struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
 					 /* local message queue */
-	u64 remote_msgqueue_pa;	/* phys addr of remote partition's */
-				/* local message queue */
+	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+					  /* local message queue */
 
 	atomic_t references;	/* #of external references to queues */
 
@@ -491,12 +491,12 @@ xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
  */
 
 struct xpc_partition_sn2 {
-	u64 remote_amos_page_pa;	/* phys addr of partition's amos page */
+	unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
 	int activate_IRQ_nasid;	/* active partition's act/deact nasid */
 	int activate_IRQ_phys_cpuid;	/* active part's act/deact phys cpuid */
 
-	u64 remote_vars_pa;	/* phys addr of partition's vars */
-	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
+	unsigned long remote_vars_pa;	/* phys addr of partition's vars */
+	unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
 	u8 remote_vars_version;	/* version# of partition's vars */
 
 	void *local_GPs_base;	/* base address of kmalloc'd space */
@@ -504,10 +504,10 @@ struct xpc_partition_sn2 {
 	void *remote_GPs_base;	/* base address of kmalloc'd space */
 	struct xpc_gp_sn2 *remote_GPs;	/* copy of remote partition's local */
 					/* Get/Put values */
-	u64 remote_GPs_pa;	/* phys address of remote partition's local */
-				/* Get/Put values */
+	unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+				     /* Get/Put values */
 
-	u64 remote_openclose_args_pa;	/* phys addr of remote's args */
+	unsigned long remote_openclose_args_pa;	/* phys addr of remote's args */
 
 	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
 	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
@@ -529,7 +529,7 @@ struct xpc_partition {
 
 	u8 remote_rp_version;	/* version# of partition's rsvd pg */
 	unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
-	u64 remote_rp_pa;	/* phys addr of partition's rsvd pg */
+	unsigned long remote_rp_pa;	/* phys addr of partition's rsvd pg */
 	u64 last_heartbeat;	/* HB at last read */
 	u32 activate_IRQ_rcvd;	/* IRQs since activation */
 	spinlock_t act_lock;	/* protect updating of act_state */
@@ -623,7 +623,8 @@ extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
-extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (u64, u64 *, u64 *,
+extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
+							 unsigned long *,
 							 size_t *);
 extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
 extern void (*xpc_heartbeat_init) (void);
@@ -640,8 +641,8 @@ extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
 extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
 extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
 extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
-extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, u64,
-						 int);
+extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *,
+						 unsigned long, int);
 extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
 extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
 extern void (*xpc_cancel_partition_deactivation_request) (
@@ -690,7 +691,8 @@ extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
 extern void xpc_discovery(void);
 extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
-					struct xpc_rsvd_page *, u64 *);
+					struct xpc_rsvd_page *,
+					unsigned long *);
 extern void xpc_deactivate_partition(const int, struct xpc_partition *,
 				     enum xp_retval);
 extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 0615efbe007..d7a15f1a78a 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -366,9 +366,8 @@ again:
 		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
 			"0x%lx, local_nentries=%d, remote_nentries=%d) "
 			"received from partid=%d, channel=%d\n",
-			(unsigned long)args->local_msgqueue_pa,
-			args->local_nentries, args->remote_nentries,
-			ch->partid, ch->number);
+			args->local_msgqueue_pa, args->local_nentries,
+			args->remote_nentries, ch->partid, ch->number);
 
 		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index f7478cc3572..dc686110aef 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -169,8 +169,9 @@ static struct notifier_block xpc_die_notifier = {
 	.notifier_call = xpc_system_die,
 };
 
-enum xp_retval (*xpc_get_partition_rsvd_page_pa) (u64 buf, u64 *cookie,
-						  u64 *paddr, size_t *len);
+enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
+						  unsigned long *rp_pa,
+						  size_t *len);
 enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
 void (*xpc_heartbeat_init) (void);
 void (*xpc_heartbeat_exit) (void);
@@ -189,7 +190,8 @@ int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
 
 void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
-					  u64 remote_rp_pa, int nasid);
+					  unsigned long remote_rp_pa,
+					  int nasid);
 void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
 void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
 void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index f84d6641020..f150dbfcfcc 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -60,15 +60,15 @@ xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  * Given a nasid, get the physical address of the  partition's reserved page
  * for that nasid. This function returns 0 on any error.
  */
-static u64
+static unsigned long
 xpc_get_rsvd_page_pa(int nasid)
 {
 	enum xp_retval ret;
 	u64 cookie = 0;
-	u64 rp_pa = nasid;	/* seed with nasid */
+	unsigned long rp_pa = nasid;	/* seed with nasid */
 	size_t len = 0;
-	u64 buf = buf;
-	u64 buf_len = 0;
+	size_t buf_len = 0;
+	void *buf = buf;
 	void *buf_base = NULL;
 
 	while (1) {
@@ -78,7 +78,7 @@ xpc_get_rsvd_page_pa(int nasid)
 
 		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
 			"address=0x%016lx, len=0x%016lx\n", ret,
-			(unsigned long)cookie, (unsigned long)rp_pa, len);
+			(unsigned long)cookie, rp_pa, len);
 
 		if (ret != xpNeedMoreInfo)
 			break;
@@ -87,19 +87,17 @@ xpc_get_rsvd_page_pa(int nasid)
 		if (L1_CACHE_ALIGN(len) > buf_len) {
 			kfree(buf_base);
 			buf_len = L1_CACHE_ALIGN(len);
-			buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
-								 GFP_KERNEL,
-								 &buf_base);
+			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+							    &buf_base);
 			if (buf_base == NULL) {
 				dev_err(xpc_part, "unable to kmalloc "
-					"len=0x%016lx\n",
-					(unsigned long)buf_len);
+					"len=0x%016lx\n", buf_len);
 				ret = xpNoMemory;
 				break;
 			}
 		}
 
-		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
+		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len);
 		if (ret != xpSuccess) {
 			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
 			break;
@@ -111,8 +109,7 @@ xpc_get_rsvd_page_pa(int nasid)
 	if (ret != xpSuccess)
 		rp_pa = 0;
 
-	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n",
-		(unsigned long)rp_pa);
+	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
 	return rp_pa;
 }
 
@@ -125,7 +122,7 @@ struct xpc_rsvd_page *
 xpc_setup_rsvd_page(void)
 {
 	struct xpc_rsvd_page *rp;
-	u64 rp_pa;
+	unsigned long rp_pa;
 	unsigned long new_ts_jiffies;
 
 	/* get the local reserved page's address */
@@ -193,7 +190,7 @@ xpc_setup_rsvd_page(void)
  */
 enum xp_retval
 xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
-		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
+		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
 {
 	int l;
 	enum xp_retval ret;
@@ -205,7 +202,7 @@ xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 		return xpNoRsvdPageAddr;
 
 	/* pull over the reserved page header and part_nasids mask */
-	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
+	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
 			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
 	if (ret != xpSuccess)
 		return ret;
@@ -389,7 +386,7 @@ xpc_discovery(void)
 {
 	void *remote_rp_base;
 	struct xpc_rsvd_page *remote_rp;
-	u64 remote_rp_pa;
+	unsigned long remote_rp_pa;
 	int region;
 	int region_size;
 	int max_regions;
@@ -500,7 +497,7 @@ enum xp_retval
 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 {
 	struct xpc_partition *part;
-	u64 part_nasid_pa;
+	unsigned long part_nasid_pa;
 
 	part = &xpc_partitions[partid];
 	if (part->remote_rp_pa == 0)
@@ -508,8 +505,8 @@ xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 
 	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
 
-	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 
-	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
+	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
 				xpc_nasid_mask_nbytes);
 }
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index fde870aebcb..1571a7cdf9d 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -207,8 +207,8 @@ xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
  * Flag the appropriate amo variable and send an IRQ to the specified node.
  */
 static void
-xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
-			  int to_phys_cpuid)
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+			  int to_nasid, int to_phys_cpuid)
 {
 	struct amo *amos = (struct amo *)__va(amos_page_pa +
 					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
@@ -404,7 +404,7 @@ xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	args->remote_nentries = ch->remote_nentries;
 	args->local_nentries = ch->local_nentries;
-	args->local_msgqueue_pa = __pa(ch->local_msgqueue);
+	args->local_msgqueue_pa = xp_pa(ch->local_msgqueue);
 	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
 }
 
@@ -577,13 +577,13 @@ xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
 }
 
 static enum xp_retval
-xpc_get_partition_rsvd_page_pa_sn2(u64 buf, u64 *cookie, u64 *paddr,
+xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
 				   size_t *len)
 {
 	s64 status;
 	enum xp_retval ret;
 
-	status = sn_partition_reserved_page_pa(buf, cookie, paddr, len);
+	status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
 	if (status == SALRET_OK)
 		ret = xpSuccess;
 	else if (status == SALRET_MORE_PASSES)
@@ -604,7 +604,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 
 	xpc_vars_sn2 = XPC_RP_VARS(rp);
 
-	rp->sn.vars_pa = __pa(xpc_vars_sn2);
+	rp->sn.vars_pa = xp_pa(xpc_vars_sn2);
 
 	/* vars_part array follows immediately after vars */
 	xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
@@ -649,7 +649,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	xpc_vars_sn2->version = XPC_V_VERSION;
 	xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
 	xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
-	xpc_vars_sn2->vars_part_pa = __pa(xpc_vars_part_sn2);
+	xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
 	xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
 	xpc_vars_sn2->amos_page = amos_page;	/* save for next load of XPC */
 
@@ -734,8 +734,8 @@ xpc_check_remote_hb_sn2(void)
 		}
 
 		/* pull the remote_hb cache line */
-		ret = xp_remote_memcpy(remote_vars,
-				       (void *)part->sn.sn2.remote_vars_pa,
+		ret = xp_remote_memcpy(xp_pa(remote_vars),
+				       part->sn.sn2.remote_vars_pa,
 				       XPC_RP_VARS_SIZE);
 		if (ret != xpSuccess) {
 			XPC_DEACTIVATE_PARTITION(part, ret);
@@ -768,7 +768,8 @@ xpc_check_remote_hb_sn2(void)
  * assumed to be of size XPC_RP_VARS_SIZE.
  */
 static enum xp_retval
-xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
+xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
+			struct xpc_vars_sn2 *remote_vars)
 {
 	enum xp_retval ret;
 
@@ -776,7 +777,7 @@ xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
 		return xpVarsNotSet;
 
 	/* pull over the cross partition variables */
-	ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
+	ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
 			       XPC_RP_VARS_SIZE);
 	if (ret != xpSuccess)
 		return ret;
@@ -791,7 +792,7 @@ xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
 
 static void
 xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
-				     u64 remote_rp_pa, int nasid)
+				     unsigned long remote_rp_pa, int nasid)
 {
 	xpc_send_local_activate_IRQ_sn2(nasid);
 }
@@ -883,7 +884,8 @@ xpc_partition_deactivation_requested_sn2(short partid)
 static void
 xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
 			      unsigned long *remote_rp_ts_jiffies,
-			      u64 remote_rp_pa, u64 remote_vars_pa,
+			      unsigned long remote_rp_pa,
+			      unsigned long remote_vars_pa,
 			      struct xpc_vars_sn2 *remote_vars)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
@@ -948,8 +950,8 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 {
 	struct xpc_rsvd_page *remote_rp;
 	struct xpc_vars_sn2 *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
+	unsigned long remote_rp_pa;
+	unsigned long remote_vars_pa;
 	int remote_rp_version;
 	int reactivate = 0;
 	unsigned long remote_rp_ts_jiffies = 0;
@@ -1291,11 +1293,11 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	 * The setting of the magic # indicates that these per partition
 	 * specific variables are ready to be used.
 	 */
-	xpc_vars_part_sn2[partid].GPs_pa = __pa(part_sn2->local_GPs);
+	xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
 	xpc_vars_part_sn2[partid].openclose_args_pa =
-	    __pa(part->local_openclose_args);
+	    xp_pa(part->local_openclose_args);
 	xpc_vars_part_sn2[partid].chctl_amo_pa =
-	    __pa(part_sn2->local_chctl_amo_va);
+	    xp_pa(part_sn2->local_chctl_amo_va);
 	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
 	xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
 	xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
@@ -1382,25 +1384,25 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
  * Create a wrapper that hides the underlying mechanism for pulling a cacheline
  * (or multiple cachelines) from a remote partition.
  *
- * src must be a cacheline aligned physical address on the remote partition.
+ * src_pa must be a cacheline aligned physical address on the remote partition.
  * dst must be a cacheline aligned virtual address on this partition.
  * cnt must be cacheline sized
  */
 /* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
 static enum xp_retval
 xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
-			       const void *src, size_t cnt)
+			       const unsigned long src_pa, size_t cnt)
 {
 	enum xp_retval ret;
 
-	DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
-	DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
+	DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
+	DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
 	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
 
 	if (part->act_state == XPC_P_DEACTIVATING)
 		return part->reason;
 
-	ret = xp_remote_memcpy(dst, src, cnt);
+	ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
 	if (ret != xpSuccess) {
 		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
 			" ret=%d\n", XPC_PARTID(part), ret);
@@ -1420,7 +1422,8 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
 	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
 	struct xpc_vars_part_sn2 *pulled_entry;
-	u64 remote_entry_cacheline_pa, remote_entry_pa;
+	unsigned long remote_entry_cacheline_pa;
+	unsigned long remote_entry_pa;
 	short partid = XPC_PARTID(part);
 	enum xp_retval ret;
 
@@ -1440,7 +1443,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 						    (L1_CACHE_BYTES - 1)));
 
 	ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
-					     (void *)remote_entry_cacheline_pa,
+					     remote_entry_cacheline_pa,
 					     L1_CACHE_BYTES);
 	if (ret != xpSuccess) {
 		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
@@ -1587,7 +1590,7 @@ xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
 	if (xpc_any_openclose_chctl_flags_set(&chctl)) {
 		ret = xpc_pull_remote_cachelines_sn2(part, part->
 						     remote_openclose_args,
-						     (void *)part_sn2->
+						     part_sn2->
 						     remote_openclose_args_pa,
 						     XPC_OPENCLOSE_ARGS_SIZE);
 		if (ret != xpSuccess) {
@@ -1604,7 +1607,7 @@ xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
 
 	if (xpc_any_msg_chctl_flags_set(&chctl)) {
 		ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
-						(void *)part_sn2->remote_GPs_pa,
+						     part_sn2->remote_GPs_pa,
 						     XPC_GP_SIZE);
 		if (ret != xpSuccess) {
 			XPC_DEACTIVATE_PARTITION(part, ret);
@@ -1971,8 +1974,10 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *remote_msg, *msg;
-	u32 msg_index, nmsgs;
+	unsigned long remote_msg_pa;
+	struct xpc_msg *msg;
+	u32 msg_index;
+	u32 nmsgs;
 	u64 msg_offset;
 	enum xp_retval ret;
 
@@ -1996,10 +2001,9 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 
 		msg_offset = msg_index * ch->msg_size;
 		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
-		remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
-						msg_offset);
+		remote_msg_pa = ch->remote_msgqueue_pa + msg_offset;
 
-		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg,
+		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
 						     nmsgs * ch->msg_size);
 		if (ret != xpSuccess) {
 
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 232867aa692..c2d4ddd6e95 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -61,7 +61,7 @@ xpc_heartbeat_exit_uv(void)
 
 static void
 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
-				    u64 remote_rp_pa, int nasid)
+				    unsigned long remote_rp_pa, int nasid)
 {
 	short partid = remote_rp->SAL_partid;
 	struct xpc_partition *part = &xpc_partitions[partid];
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 49385f44170..4f5d6223011 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -44,7 +44,7 @@ struct xpnet_message {
 	u16 version;		/* Version for this message */
 	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
 	u32 magic;		/* Special number indicating this is xpnet */
-	u64 buf_pa;		/* phys address of buffer to retrieve */
+	unsigned long buf_pa;	/* phys address of buffer to retrieve */
 	u32 size;		/* #of bytes in buffer */
 	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
 	u8 tailout_ignore;	/* #of bytes to ignore at the end */
@@ -152,6 +152,7 @@ static void
 xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 {
 	struct sk_buff *skb;
+	void *dst;
 	enum xp_retval ret;
 	struct xpnet_dev_private *priv =
 	    (struct xpnet_dev_private *)xpnet_device->priv;
@@ -166,9 +167,8 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 
 		return;
 	}
-	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n",
-		(unsigned long)msg->buf_pa, msg->size, msg->leadin_ignore,
-		msg->tailout_ignore);
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
 
 	/* reserve an extra cache line */
 	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
@@ -210,15 +210,12 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 		skb_copy_to_linear_data(skb, &msg->data,
 					(size_t)msg->embedded_bytes);
 	} else {
+		dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
 		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", (void *)
-				       ((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
 					  (void *)msg->buf_pa, msg->size);
 
-		ret = xp_remote_memcpy((void *)((u64)skb->data &
-						~(L1_CACHE_BYTES - 1)),
-				       (void *)msg->buf_pa, msg->size);
-
+		ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
 		if (ret != xpSuccess) {
 			/*
 			 * !!! Need better way of cleaning skb.  Currently skb
@@ -226,8 +223,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 			 * !!! dev_kfree_skb.
 			 */
 			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
-				"returned error=0x%x\n", (void *)
-				((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+				"returned error=0x%x\n", dst,
 				(void *)msg->buf_pa, msg->size, ret);
 
 			xpc_received(partid, channel, (void *)msg);
@@ -428,13 +424,13 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 	msg->size = end_addr - start_addr;
 	msg->leadin_ignore = (u64)skb->data - start_addr;
 	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
-	msg->buf_pa = __pa(start_addr);
+	msg->buf_pa = xp_pa((void *)start_addr);
 
 	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
 		KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
 		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
-		dest_partid, XPC_NET_CHANNEL, (unsigned long)msg->buf_pa,
-		msg->size, msg->leadin_ignore, msg->tailout_ignore);
+		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
 
 	atomic_inc(&queued_msg->use_count);
 
-- 
cgit v1.2.3


From 61deb86e98f51151b225f7563ee1cf2b50857d10 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:17 -0700
Subject: sgi-xp: move xpc_check_remote_hb() to support both SN2 and UV

Move xpc_check_remote_hb() so it can support both SN2 and UV.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h      |  2 +-
 drivers/misc/sgi-xp/xpc_main.c | 34 +++++++++++++++++++-
 drivers/misc/sgi-xp/xpc_sn2.c  | 70 ++++++++++++++----------------------------
 3 files changed, 57 insertions(+), 49 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 49e26993345..f258f89b8d3 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -632,7 +632,7 @@ extern void (*xpc_heartbeat_exit) (void);
 extern void (*xpc_increment_heartbeat) (void);
 extern void (*xpc_offline_heartbeat) (void);
 extern void (*xpc_online_heartbeat) (void);
-extern void (*xpc_check_remote_hb) (void);
+extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
 extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
 extern enum xp_retval (*xpc_allocate_msgqueues) (struct xpc_channel *);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index dc686110aef..f4d866113f2 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -178,7 +178,7 @@ void (*xpc_heartbeat_exit) (void);
 void (*xpc_increment_heartbeat) (void);
 void (*xpc_offline_heartbeat) (void);
 void (*xpc_online_heartbeat) (void);
-void (*xpc_check_remote_hb) (void);
+enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
 
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
@@ -269,6 +269,38 @@ xpc_stop_hb_beater(void)
 	xpc_heartbeat_exit();
 }
 
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active.  If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+	struct xpc_partition *part;
+	short partid;
+	enum xp_retval ret;
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+		if (xpc_exiting)
+			break;
+
+		if (partid == xp_partition_id)
+			continue;
+
+		part = &xpc_partitions[partid];
+
+		if (part->act_state == XPC_P_INACTIVE ||
+		    part->act_state == XPC_P_DEACTIVATING) {
+			continue;
+		}
+
+		ret = xpc_get_remote_heartbeat(part);
+		if (ret != xpSuccess)
+			XPC_DEACTIVATE_PARTITION(part, ret);
+	}
+}
+
 /*
  * This thread is responsible for nearly all of the partition
  * activation/deactivation.
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 1571a7cdf9d..d34cdd533a9 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -704,61 +704,37 @@ xpc_heartbeat_exit_sn2(void)
 	xpc_offline_heartbeat_sn2();
 }
 
-/*
- * At periodic intervals, scan through all active partitions and ensure
- * their heartbeat is still active.  If not, the partition is deactivated.
- */
-static void
-xpc_check_remote_hb_sn2(void)
+static enum xp_retval
+xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
 {
 	struct xpc_vars_sn2 *remote_vars;
-	struct xpc_partition *part;
-	short partid;
 	enum xp_retval ret;
 
 	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
 
-	for (partid = 0; partid < XP_MAX_NPARTITIONS_SN2; partid++) {
-
-		if (xpc_exiting)
-			break;
-
-		if (partid == sn_partition_id)
-			continue;
-
-		part = &xpc_partitions[partid];
-
-		if (part->act_state == XPC_P_INACTIVE ||
-		    part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		/* pull the remote_hb cache line */
-		ret = xp_remote_memcpy(xp_pa(remote_vars),
-				       part->sn.sn2.remote_vars_pa,
-				       XPC_RP_VARS_SIZE);
-		if (ret != xpSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			continue;
-		}
-
-		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
-			" = %ld, heartbeat_offline = %ld, HB_mask[0] = 0x%lx\n",
-			partid, remote_vars->heartbeat, part->last_heartbeat,
-			remote_vars->heartbeat_offline,
-			remote_vars->heartbeating_to_mask[0]);
-
-		if (((remote_vars->heartbeat == part->last_heartbeat) &&
-		     (remote_vars->heartbeat_offline == 0)) ||
-		    !xpc_hb_allowed(sn_partition_id,
-				    &remote_vars->heartbeating_to_mask)) {
-
-			XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
-			continue;
-		}
+	/* pull the remote vars structure that contains the heartbeat */
+	ret = xp_remote_memcpy(xp_pa(remote_vars),
+			       part->sn.sn2.remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
 
+	dev_dbg(xpc_part, "partid=%d, heartbeat=%ld, last_heartbeat=%ld, "
+		"heartbeat_offline=%ld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
+		remote_vars->heartbeat, part->last_heartbeat,
+		remote_vars->heartbeat_offline,
+		remote_vars->heartbeating_to_mask[0]);
+
+	if ((remote_vars->heartbeat == part->last_heartbeat &&
+	    remote_vars->heartbeat_offline == 0) ||
+	    !xpc_hb_allowed(sn_partition_id,
+			    &remote_vars->heartbeating_to_mask)) {
+		ret = xpNoHeartbeat;
+	} else {
 		part->last_heartbeat = remote_vars->heartbeat;
 	}
+
+	return ret;
 }
 
 /*
@@ -2416,7 +2392,7 @@ xpc_init_sn2(void)
 	xpc_online_heartbeat = xpc_online_heartbeat_sn2;
 	xpc_heartbeat_init = xpc_heartbeat_init_sn2;
 	xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
-	xpc_check_remote_hb = xpc_check_remote_hb_sn2;
+	xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2;
 
 	xpc_request_partition_activation = xpc_request_partition_activation_sn2;
 	xpc_request_partition_reactivation =
-- 
cgit v1.2.3


From 83469b5525b4a35be40b17cb41d64118d84d9f80 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:18 -0700
Subject: sgi-xp: cleanup naming of partition defines

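Clean up the naming of the partition defines: act_state values now carry an
XPC_P_AS_ prefix and setup_state values an XPC_P_SS_ prefix.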

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc.h           | 22 +++++++++++-----------
 drivers/misc/sgi-xp/xpc_channel.c   | 10 +++++-----
 drivers/misc/sgi-xp/xpc_main.c      | 32 ++++++++++++++++----------------
 drivers/misc/sgi-xp/xpc_partition.c | 18 +++++++++---------
 drivers/misc/sgi-xp/xpc_sn2.c       | 20 ++++++++++----------
 5 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index f258f89b8d3..1e48f776505 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -576,21 +576,21 @@ struct xpc_partition {
 
 /* struct xpc_partition act_state values (for XPC HB) */
 
-#define	XPC_P_INACTIVE		0x00	/* partition is not active */
-#define XPC_P_ACTIVATION_REQ	0x01	/* created thread to activate */
-#define XPC_P_ACTIVATING	0x02	/* activation thread started */
-#define XPC_P_ACTIVE		0x03	/* xpc_partition_up() was called */
-#define XPC_P_DEACTIVATING	0x04	/* partition deactivation initiated */
+#define	XPC_P_AS_INACTIVE	0x00	/* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ	0x01	/* created thread to activate */
+#define XPC_P_AS_ACTIVATING	0x02	/* activation thread started */
+#define XPC_P_AS_ACTIVE		0x03	/* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING	0x04	/* partition deactivation initiated */
 
 #define XPC_DEACTIVATE_PARTITION(_p, _reason) \
 			xpc_deactivate_partition(__LINE__, (_p), (_reason))
 
 /* struct xpc_partition setup_state values */
 
-#define XPC_P_UNSET		0x00	/* infrastructure was never setup */
-#define XPC_P_SETUP		0x01	/* infrastructure is setup */
-#define XPC_P_WTEARDOWN		0x02	/* waiting to teardown infrastructure */
-#define XPC_P_TORNDOWN		0x03	/* infrastructure is torndown */
+#define XPC_P_SS_UNSET		0x00	/* infrastructure was never setup */
+#define XPC_P_SS_SETUP		0x01	/* infrastructure is setup */
+#define XPC_P_SS_WTEARDOWN	0x02	/* waiting to teardown infrastructure */
+#define XPC_P_SS_TORNDOWN	0x03	/* infrastructure is torndown */
 
 /*
  * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
@@ -787,7 +787,7 @@ xpc_part_deref(struct xpc_partition *part)
 	s32 refs = atomic_dec_return(&part->references);
 
 	DBUG_ON(refs < 0);
-	if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN)
+	if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
 		wake_up(&part->teardown_wq);
 }
 
@@ -797,7 +797,7 @@ xpc_part_ref(struct xpc_partition *part)
 	int setup;
 
 	atomic_inc(&part->references);
-	setup = (part->setup_state == XPC_P_SETUP);
+	setup = (part->setup_state == XPC_P_SS_SETUP);
 	if (!setup)
 		xpc_part_deref(part);
 
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index d7a15f1a78a..17ab75d69e8 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -99,7 +99,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
 		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
 
-	if (part->act_state == XPC_P_DEACTIVATING) {
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
 		/* can't proceed until the other side disengages from us */
 		if (xpc_partition_engaged(ch->partid))
 			return;
@@ -155,7 +155,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 		/* we won't lose the CPU since we're holding ch->lock */
 		complete(&ch->wdisconnect_wait);
 	} else if (ch->delayed_chctl_flags) {
-		if (part->act_state != XPC_P_DEACTIVATING) {
+		if (part->act_state != XPC_P_AS_DEACTIVATING) {
 			/* time to take action on any delayed chctl flags */
 			spin_lock(&part->chctl_lock);
 			part->chctl.flags[ch->number] |=
@@ -276,7 +276,7 @@ again:
 			"%d, channel=%d\n", ch->partid, ch->number);
 
 		if (ch->flags & XPC_C_DISCONNECTED) {
-			DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
+			DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
 		}
@@ -312,7 +312,7 @@ again:
 			"channel=%d\n", args->msg_size, args->local_nentries,
 			ch->partid, ch->number);
 
-		if (part->act_state == XPC_P_DEACTIVATING ||
+		if (part->act_state == XPC_P_AS_DEACTIVATING ||
 		    (ch->flags & XPC_C_ROPENREQUEST)) {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
@@ -546,7 +546,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part)
 			continue;
 		}
 
-		if (part->act_state == XPC_P_DEACTIVATING)
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
 			continue;
 
 		if (!(ch_flags & XPC_C_CONNECTED)) {
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index f4d866113f2..b303c130bba 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -290,8 +290,8 @@ xpc_check_remote_hb(void)
 
 		part = &xpc_partitions[partid];
 
-		if (part->act_state == XPC_P_INACTIVE ||
-		    part->act_state == XPC_P_DEACTIVATING) {
+		if (part->act_state == XPC_P_AS_INACTIVE ||
+		    part->act_state == XPC_P_AS_DEACTIVATING) {
 			continue;
 		}
 
@@ -406,7 +406,7 @@ xpc_initiate_discovery(void *ignore)
 static void
 xpc_channel_mgr(struct xpc_partition *part)
 {
-	while (part->act_state != XPC_P_DEACTIVATING ||
+	while (part->act_state != XPC_P_AS_DEACTIVATING ||
 	       atomic_read(&part->nchannels_active) > 0 ||
 	       !xpc_partition_disengaged(part)) {
 
@@ -429,7 +429,7 @@ xpc_channel_mgr(struct xpc_partition *part)
 		(void)wait_event_interruptible(part->channel_mgr_wq,
 				(atomic_read(&part->channel_mgr_requests) > 0 ||
 				 part->chctl.all_flags != 0 ||
-				 (part->act_state == XPC_P_DEACTIVATING &&
+				 (part->act_state == XPC_P_AS_DEACTIVATING &&
 				 atomic_read(&part->nchannels_active) == 0 &&
 				 xpc_partition_disengaged(part))));
 		atomic_set(&part->channel_mgr_requests, 1);
@@ -458,16 +458,16 @@ xpc_activating(void *__partid)
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
 
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		part->act_state = XPC_P_INACTIVE;
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		part->act_state = XPC_P_AS_INACTIVE;
 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
 		part->remote_rp_pa = 0;
 		return 0;
 	}
 
 	/* indicate the thread is activating */
-	DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
-	part->act_state = XPC_P_ACTIVATING;
+	DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+	part->act_state = XPC_P_AS_ACTIVATING;
 
 	XPC_SET_REASON(part, 0, 0);
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -509,9 +509,9 @@ xpc_activate_partition(struct xpc_partition *part)
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
 
-	DBUG_ON(part->act_state != XPC_P_INACTIVE);
+	DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
 
-	part->act_state = XPC_P_ACTIVATION_REQ;
+	part->act_state = XPC_P_AS_ACTIVATION_REQ;
 	XPC_SET_REASON(part, xpCloneKThread, __LINE__);
 
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -520,7 +520,7 @@ xpc_activate_partition(struct xpc_partition *part)
 			      partid);
 	if (IS_ERR(kthread)) {
 		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
+		part->act_state = XPC_P_AS_INACTIVE;
 		XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
 	}
@@ -786,7 +786,7 @@ xpc_disconnect_wait(int ch_number)
 		wakeup_channel_mgr = 0;
 
 		if (ch->delayed_chctl_flags) {
-			if (part->act_state != XPC_P_DEACTIVATING) {
+			if (part->act_state != XPC_P_AS_DEACTIVATING) {
 				spin_lock(&part->chctl_lock);
 				part->chctl.flags[ch->number] |=
 				    ch->delayed_chctl_flags;
@@ -846,7 +846,7 @@ xpc_do_exit(enum xp_retval reason)
 			part = &xpc_partitions[partid];
 
 			if (xpc_partition_disengaged(part) &&
-			    part->act_state == XPC_P_INACTIVE) {
+			    part->act_state == XPC_P_AS_INACTIVE) {
 				continue;
 			}
 
@@ -962,7 +962,7 @@ xpc_die_deactivate(void)
 		part = &xpc_partitions[partid];
 
 		if (xpc_partition_engaged(partid) ||
-		    part->act_state != XPC_P_INACTIVE) {
+		    part->act_state != XPC_P_AS_INACTIVE) {
 			xpc_request_partition_deactivation(part);
 			xpc_indicate_partition_disengaged(part);
 		}
@@ -1113,7 +1113,7 @@ xpc_init(void)
 
 		part->activate_IRQ_rcvd = 0;
 		spin_lock_init(&part->act_lock);
-		part->act_state = XPC_P_INACTIVE;
+		part->act_state = XPC_P_AS_INACTIVE;
 		XPC_SET_REASON(part, 0, 0);
 
 		init_timer(&part->disengage_timer);
@@ -1121,7 +1121,7 @@ xpc_init(void)
 		    xpc_timeout_partition_disengage;
 		part->disengage_timer.data = (unsigned long)part;
 
-		part->setup_state = XPC_P_UNSET;
+		part->setup_state = XPC_P_SS_UNSET;
 		init_waitqueue_head(&part->teardown_wq);
 		atomic_set(&part->references, 0);
 	}
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index f150dbfcfcc..b5fb2164113 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -273,9 +273,9 @@ xpc_partition_disengaged(struct xpc_partition *part)
 		if (!in_interrupt())
 			del_singleshot_timer_sync(&part->disengage_timer);
 
-		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
-			part->act_state != XPC_P_INACTIVE);
-		if (part->act_state != XPC_P_INACTIVE)
+		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+			part->act_state != XPC_P_AS_INACTIVE);
+		if (part->act_state != XPC_P_AS_INACTIVE)
 			xpc_wakeup_channel_mgr(part);
 
 		xpc_cancel_partition_deactivation_request(part);
@@ -295,8 +295,8 @@ xpc_mark_partition_active(struct xpc_partition *part)
 	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
-	if (part->act_state == XPC_P_ACTIVATING) {
-		part->act_state = XPC_P_ACTIVE;
+	if (part->act_state == XPC_P_AS_ACTIVATING) {
+		part->act_state = XPC_P_AS_ACTIVE;
 		ret = xpSuccess;
 	} else {
 		DBUG_ON(part->reason == xpSuccess);
@@ -318,7 +318,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
 
-	if (part->act_state == XPC_P_INACTIVE) {
+	if (part->act_state == XPC_P_AS_INACTIVE) {
 		XPC_SET_REASON(part, reason, line);
 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
 		if (reason == xpReactivating) {
@@ -327,7 +327,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 		}
 		return;
 	}
-	if (part->act_state == XPC_P_DEACTIVATING) {
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
 		if ((part->reason == xpUnloading && reason != xpUnloading) ||
 		    reason == xpReactivating) {
 			XPC_SET_REASON(part, reason, line);
@@ -336,7 +336,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 		return;
 	}
 
-	part->act_state = XPC_P_DEACTIVATING;
+	part->act_state = XPC_P_AS_DEACTIVATING;
 	XPC_SET_REASON(part, reason, line);
 
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -367,7 +367,7 @@ xpc_mark_partition_inactive(struct xpc_partition *part)
 		XPC_PARTID(part));
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
-	part->act_state = XPC_P_INACTIVE;
+	part->act_state = XPC_P_AS_INACTIVE;
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
 	part->remote_rp_pa = 0;
 }
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index d34cdd533a9..d1ccadc0857 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -327,7 +327,7 @@ xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
 	union xpc_channel_ctl_flags chctl = { 0 };
 	enum xp_retval ret;
 
-	if (likely(part->act_state != XPC_P_DEACTIVATING)) {
+	if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
 		chctl.flags[ch->number] = chctl_flag;
 		ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
 				       chctl.all_flags,
@@ -975,7 +975,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
 		remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
 
 	if (xpc_partition_disengaged(part) &&
-	    part->act_state == XPC_P_INACTIVE) {
+	    part->act_state == XPC_P_AS_INACTIVE) {
 
 		xpc_update_partition_info_sn2(part, remote_rp_version,
 					      &remote_rp_ts_jiffies,
@@ -1257,10 +1257,10 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	}
 
 	/*
-	 * With the setting of the partition setup_state to XPC_P_SETUP, we're
-	 * declaring that this partition is ready to go.
+	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+	 * we're declaring that this partition is ready to go.
 	 */
-	part->setup_state = XPC_P_SETUP;
+	part->setup_state = XPC_P_SS_SETUP;
 
 	/*
 	 * Setup the per partition specific variables required by the
@@ -1323,8 +1323,8 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 
 	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
 	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
-	DBUG_ON(part->setup_state != XPC_P_SETUP);
-	part->setup_state = XPC_P_WTEARDOWN;
+	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+	part->setup_state = XPC_P_SS_WTEARDOWN;
 
 	xpc_vars_part_sn2[partid].magic = 0;
 
@@ -1338,7 +1338,7 @@ xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
 
 	/* now we can begin tearing down the infrastructure */
 
-	part->setup_state = XPC_P_TORNDOWN;
+	part->setup_state = XPC_P_SS_TORNDOWN;
 
 	/* in case we've still got outstanding timers registered... */
 	del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
@@ -1375,7 +1375,7 @@ xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
 	DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
 	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
 
-	if (part->act_state == XPC_P_DEACTIVATING)
+	if (part->act_state == XPC_P_AS_DEACTIVATING)
 		return part->reason;
 
 	ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
@@ -1534,7 +1534,7 @@ xpc_make_first_contact_sn2(struct xpc_partition *part)
 		/* wait a 1/4 of a second or so */
 		(void)msleep_interruptible(250);
 
-		if (part->act_state == XPC_P_DEACTIVATING)
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
 			return part->reason;
 	}
 
-- 
cgit v1.2.3


From 5b8669dfd110a62a74eea525a009342f73987ea0 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:18 -0700
Subject: sgi-xp: setup the activate GRU message queue

Set up the activate GRU message queue that is used for partition activation
and channel connection on UV systems.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h            |   3 +-
 drivers/misc/sgi-xp/xp_uv.c         |  10 +
 drivers/misc/sgi-xp/xpc.h           | 158 ++++++--
 drivers/misc/sgi-xp/xpc_channel.c   |  22 +-
 drivers/misc/sgi-xp/xpc_main.c      | 329 +++++++++++----
 drivers/misc/sgi-xp/xpc_partition.c |  28 +-
 drivers/misc/sgi-xp/xpc_sn2.c       | 387 +++++++-----------
 drivers/misc/sgi-xp/xpc_uv.c        | 781 ++++++++++++++++++++++++++++++++++--
 8 files changed, 1328 insertions(+), 390 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 45d0a08c2dd..9ac5758f4d0 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -208,8 +208,9 @@ enum xp_retval {
 	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
 
 	xpGruCopyError,		/* 58: gru_copy_gru() returned error */
+	xpGruSendMqError,	/* 59: gru send message queue related error */
 
-	xpUnknownReason		/* 59: unknown reason - must be last in enum */
+	xpUnknownReason		/* 60: unknown reason - must be last in enum */
 };
 
 /*
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index 44f2c2b58c2..d9f7ce2510b 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -42,15 +42,25 @@ xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
 	return xpGruCopyError;
 }
 
+static int
+xp_cpu_to_nasid_uv(int cpuid)
+{
+	/* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
+	return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
+}
+
 enum xp_retval
 xp_init_uv(void)
 {
 	BUG_ON(!is_uv());
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+	xp_partition_id = 0;	/* !!! not correct value */
+	xp_region_size = 0;	/* !!! not correct value */
 
 	xp_pa = xp_pa_uv;
 	xp_remote_memcpy = xp_remote_memcpy_uv;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
 
 	return xpSuccess;
 }
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 1e48f776505..4c26181deff 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -164,8 +164,8 @@ struct xpc_vars_part_sn2 {
  * MAGIC2 indicates that this partition has pulled the remote partititions
  * per partition variables that pertain to this partition.
  */
-#define XPC_VP_MAGIC1	0x0053524156435058L   /* 'XPCVARS\0'L (little endian) */
-#define XPC_VP_MAGIC2	0x0073726176435058L   /* 'XPCvars\0'L (little endian) */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
 
 /* the reserved page sizes and offsets */
 
@@ -180,6 +180,80 @@ struct xpc_vars_part_sn2 {
 				 (XPC_RP_MACH_NASIDS(_rp) + \
 				  xpc_nasid_mask_nlongs))
 
+/*
+ * The activate_mq is used to send/receive messages that affect XPC's heartbeat,
+ * partition active state, and channel state. This is UV only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+	short partid;		/* sender's partid */
+	u8 act_state;		/* sender's act_state at time msg sent */
+	u8 type;		/* message's type */
+	unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV		0
+#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV		1
+#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV	2
+#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV		3
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV		4
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV		5
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV	6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV		7
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV	8
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV		9
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV		10
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV		11
+
+struct xpc_activate_mq_msg_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+};
+
+struct xpc_activate_mq_msg_heartbeat_req_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	u64 heartbeat;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	unsigned long rp_gpa;
+	unsigned long activate_mq_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	short ch_number;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	short ch_number;
+	short msg_size;		/* size of notify_mq's messages */
+	short local_nentries;	/* ??? Is this needed? What is? */
+};
+
+struct xpc_activate_mq_msg_chctl_openreply_uv {
+	struct xpc_activate_mq_msghdr_uv header;
+	short ch_number;
+	short remote_nentries;	/* ??? Is this needed? What is? */
+	short local_nentries;	/* ??? Is this needed? What is? */
+	unsigned long local_notify_mq_gpa;
+};
+
 /*
  * Functions registered by add_timer() or called by kernel_thread() only
  * allow for a single 64-bit argument. The following macros can be used to
@@ -331,6 +405,18 @@ struct xpc_notify {
  */
 
 struct xpc_channel_sn2 {
+	struct xpc_openclose_args *local_openclose_args; /* args passed on */
+					     /* opening or closing of channel */
+
+	void *local_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg *local_msgqueue;	/* local message queue */
+	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
+					 /* local message queue */
+	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+					  /* local message queue */
+
+	struct xpc_notify *notify_queue;    /* notify queue for messages sent */
 
 	/* various flavors of local and remote Get/Put values */
 
@@ -344,13 +430,14 @@ struct xpc_channel_sn2 {
 };
 
 struct xpc_channel_uv {
-	/* !!! code is coming */
+	unsigned long remote_notify_mq_gpa;	/* gru phys address of remote */
+						/* partition's notify mq */
 };
 
 struct xpc_channel {
 	short partid;		/* ID of remote partition connected */
 	spinlock_t lock;	/* lock for updating this structure */
-	u32 flags;		/* general flags */
+	unsigned int flags;	/* general flags */
 
 	enum xp_retval reason;	/* reason why channel is disconnect'g */
 	int reason_line;	/* line# disconnect initiated from */
@@ -361,14 +448,6 @@ struct xpc_channel {
 	u16 local_nentries;	/* #of msg entries in local msg queue */
 	u16 remote_nentries;	/* #of msg entries in remote msg queue */
 
-	void *local_msgqueue_base;	/* base address of kmalloc'd space */
-	struct xpc_msg *local_msgqueue;	/* local message queue */
-	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
-	struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
-					 /* local message queue */
-	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
-					  /* local message queue */
-
 	atomic_t references;	/* #of external references to queues */
 
 	atomic_t n_on_msg_allocate_wq;	/* #on msg allocation wait queue */
@@ -377,19 +456,13 @@ struct xpc_channel {
 	u8 delayed_chctl_flags;	/* chctl flags received, but delayed */
 				/* action until channel disconnected */
 
-	/* queue of msg senders who want to be notified when msg received */
-
 	atomic_t n_to_notify;	/* #of msg senders to notify */
-	struct xpc_notify *notify_queue;    /* notify queue for messages sent */
 
 	xpc_channel_func func;	/* user's channel function */
 	void *key;		/* pointer to user's key */
 
 	struct completion wdisconnect_wait;    /* wait for channel disconnect */
 
-	struct xpc_openclose_args *local_openclose_args; /* args passed on */
-					     /* opening or closing of channel */
-
 	/* kthread management related fields */
 
 	atomic_t kthreads_assigned;	/* #of kthreads assigned to channel */
@@ -507,6 +580,8 @@ struct xpc_partition_sn2 {
 	unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
 				     /* Get/Put values */
 
+	void *local_openclose_args_base;   /* base address of kmalloc'd space */
+	struct xpc_openclose_args *local_openclose_args;      /* local's args */
 	unsigned long remote_openclose_args_pa;	/* phys addr of remote's args */
 
 	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
@@ -520,9 +595,27 @@ struct xpc_partition_sn2 {
 };
 
 struct xpc_partition_uv {
-	/* !!! code is coming */
+	unsigned long remote_activate_mq_gpa;	/* gru phys address of remote */
+						/* partition's activate mq */
+	spinlock_t flags_lock;	/* protect updating of flags */
+	unsigned int flags;	/* general flags */
+	u8 remote_act_state;	/* remote partition's act_state */
+	u8 act_state_req;	/* act_state request from remote partition */
+	enum xp_retval reason;	/* reason for deactivate act_state request */
+	u64 heartbeat;		/* incremented by remote partition */
 };
 
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_HEARTBEAT_OFFLINE_UV	0x00000001
+#define XPC_P_ENGAGED_UV		0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV		0x01
+#define XPC_P_ASR_REACTIVATE_UV		0x02
+#define XPC_P_ASR_DEACTIVATE_UV		0x03
+
 struct xpc_partition {
 
 	/* XPC HB infrastructure */
@@ -556,8 +649,6 @@ struct xpc_partition {
 	union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
 	spinlock_t chctl_lock;	/* chctl flags lock */
 
-	void *local_openclose_args_base;   /* base address of kmalloc'd space */
-	struct xpc_openclose_args *local_openclose_args;      /* local's args */
 	void *remote_openclose_args_base;  /* base address of kmalloc'd space */
 	struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
 							  /* args */
@@ -616,17 +707,20 @@ extern struct device *xpc_part;
 extern struct device *xpc_chan;
 extern int xpc_disengage_timelimit;
 extern int xpc_disengage_timedout;
-extern atomic_t xpc_activate_IRQ_rcvd;
+extern int xpc_activate_IRQ_rcvd;
+extern spinlock_t xpc_activate_IRQ_rcvd_lock;
 extern wait_queue_head_t xpc_activate_IRQ_wq;
 extern void *xpc_heartbeating_to_mask;
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
+extern int (*xpc_setup_partitions_sn) (void);
 extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
 							 unsigned long *,
 							 size_t *);
-extern enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *);
+extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *);
 extern void (*xpc_heartbeat_init) (void);
 extern void (*xpc_heartbeat_exit) (void);
 extern void (*xpc_increment_heartbeat) (void);
@@ -635,8 +729,8 @@ extern void (*xpc_online_heartbeat) (void);
 extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
 extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
 extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
-extern enum xp_retval (*xpc_allocate_msgqueues) (struct xpc_channel *);
-extern void (*xpc_free_msgqueues) (struct xpc_channel *);
+extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
+extern void (*xpc_teardown_msg_structures) (struct xpc_channel *);
 extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
 extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
 extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
@@ -647,9 +741,9 @@ extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
 extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
 extern void (*xpc_cancel_partition_deactivation_request) (
 							struct xpc_partition *);
-extern void (*xpc_process_activate_IRQ_rcvd) (int);
-extern enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *);
-extern void (*xpc_teardown_infrastructure) (struct xpc_partition *);
+extern void (*xpc_process_activate_IRQ_rcvd) (void);
+extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *);
+extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *);
 
 extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *);
 extern int (*xpc_partition_engaged) (short);
@@ -665,6 +759,9 @@ extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
 					   unsigned long *);
 extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
 
+extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
+					    unsigned long);
+
 extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
 				       u8, xpc_notify_func, void *);
 extern void (*xpc_received_msg) (struct xpc_channel *, struct xpc_msg *);
@@ -674,7 +771,7 @@ extern int xpc_init_sn2(void);
 extern void xpc_exit_sn2(void);
 
 /* found in xpc_uv.c */
-extern void xpc_init_uv(void);
+extern int xpc_init_uv(void);
 extern void xpc_exit_uv(void);
 
 /* found in xpc_partition.c */
@@ -684,7 +781,8 @@ extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern unsigned long *xpc_mach_nasids;
 extern struct xpc_partition *xpc_partitions;
 extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
-extern struct xpc_rsvd_page *xpc_setup_rsvd_page(void);
+extern int xpc_setup_rsvd_page(void);
+extern void xpc_teardown_rsvd_page(void);
 extern int xpc_identify_activate_IRQ_sender(void);
 extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 17ab75d69e8..73df9fb5ee6 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -39,7 +39,7 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	if (!(ch->flags & XPC_C_SETUP)) {
 		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		ret = xpc_allocate_msgqueues(ch);
+		ret = xpc_setup_msg_structures(ch);
 		spin_lock_irqsave(&ch->lock, *irq_flags);
 
 		if (ret != xpSuccess)
@@ -62,8 +62,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 	if (!(ch->flags & XPC_C_ROPENREPLY))
 		return;
 
-	DBUG_ON(ch->remote_msgqueue_pa == 0);
-
 	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
 
 	dev_info(xpc_chan, "channel %d to partition %d connected\n",
@@ -134,13 +132,23 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 		spin_lock_irqsave(&ch->lock, *irq_flags);
 	}
 
+	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
 	/* it's now safe to free the channel's message queues */
-	xpc_free_msgqueues(ch);
+	xpc_teardown_msg_structures(ch);
+
+	ch->func = NULL;
+	ch->key = NULL;
+	ch->msg_size = 0;
+	ch->local_nentries = 0;
+	ch->remote_nentries = 0;
+	ch->kthreads_assigned_limit = 0;
+	ch->kthreads_idle_limit = 0;
 
 	/*
 	 * Mark the channel disconnected and clear all other flags, including
-	 * XPC_C_SETUP (because of call to xpc_free_msgqueues()) but not
-	 * including XPC_C_WDISCONNECT (if it was set).
+	 * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but
+	 * not including XPC_C_WDISCONNECT (if it was set).
 	 */
 	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
 
@@ -395,7 +403,7 @@ again:
 		DBUG_ON(args->remote_nentries == 0);
 
 		ch->flags |= XPC_C_ROPENREPLY;
-		ch->remote_msgqueue_pa = args->local_msgqueue_pa;
+		xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa);
 
 		if (args->local_nentries < ch->remote_nentries) {
 			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index b303c130bba..13ec4792899 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -141,8 +141,9 @@ static struct ctl_table_header *xpc_sysctl;
 /* non-zero if any remote partition disengage was timed out */
 int xpc_disengage_timedout;
 
-/* #of activate IRQs received */
-atomic_t xpc_activate_IRQ_rcvd = ATOMIC_INIT(0);
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
 
 /* IRQ handler notifies this wait queue on receipt of an IRQ */
 DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
@@ -169,10 +170,11 @@ static struct notifier_block xpc_die_notifier = {
 	.notifier_call = xpc_system_die,
 };
 
+int (*xpc_setup_partitions_sn) (void);
 enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
 						  unsigned long *rp_pa,
 						  size_t *len);
-enum xp_retval (*xpc_rsvd_page_init) (struct xpc_rsvd_page *rp);
+int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
 void (*xpc_heartbeat_init) (void);
 void (*xpc_heartbeat_exit) (void);
 void (*xpc_increment_heartbeat) (void);
@@ -183,8 +185,8 @@ enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
 enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
 u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
-enum xp_retval (*xpc_allocate_msgqueues) (struct xpc_channel *ch);
-void (*xpc_free_msgqueues) (struct xpc_channel *ch);
+enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
+void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
 void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
 int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
 struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
@@ -196,9 +198,9 @@ void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
 void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
 void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
 
-void (*xpc_process_activate_IRQ_rcvd) (int n_IRQs_expected);
-enum xp_retval (*xpc_setup_infrastructure) (struct xpc_partition *part);
-void (*xpc_teardown_infrastructure) (struct xpc_partition *part);
+void (*xpc_process_activate_IRQ_rcvd) (void);
+enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
+void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
 
 void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
 int (*xpc_partition_engaged) (short partid);
@@ -215,6 +217,9 @@ void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
 void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
 				  unsigned long *irq_flags);
 
+void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
+				     unsigned long msgqueue_pa);
+
 enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
 				void *payload, u16 payload_size, u8 notify_type,
 				xpc_notify_func func, void *key);
@@ -308,8 +313,6 @@ xpc_check_remote_hb(void)
 static int
 xpc_hb_checker(void *ignore)
 {
-	int last_IRQ_count = 0;
-	int new_IRQ_count;
 	int force_IRQ = 0;
 
 	/* this thread was marked active by xpc_hb_init() */
@@ -325,43 +328,37 @@ xpc_hb_checker(void *ignore)
 		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 			"been received\n",
 			(int)(xpc_hb_check_timeout - jiffies),
-			atomic_read(&xpc_activate_IRQ_rcvd) - last_IRQ_count);
+			xpc_activate_IRQ_rcvd);
 
 		/* checking of remote heartbeats is skewed by IRQ handling */
 		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+			xpc_hb_check_timeout = jiffies +
+			    (xpc_hb_check_interval * HZ);
+
 			dev_dbg(xpc_part, "checking remote heartbeats\n");
 			xpc_check_remote_hb();
 
 			/*
-			 * We need to periodically recheck to ensure no
-			 * IRQ/amo pairs have been missed.  That check
-			 * must always reset xpc_hb_check_timeout.
+			 * On sn2 we need to periodically recheck to ensure no
+			 * IRQ/amo pairs have been missed.
 			 */
-			force_IRQ = 1;
+			if (is_shub())
+				force_IRQ = 1;
 		}
 
 		/* check for outstanding IRQs */
-		new_IRQ_count = atomic_read(&xpc_activate_IRQ_rcvd);
-		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
+		if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
 			force_IRQ = 0;
-
-			dev_dbg(xpc_part, "found an IRQ to process; will be "
-				"resetting xpc_hb_check_timeout\n");
-
-			xpc_process_activate_IRQ_rcvd(new_IRQ_count -
-						      last_IRQ_count);
-			last_IRQ_count = new_IRQ_count;
-
-			xpc_hb_check_timeout = jiffies +
-			    (xpc_hb_check_interval * HZ);
+			dev_dbg(xpc_part, "processing activate IRQs "
+				"received\n");
+			xpc_process_activate_IRQ_rcvd();
 		}
 
 		/* wait for IRQ or timeout */
 		(void)wait_event_interruptible(xpc_activate_IRQ_wq,
-					       (last_IRQ_count < atomic_read(
-						&xpc_activate_IRQ_rcvd)
-						|| time_is_before_eq_jiffies(
+					       (time_is_before_eq_jiffies(
 						xpc_hb_check_timeout) ||
+						xpc_activate_IRQ_rcvd > 0 ||
 						xpc_exiting));
 	}
 
@@ -436,6 +433,153 @@ xpc_channel_mgr(struct xpc_partition *part)
 	}
 }
 
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kzalloc will give us cacheline aligned memory by default */
+	*base = kzalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kzalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
+{
+	enum xp_retval ret;
+	int ch_number;
+	struct xpc_channel *ch;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Allocate all of the channel structures as a contiguous chunk of
+	 * memory.
+	 */
+	DBUG_ON(part->channels != NULL);
+	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+				 GFP_KERNEL);
+	if (part->channels == NULL) {
+		dev_err(xpc_chan, "can't get memory for channels\n");
+		return xpNoMemory;
+	}
+
+	/* allocate the remote open and close args */
+
+	part->remote_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part->
+					  remote_openclose_args_base);
+	if (part->remote_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote connect args\n");
+		ret = xpNoMemory;
+		goto out_1;
+	}
+
+	part->chctl.all_flags = 0;
+	spin_lock_init(&part->chctl_lock);
+
+	atomic_set(&part->channel_mgr_requests, 1);
+	init_waitqueue_head(&part->channel_mgr_wq);
+
+	part->nchannels = XPC_MAX_NCHANNELS;
+
+	atomic_set(&part->nchannels_active, 0);
+	atomic_set(&part->nchannels_engaged, 0);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		ch->partid = partid;
+		ch->number = ch_number;
+		ch->flags = XPC_C_DISCONNECTED;
+
+		atomic_set(&ch->kthreads_assigned, 0);
+		atomic_set(&ch->kthreads_idle, 0);
+		atomic_set(&ch->kthreads_active, 0);
+
+		atomic_set(&ch->references, 0);
+		atomic_set(&ch->n_to_notify, 0);
+
+		spin_lock_init(&ch->lock);
+		init_completion(&ch->wdisconnect_wait);
+
+		atomic_set(&ch->n_on_msg_allocate_wq, 0);
+		init_waitqueue_head(&ch->msg_allocate_wq);
+		init_waitqueue_head(&ch->idle_wq);
+	}
+
+	ret = xpc_setup_ch_structures_sn(part);
+	if (ret != xpSuccess)
+		goto out_2;
+
+	/*
+	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+	 * we're declaring that this partition is ready to go.
+	 */
+	part->setup_state = XPC_P_SS_SETUP;
+
+	return xpSuccess;
+
+	/* setup of ch structures failed */
+out_2:
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+out_1:
+	kfree(part->channels);
+	part->channels = NULL;
+	return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+
+	/*
+	 * Make this partition inaccessible to local processes by marking it
+	 * as no longer setup. Then wait before proceeding with the teardown
+	 * until all existing references cease.
+	 */
+	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+	part->setup_state = XPC_P_SS_WTEARDOWN;
+
+	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+	/* now we can begin tearing down the channel structures */
+
+	xpc_teardown_ch_structures_sn(part);
+
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+	kfree(part->channels);
+	part->channels = NULL;
+
+	part->setup_state = XPC_P_SS_TORNDOWN;
+}
+
 /*
  * When XPC HB determines that a partition has come up, it will create a new
  * kthread and that kthread will call this function to attempt to set up the
@@ -476,7 +620,7 @@ xpc_activating(void *__partid)
 
 	xpc_allow_hb(partid);
 
-	if (xpc_setup_infrastructure(part) == xpSuccess) {
+	if (xpc_setup_ch_structures(part) == xpSuccess) {
 		(void)xpc_part_ref(part);	/* this will always succeed */
 
 		if (xpc_make_first_contact(part) == xpSuccess) {
@@ -486,7 +630,7 @@ xpc_activating(void *__partid)
 		}
 
 		xpc_part_deref(part);
-		xpc_teardown_infrastructure(part);
+		xpc_teardown_ch_structures(part);
 	}
 
 	xpc_disallow_hb(partid);
@@ -806,6 +950,56 @@ xpc_disconnect_wait(int ch_number)
 	}
 }
 
+static int
+xpc_setup_partitions(void)
+{
+	short partid;
+	struct xpc_partition *part;
+
+	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+				 xp_max_npartitions, GFP_KERNEL);
+	if (xpc_partitions == NULL) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * The first few fields of each entry of xpc_partitions[] need to
+	 * be initialized now so that calls to xpc_connect() and
+	 * xpc_disconnect() can be made prior to the activation of any remote
+	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+	 * PARTITION HAS BEEN ACTIVATED.
+	 */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+		part->activate_IRQ_rcvd = 0;
+		spin_lock_init(&part->act_lock);
+		part->act_state = XPC_P_AS_INACTIVE;
+		XPC_SET_REASON(part, 0, 0);
+
+		init_timer(&part->disengage_timer);
+		part->disengage_timer.function =
+		    xpc_timeout_partition_disengage;
+		part->disengage_timer.data = (unsigned long)part;
+
+		part->setup_state = XPC_P_SS_UNSET;
+		init_waitqueue_head(&part->teardown_wq);
+		atomic_set(&part->references, 0);
+	}
+
+	return xpc_setup_partitions_sn();
+}
+
+static void
+xpc_teardown_partitions(void)
+{
+	kfree(xpc_partitions);
+}
+
 static void
 xpc_do_exit(enum xp_retval reason)
 {
@@ -892,8 +1086,7 @@ xpc_do_exit(enum xp_retval reason)
 	DBUG_ON(xpc_any_partition_engaged());
 	DBUG_ON(xpc_any_hbs_allowed() != 0);
 
-	/* a zero timestamp indicates our rsvd page is not initialized */
-	xpc_rsvd_page->ts_jiffies = 0;
+	xpc_teardown_rsvd_page();
 
 	if (reason == xpUnloading) {
 		(void)unregister_die_notifier(&xpc_die_notifier);
@@ -906,7 +1099,7 @@ xpc_do_exit(enum xp_retval reason)
 	if (xpc_sysctl)
 		unregister_sysctl_table(xpc_sysctl);
 
-	kfree(xpc_partitions);
+	xpc_teardown_partitions();
 
 	if (is_shub())
 		xpc_exit_sn2();
@@ -1062,8 +1255,6 @@ int __init
 xpc_init(void)
 {
 	int ret;
-	short partid;
-	struct xpc_partition *part;
 	struct task_struct *kthread;
 
 	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
@@ -1076,56 +1267,29 @@ xpc_init(void)
 		 * further to only support exactly 64 partitions on this
 		 * architecture, no less.
 		 */
-		if (xp_max_npartitions != 64)
-			return -EINVAL;
-
-		ret = xpc_init_sn2();
-		if (ret != 0)
-			return ret;
+		if (xp_max_npartitions != 64) {
+			dev_err(xpc_part, "max #of partitions not set to 64\n");
+			ret = -EINVAL;
+		} else {
+			ret = xpc_init_sn2();
+		}
 
 	} else if (is_uv()) {
-		xpc_init_uv();
+		ret = xpc_init_uv();
 
 	} else {
-		return -ENODEV;
+		ret = -ENODEV;
 	}
 
-	xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
-				 xp_max_npartitions, GFP_KERNEL);
-	if (xpc_partitions == NULL) {
+	if (ret != 0)
+		return ret;
+
+	ret = xpc_setup_partitions();
+	if (ret != 0) {
 		dev_err(xpc_part, "can't get memory for partition structure\n");
-		ret = -ENOMEM;
 		goto out_1;
 	}
 
-	/*
-	 * The first few fields of each entry of xpc_partitions[] need to
-	 * be initialized now so that calls to xpc_connect() and
-	 * xpc_disconnect() can be made prior to the activation of any remote
-	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
-	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
-	 * PARTITION HAS BEEN ACTIVATED.
-	 */
-	for (partid = 0; partid < xp_max_npartitions; partid++) {
-		part = &xpc_partitions[partid];
-
-		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
-
-		part->activate_IRQ_rcvd = 0;
-		spin_lock_init(&part->act_lock);
-		part->act_state = XPC_P_AS_INACTIVE;
-		XPC_SET_REASON(part, 0, 0);
-
-		init_timer(&part->disengage_timer);
-		part->disengage_timer.function =
-		    xpc_timeout_partition_disengage;
-		part->disengage_timer.data = (unsigned long)part;
-
-		part->setup_state = XPC_P_SS_UNSET;
-		init_waitqueue_head(&part->teardown_wq);
-		atomic_set(&part->references, 0);
-	}
-
 	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
 
 	/*
@@ -1133,10 +1297,9 @@ xpc_init(void)
 	 * other partitions to discover we are alive and establish initial
 	 * communications.
 	 */
-	xpc_rsvd_page = xpc_setup_rsvd_page();
-	if (xpc_rsvd_page == NULL) {
+	ret = xpc_setup_rsvd_page();
+	if (ret != 0) {
 		dev_err(xpc_part, "can't setup our reserved page\n");
-		ret = -EBUSY;
 		goto out_2;
 	}
 
@@ -1187,15 +1350,15 @@ xpc_init(void)
 
 	/* initialization was not successful */
 out_3:
-	/* a zero timestamp indicates our rsvd page is not initialized */
-	xpc_rsvd_page->ts_jiffies = 0;
+	xpc_teardown_rsvd_page();
 
 	(void)unregister_die_notifier(&xpc_die_notifier);
 	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
 out_2:
 	if (xpc_sysctl)
 		unregister_sysctl_table(xpc_sysctl);
-	kfree(xpc_partitions);
+
+	xpc_teardown_partitions();
 out_1:
 	if (is_shub())
 		xpc_exit_sn2();
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index b5fb2164113..6722f6fe4dc 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -73,6 +73,12 @@ xpc_get_rsvd_page_pa(int nasid)
 
 	while (1) {
 
+		/* !!! rp_pa will need to be _gpa on UV.
+		 * ??? So do we save it into the architecture specific parts
+		 * ??? of the xpc_partition structure? Do we rename this
+		 * ??? function or have two versions? Rename rp_pa for UV to
+		 * ??? rp_gpa?
+		 */
 		ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa,
 						     &len);
 
@@ -118,9 +124,10 @@ xpc_get_rsvd_page_pa(int nasid)
  * other partitions to discover we are alive and establish initial
  * communications.
  */
-struct xpc_rsvd_page *
+int
 xpc_setup_rsvd_page(void)
 {
+	int ret;
 	struct xpc_rsvd_page *rp;
 	unsigned long rp_pa;
 	unsigned long new_ts_jiffies;
@@ -132,7 +139,7 @@ xpc_setup_rsvd_page(void)
 	preempt_enable();
 	if (rp_pa == 0) {
 		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
-		return NULL;
+		return -ESRCH;
 	}
 	rp = (struct xpc_rsvd_page *)__va(rp_pa);
 
@@ -146,7 +153,7 @@ xpc_setup_rsvd_page(void)
 		dev_err(xpc_part, "the reserved page's partid of %d is outside "
 			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
 			xp_max_npartitions);
-		return NULL;
+		return -EINVAL;
 	}
 
 	rp->version = XPC_RP_VERSION;
@@ -165,8 +172,9 @@ xpc_setup_rsvd_page(void)
 	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
 	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
 
-	if (xpc_rsvd_page_init(rp) != xpSuccess)
-		return NULL;
+	ret = xpc_setup_rsvd_page_sn(rp);
+	if (ret != 0)
+		return ret;
 
 	/*
 	 * Set timestamp of when reserved page was setup by XPC.
@@ -178,7 +186,15 @@ xpc_setup_rsvd_page(void)
 		new_ts_jiffies++;
 	rp->ts_jiffies = new_ts_jiffies;
 
-	return rp;
+	xpc_rsvd_page = rp;
+	return 0;
+}
+
+void
+xpc_teardown_rsvd_page(void)
+{
+	/* a zero timestamp indicates our rsvd page is not initialized */
+	xpc_rsvd_page->ts_jiffies = 0;
 }
 
 /*
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index d1ccadc0857..8b4b0653d9e 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -53,12 +53,19 @@
  * Buffer used to store a local copy of portions of a remote partition's
  * reserved page (either its header and part_nasids mask, or its vars).
  */
-static char *xpc_remote_copy_buffer_sn2;
 static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
 
 static struct xpc_vars_sn2 *xpc_vars_sn2;
 static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
 
+static int
+xpc_setup_partitions_sn_sn2(void)
+{
+	/* nothing needs to be done */
+	return 0;
+}
+
 /* SH_IPI_ACCESS shub register value on startup */
 static u64 xpc_sh1_IPI_access_sn2;
 static u64 xpc_sh2_IPI_access0_sn2;
@@ -198,7 +205,12 @@ xpc_init_IRQ_amo_sn2(int index)
 static irqreturn_t
 xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
 {
-	atomic_inc(&xpc_activate_IRQ_rcvd);
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
 	wake_up_interruptible(&xpc_activate_IRQ_wq);
 	return IRQ_HANDLED;
 }
@@ -222,6 +234,7 @@ xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
 static void
 xpc_send_local_activate_IRQ_sn2(int from_nasid)
 {
+	unsigned long irq_flags;
 	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
 					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
 					      sizeof(struct amo)));
@@ -230,7 +243,10 @@ xpc_send_local_activate_IRQ_sn2(int from_nasid)
 	FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
 			 FETCHOP_OR, BIT_MASK(from_nasid / 2));
 
-	atomic_inc(&xpc_activate_IRQ_rcvd);
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
 	wake_up_interruptible(&xpc_activate_IRQ_wq);
 }
 
@@ -375,7 +391,7 @@ static void
 xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
 				unsigned long *irq_flags)
 {
-	struct xpc_openclose_args *args = ch->local_openclose_args;
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
 
 	args->reason = ch->reason;
 	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
@@ -390,7 +406,7 @@ xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 static void
 xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
-	struct xpc_openclose_args *args = ch->local_openclose_args;
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
 
 	args->msg_size = ch->msg_size;
 	args->local_nentries = ch->local_nentries;
@@ -400,11 +416,11 @@ xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 static void
 xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
-	struct xpc_openclose_args *args = ch->local_openclose_args;
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
 
 	args->remote_nentries = ch->remote_nentries;
 	args->local_nentries = ch->local_nentries;
-	args->local_msgqueue_pa = xp_pa(ch->local_msgqueue);
+	args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
 	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
 }
 
@@ -420,6 +436,13 @@ xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
 	XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
 }
 
+static void
+xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
+				unsigned long msgqueue_pa)
+{
+	ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+}
+
 /*
  * This next set of functions are used to keep track of when a partition is
  * potentially engaged in accessing memory belonging to another partition.
@@ -489,6 +512,17 @@ xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
 				  part_sn2->activate_IRQ_phys_cpuid);
 }
 
+static void
+xpc_assume_partition_disengaged_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* clear bit(s) based on partid mask in our partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(partid));
+}
+
 static int
 xpc_partition_engaged_sn2(short partid)
 {
@@ -510,17 +544,6 @@ xpc_any_partition_engaged_sn2(void)
 	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
 }
 
-static void
-xpc_assume_partition_disengaged_sn2(short partid)
-{
-	struct amo *amo = xpc_vars_sn2->amos_page +
-			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
-
-	/* clear bit(s) based on partid mask in our partition's amo */
-	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
-			 ~BIT(partid));
-}
-
 /* original protection values for each node */
 static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
 
@@ -595,8 +618,8 @@ xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
 }
 
 
-static enum xp_retval
-xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
+static int
+xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp)
 {
 	struct amo *amos_page;
 	int i;
@@ -627,7 +650,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 		amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
 		if (amos_page == NULL) {
 			dev_err(xpc_part, "can't allocate page of amos\n");
-			return xpNoMemory;
+			return -ENOMEM;
 		}
 
 		/*
@@ -639,7 +662,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 			dev_err(xpc_part, "can't allow amo operations\n");
 			uncached_free_page(__IA64_UNCACHED_OFFSET |
 					   TO_PHYS((u64)amos_page), 1);
-			return ret;
+			return -EPERM;
 		}
 	}
 
@@ -665,7 +688,7 @@ xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
 	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
 	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
 
-	return xpSuccess;
+	return 0;
 }
 
 static void
@@ -1082,10 +1105,19 @@ xpc_identify_activate_IRQ_sender_sn2(void)
 }
 
 static void
-xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
+xpc_process_activate_IRQ_rcvd_sn2(void)
 {
+	unsigned long irq_flags;
+	int n_IRQs_expected;
 	int n_IRQs_detected;
 
+	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	n_IRQs_expected = xpc_activate_IRQ_rcvd;
+	xpc_activate_IRQ_rcvd = 0;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
 	n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
 	if (n_IRQs_detected < n_IRQs_expected) {
 		/* retry once to help avoid missing amo */
@@ -1094,116 +1126,63 @@ xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
 }
 
 /*
- * Guarantee that the kzalloc'd memory is cacheline aligned.
- */
-static void *
-xpc_kzalloc_cacheline_aligned_sn2(size_t size, gfp_t flags, void **base)
-{
-	/* see if kzalloc will give us cachline aligned memory by default */
-	*base = kzalloc(size, flags);
-	if (*base == NULL)
-		return NULL;
-
-	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
-		return *base;
-
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kzalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL)
-		return NULL;
-
-	return (void *)L1_CACHE_ALIGN((u64)*base);
-}
-
-/*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
+ * Setup the channel structures that are sn2 specific.
  */
 static enum xp_retval
-xpc_setup_infrastructure_sn2(struct xpc_partition *part)
+xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	struct xpc_channel_sn2 *ch_sn2;
 	enum xp_retval retval;
 	int ret;
 	int cpuid;
 	int ch_number;
-	struct xpc_channel *ch;
 	struct timer_list *timer;
 	short partid = XPC_PARTID(part);
 
-	/*
-	 * Allocate all of the channel structures as a contiguous chunk of
-	 * memory.
-	 */
-	DBUG_ON(part->channels != NULL);
-	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
-				 GFP_KERNEL);
-	if (part->channels == NULL) {
-		dev_err(xpc_chan, "can't get memory for channels\n");
-		return xpNoMemory;
-	}
-
 	/* allocate all the required GET/PUT values */
 
 	part_sn2->local_GPs =
-	    xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
-					      &part_sn2->local_GPs_base);
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->local_GPs_base);
 	if (part_sn2->local_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for local get/put "
 			"values\n");
-		retval = xpNoMemory;
-		goto out_1;
+		return xpNoMemory;
 	}
 
 	part_sn2->remote_GPs =
-	    xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
-					      &part_sn2->remote_GPs_base);
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->remote_GPs_base);
 	if (part_sn2->remote_GPs == NULL) {
 		dev_err(xpc_chan, "can't get memory for remote get/put "
 			"values\n");
 		retval = xpNoMemory;
-		goto out_2;
+		goto out_1;
 	}
 
 	part_sn2->remote_GPs_pa = 0;
 
 	/* allocate all the required open and close args */
 
-	part->local_openclose_args =
-	    xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
-					      GFP_KERNEL,
-					      &part->local_openclose_args_base);
-	if (part->local_openclose_args == NULL) {
+	part_sn2->local_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part_sn2->
+					  local_openclose_args_base);
+	if (part_sn2->local_openclose_args == NULL) {
 		dev_err(xpc_chan, "can't get memory for local connect args\n");
 		retval = xpNoMemory;
-		goto out_3;
-	}
-
-	part->remote_openclose_args =
-	    xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
-					      GFP_KERNEL,
-					     &part->remote_openclose_args_base);
-	if (part->remote_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote connect args\n");
-		retval = xpNoMemory;
-		goto out_4;
+		goto out_2;
 	}
 
 	part_sn2->remote_openclose_args_pa = 0;
 
 	part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
-	part->chctl.all_flags = 0;
-	spin_lock_init(&part->chctl_lock);
 
 	part_sn2->notify_IRQ_nasid = 0;
 	part_sn2->notify_IRQ_phys_cpuid = 0;
 	part_sn2->remote_chctl_amo_va = NULL;
 
-	atomic_set(&part->channel_mgr_requests, 1);
-	init_waitqueue_head(&part->channel_mgr_wq);
-
 	sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
 	ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
 			  IRQF_SHARED, part_sn2->notify_IRQ_owner,
@@ -1212,7 +1191,7 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
 			"errno=%d\n", -ret);
 		retval = xpLackOfResources;
-		goto out_5;
+		goto out_3;
 	}
 
 	/* Setup a timer to check for dropped notify IRQs */
@@ -1224,44 +1203,16 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
 	add_timer(timer);
 
-	part->nchannels = XPC_MAX_NCHANNELS;
-
-	atomic_set(&part->nchannels_active, 0);
-	atomic_set(&part->nchannels_engaged, 0);
-
 	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		ch->partid = partid;
-		ch->number = ch_number;
-		ch->flags = XPC_C_DISCONNECTED;
-
-		ch->sn.sn2.local_GP = &part_sn2->local_GPs[ch_number];
-		ch->local_openclose_args =
-		    &part->local_openclose_args[ch_number];
-
-		atomic_set(&ch->kthreads_assigned, 0);
-		atomic_set(&ch->kthreads_idle, 0);
-		atomic_set(&ch->kthreads_active, 0);
+		ch_sn2 = &part->channels[ch_number].sn.sn2;
 
-		atomic_set(&ch->references, 0);
-		atomic_set(&ch->n_to_notify, 0);
+		ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
+		ch_sn2->local_openclose_args =
+		    &part_sn2->local_openclose_args[ch_number];
 
-		spin_lock_init(&ch->lock);
-		mutex_init(&ch->sn.sn2.msg_to_pull_mutex);
-		init_completion(&ch->wdisconnect_wait);
-
-		atomic_set(&ch->n_on_msg_allocate_wq, 0);
-		init_waitqueue_head(&ch->msg_allocate_wq);
-		init_waitqueue_head(&ch->idle_wq);
+		mutex_init(&ch_sn2->msg_to_pull_mutex);
 	}
 
-	/*
-	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
-	 * we're declaring that this partition is ready to go.
-	 */
-	part->setup_state = XPC_P_SS_SETUP;
-
 	/*
 	 * Setup the per partition specific variables required by the
 	 * remote partition to establish channel connections with us.
@@ -1271,7 +1222,7 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	 */
 	xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
 	xpc_vars_part_sn2[partid].openclose_args_pa =
-	    xp_pa(part->local_openclose_args);
+	    xp_pa(part_sn2->local_openclose_args);
 	xpc_vars_part_sn2[partid].chctl_amo_pa =
 	    xp_pa(part_sn2->local_chctl_amo_va);
 	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
@@ -1279,80 +1230,48 @@ xpc_setup_infrastructure_sn2(struct xpc_partition *part)
 	xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
 	    cpu_physical_id(cpuid);
 	xpc_vars_part_sn2[partid].nchannels = part->nchannels;
-	xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1;
+	xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
 
 	return xpSuccess;
 
-	/* setup of infrastructure failed */
-out_5:
-	kfree(part->remote_openclose_args_base);
-	part->remote_openclose_args = NULL;
-out_4:
-	kfree(part->local_openclose_args_base);
-	part->local_openclose_args = NULL;
+	/* setup of ch structures failed */
 out_3:
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
+out_2:
 	kfree(part_sn2->remote_GPs_base);
 	part_sn2->remote_GPs = NULL;
-out_2:
+out_1:
 	kfree(part_sn2->local_GPs_base);
 	part_sn2->local_GPs = NULL;
-out_1:
-	kfree(part->channels);
-	part->channels = NULL;
 	return retval;
 }
 
 /*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
+ * Teardown the channel structures that are sn2 specific.
  */
 static void
-xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
+xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part)
 {
 	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
 	short partid = XPC_PARTID(part);
 
 	/*
-	 * We start off by making this partition inaccessible to local
-	 * processes by marking it as no longer setup. Then we make it
-	 * inaccessible to remote processes by clearing the XPC per partition
-	 * specific variable's magic # (which indicates that these variables
-	 * are no longer valid) and by ignoring all XPC notify IRQs sent to
-	 * this partition.
+	 * Indicate that the variables specific to the remote partition are no
+	 * longer available for its use.
 	 */
-
-	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
-	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
-	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
-	part->setup_state = XPC_P_SS_WTEARDOWN;
-
 	xpc_vars_part_sn2[partid].magic = 0;
 
-	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
-
-	/*
-	 * Before proceeding with the teardown we have to wait until all
-	 * existing references cease.
-	 */
-	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
-
-	/* now we can begin tearing down the infrastructure */
-
-	part->setup_state = XPC_P_SS_TORNDOWN;
-
 	/* in case we've still got outstanding timers registered... */
 	del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
+	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
 
-	kfree(part->remote_openclose_args_base);
-	part->remote_openclose_args = NULL;
-	kfree(part->local_openclose_args_base);
-	part->local_openclose_args = NULL;
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
 	kfree(part_sn2->remote_GPs_base);
 	part_sn2->remote_GPs = NULL;
 	kfree(part_sn2->local_GPs_base);
 	part_sn2->local_GPs = NULL;
-	kfree(part->channels);
-	part->channels = NULL;
 	part_sn2->local_chctl_amo_va = NULL;
 }
 
@@ -1429,8 +1348,8 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 	/* see if they've been set up yet */
 
-	if (pulled_entry->magic != XPC_VP_MAGIC1 &&
-	    pulled_entry->magic != XPC_VP_MAGIC2) {
+	if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+	    pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
 
 		if (pulled_entry->magic != 0) {
 			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
@@ -1443,7 +1362,7 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 		return xpRetry;
 	}
 
-	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1) {
+	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
 
 		/* validate the variables */
 
@@ -1473,10 +1392,10 @@ xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
 
 		/* let the other side know that we've pulled their variables */
 
-		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2;
+		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
 	}
 
-	if (pulled_entry->magic == XPC_VP_MAGIC1)
+	if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
 		return xpRetry;
 
 	return xpSuccess;
@@ -1605,6 +1524,7 @@ xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
 static enum xp_retval
 xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	unsigned long irq_flags;
 	int nentries;
 	size_t nbytes;
@@ -1612,17 +1532,17 @@ xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
-		ch->local_msgqueue =
-		    xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
-						      &ch->local_msgqueue_base);
-		if (ch->local_msgqueue == NULL)
+		ch_sn2->local_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
+						  &ch_sn2->local_msgqueue_base);
+		if (ch_sn2->local_msgqueue == NULL)
 			continue;
 
 		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
-		if (ch->notify_queue == NULL) {
-			kfree(ch->local_msgqueue_base);
-			ch->local_msgqueue = NULL;
+		ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_sn2->notify_queue == NULL) {
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
 			continue;
 		}
 
@@ -1649,6 +1569,7 @@ xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
 static enum xp_retval
 xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	unsigned long irq_flags;
 	int nentries;
 	size_t nbytes;
@@ -1658,10 +1579,10 @@ xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
-		ch->remote_msgqueue =
-		    xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
-						     &ch->remote_msgqueue_base);
-		if (ch->remote_msgqueue == NULL)
+		ch_sn2->remote_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
+						  remote_msgqueue_base);
+		if (ch_sn2->remote_msgqueue == NULL)
 			continue;
 
 		spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1687,8 +1608,9 @@ xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
  * Note: Assumes all of the channel sizes are filled in.
  */
 static enum xp_retval
-xpc_allocate_msgqueues_sn2(struct xpc_channel *ch)
+xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
 {
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	enum xp_retval ret;
 
 	DBUG_ON(ch->flags & XPC_C_SETUP);
@@ -1698,10 +1620,10 @@ xpc_allocate_msgqueues_sn2(struct xpc_channel *ch)
 
 		ret = xpc_allocate_remote_msgqueue_sn2(ch);
 		if (ret != xpSuccess) {
-			kfree(ch->local_msgqueue_base);
-			ch->local_msgqueue = NULL;
-			kfree(ch->notify_queue);
-			ch->notify_queue = NULL;
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
+			kfree(ch_sn2->notify_queue);
+			ch_sn2->notify_queue = NULL;
 		}
 	}
 	return ret;
@@ -1715,21 +1637,13 @@ xpc_allocate_msgqueues_sn2(struct xpc_channel *ch)
  * they're cleared when XPC_C_DISCONNECTED is cleared.
  */
 static void
-xpc_free_msgqueues_sn2(struct xpc_channel *ch)
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 
 	DBUG_ON(!spin_is_locked(&ch->lock));
-	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
 
-	ch->remote_msgqueue_pa = 0;
-	ch->func = NULL;
-	ch->key = NULL;
-	ch->msg_size = 0;
-	ch->local_nentries = 0;
-	ch->remote_nentries = 0;
-	ch->kthreads_assigned_limit = 0;
-	ch->kthreads_idle_limit = 0;
+	ch_sn2->remote_msgqueue_pa = 0;
 
 	ch_sn2->local_GP->get = 0;
 	ch_sn2->local_GP->put = 0;
@@ -1745,12 +1659,12 @@ xpc_free_msgqueues_sn2(struct xpc_channel *ch)
 		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
 			ch->flags, ch->partid, ch->number);
 
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->remote_msgqueue_base);
-		ch->remote_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
+		kfree(ch_sn2->local_msgqueue_base);
+		ch_sn2->local_msgqueue = NULL;
+		kfree(ch_sn2->remote_msgqueue_base);
+		ch_sn2->remote_msgqueue = NULL;
+		kfree(ch_sn2->notify_queue);
+		ch_sn2->notify_queue = NULL;
 	}
 }
 
@@ -1766,7 +1680,7 @@ xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
 
 	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
 
-		notify = &ch->notify_queue[get % ch->local_nentries];
+		notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
 
 		/*
 		 * See if the notify entry indicates it was associated with
@@ -1818,7 +1732,7 @@ xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
 
 	get = ch_sn2->w_remote_GP.get;
 	do {
-		msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+		msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
 					 (get % ch->local_nentries) *
 					 ch->msg_size);
 		msg->flags = 0;
@@ -1837,7 +1751,7 @@ xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
 
 	put = ch_sn2->w_remote_GP.put;
 	do {
-		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+		msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
 					 (put % ch->remote_nentries) *
 					 ch->msg_size);
 		msg->flags = 0;
@@ -1976,8 +1890,9 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 		}
 
 		msg_offset = msg_index * ch->msg_size;
-		msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
-		remote_msg_pa = ch->remote_msgqueue_pa + msg_offset;
+		msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
+		    msg_offset);
+		remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
 
 		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
 						     nmsgs * ch->msg_size);
@@ -2001,7 +1916,7 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 
 	/* return the message we were looking for */
 	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
-	msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
+	msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue + msg_offset);
 
 	return msg;
 }
@@ -2080,7 +1995,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 			if (put == ch_sn2->w_local_GP.put)
 				break;
 
-			msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+			msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
 						 (put % ch->local_nentries) *
 						 ch->msg_size);
 
@@ -2182,7 +2097,7 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 	}
 
 	/* get the message's address and initialize it */
-	msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
+	msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
 				 (put % ch->local_nentries) * ch->msg_size);
 
 	DBUG_ON(msg->flags != 0);
@@ -2207,6 +2122,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 		 void *key)
 {
 	enum xp_retval ret = xpSuccess;
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg *msg = msg;
 	struct xpc_notify *notify = notify;
 	s64 msg_number;
@@ -2243,7 +2159,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 
 		atomic_inc(&ch->n_to_notify);
 
-		notify = &ch->notify_queue[msg_number % ch->local_nentries];
+		notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
 		notify->func = func;
 		notify->key = key;
 		notify->type = notify_type;
@@ -2279,7 +2195,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 
 	/* see if the message is next in line to be sent, if so send it */
 
-	put = ch->sn.sn2.local_GP->put;
+	put = ch_sn2->local_GP->put;
 	if (put == msg_number)
 		xpc_send_msgs_sn2(ch, put);
 
@@ -2307,7 +2223,7 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 			if (get == ch_sn2->w_local_GP.get)
 				break;
 
-			msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
+			msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
 						 (get % ch->remote_nentries) *
 						 ch->msg_size);
 
@@ -2385,8 +2301,9 @@ xpc_init_sn2(void)
 	int ret;
 	size_t buf_size;
 
+	xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2;
 	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
-	xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
+	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
 	xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
 	xpc_online_heartbeat = xpc_online_heartbeat_sn2;
@@ -2403,29 +2320,33 @@ xpc_init_sn2(void)
 	    xpc_cancel_partition_deactivation_request_sn2;
 
 	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
-	xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
-	xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
+	xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2;
+	xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2;
 	xpc_make_first_contact = xpc_make_first_contact_sn2;
+
 	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
-	xpc_allocate_msgqueues = xpc_allocate_msgqueues_sn2;
-	xpc_free_msgqueues = xpc_free_msgqueues_sn2;
+	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
+	xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
+	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
+	xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
+
+	xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2;
+
+	xpc_setup_msg_structures = xpc_setup_msg_structures_sn2;
+	xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2;
+
 	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
 	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
 	xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
 
 	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
-	xpc_partition_engaged = xpc_partition_engaged_sn2;
-	xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
 	xpc_indicate_partition_disengaged =
 	    xpc_indicate_partition_disengaged_sn2;
+	xpc_partition_engaged = xpc_partition_engaged_sn2;
+	xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
 	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
 
-	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
-	xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
-	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
-	xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
-
 	xpc_send_msg = xpc_send_msg_sn2;
 	xpc_received_msg = xpc_received_msg_sn2;
 
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c2d4ddd6e95..689cb5c68cc 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -14,41 +14,528 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/device.h>
 #include <asm/uv/uv_hub.h>
+#include "../sgi-gru/gru.h"
 #include "../sgi-gru/grukservices.h"
 #include "xpc.h"
 
+static atomic64_t xpc_heartbeat_uv;
 static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
 
-static void *xpc_activate_mq;
+#define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
+#define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
+
+#define XPC_ACTIVATE_MQ_SIZE_UV	(4 * XP_MAX_NPARTITIONS_UV * \
+				 XPC_ACTIVATE_MSG_SIZE_UV)
+#define XPC_NOTIFY_MQ_SIZE_UV	(4 * XP_MAX_NPARTITIONS_UV * \
+				 XPC_NOTIFY_MSG_SIZE_UV)
+
+static void *xpc_activate_mq_uv;
+static void *xpc_notify_mq_uv;
+
+static int
+xpc_setup_partitions_sn_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		spin_lock_init(&part_uv->flags_lock);
+		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
+	}
+	return 0;
+}
+
+static void *
+xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq,
+		     irq_handler_t irq_handler)
+{
+	int ret;
+	int nid;
+	int mq_order;
+	struct page *page;
+	void *mq;
+
+	nid = cpu_to_node(cpuid);
+	mq_order = get_order(mq_size);
+	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+				mq_order);
+	if (page == NULL)
+		return NULL;
+
+	mq = page_address(page);
+	ret = gru_create_message_queue(mq, mq_size);
+	if (ret != 0) {
+		dev_err(xpc_part, "gru_create_message_queue() returned "
+			"error=%d\n", ret);
+		free_pages((unsigned long)mq, mq_order);
+		return NULL;
+	}
+
+	/* !!! Need to do some other things to set up IRQ */
+
+	ret = request_irq(irq, irq_handler, 0, "xpc", NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
+			irq, ret);
+		free_pages((unsigned long)mq, mq_order);
+		return NULL;
+	}
+
+	/* !!! enable generation of irq when GRU mq op occurs to this mq */
+
+	/* ??? allow other partitions to access GRU mq? */
+
+	return mq;
+}
 
 static void
-xpc_send_local_activate_IRQ_uv(struct xpc_partition *part)
+xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq)
+{
+	/* ??? disallow other partitions to access GRU mq? */
+
+	/* !!! disable generation of irq when GRU mq op occurs to this mq */
+
+	free_irq(irq, NULL);
+
+	free_pages((unsigned long)mq, get_order(mq_size));
+}
+
+static enum xp_retval
+xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size)
 {
+	enum xp_retval xp_ret;
+	int ret;
+
+	while (1) {
+		ret = gru_send_message_gpa(mq_gpa, msg, msg_size);
+		if (ret == MQE_OK) {
+			xp_ret = xpSuccess;
+			break;
+		}
+
+		if (ret == MQE_QUEUE_FULL) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_QUEUE_FULL\n");
+			/* !!! handle QLimit reached; delay & try again */
+			/* ??? Do we add a limit to the number of retries? */
+			(void)msleep_interruptible(10);
+		} else if (ret == MQE_CONGESTION) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_CONGESTION\n");
+			/* !!! handle LB Overflow; simply try again */
+			/* ??? Do we add a limit to the number of retries? */
+		} else {
+			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
+			dev_err(xpc_chan, "gru_send_message_gpa() returned "
+				"error=%d\n", ret);
+			xp_ret = xpGruSendMqError;
+			break;
+		}
+	}
+	return xp_ret;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_uv(void)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	u8 act_state_req;
+
+	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (part->sn.uv.act_state_req == 0)
+			continue;
+
+		xpc_activate_IRQ_rcvd--;
+		BUG_ON(xpc_activate_IRQ_rcvd < 0);
+
+		act_state_req = part->sn.uv.act_state_req;
+		part->sn.uv.act_state_req = 0;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else if (part->act_state == XPC_P_AS_DEACTIVATING)
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
+			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
+
+		} else {
+			BUG();
+		}
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (xpc_activate_IRQ_rcvd == 0)
+			break;
+	}
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+	unsigned long irq_flags;
+	struct xpc_activate_mq_msghdr_uv *msg_hdr;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_partition_uv *part_uv;
+	struct xpc_openclose_args *args;
+	int wakeup_hb_checker = 0;
+
+	while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) {
+
+		partid = msg_hdr->partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() invalid"
+				" partid=0x%x passed in message\n", partid);
+			gru_free_message(xpc_activate_mq_uv, msg_hdr);
+			continue;
+		}
+		part = &xpc_partitions[partid];
+		part_uv = &part->sn.uv;
+
+		part_uv->remote_act_state = msg_hdr->act_state;
+
+		switch (msg_hdr->type) {
+		case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+			/* syncing of remote_act_state was just done above */
+			break;
+
+		case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
+			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
+			    msg_hdr;
+			part_uv->heartbeat = msg->heartbeat;
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
+			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
+			    msg_hdr;
+			part_uv->heartbeat = msg->heartbeat;
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
+			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
+			    msg_hdr;
+			part_uv->heartbeat = msg->heartbeat;
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+			struct xpc_activate_mq_msg_activate_req_uv *msg;
+
+			/*
+			 * ??? Do we deal here with ts_jiffies being different
+			 * ??? if act_state != XPC_P_AS_INACTIVE instead of
+			 * ??? below?
+			 */
+			msg = (struct xpc_activate_mq_msg_activate_req_uv *)
+			    msg_hdr;
+			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
+					  irq_flags);
+			if (part_uv->act_state_req == 0)
+				xpc_activate_IRQ_rcvd++;
+			part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+			part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+			part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+			part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
+			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
+					       irq_flags);
+			wakeup_hb_checker++;
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+			struct xpc_activate_mq_msg_deactivate_req_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_deactivate_req_uv *)
+			    msg_hdr;
+			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
+					  irq_flags);
+			if (part_uv->act_state_req == 0)
+				xpc_activate_IRQ_rcvd++;
+			part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+			part_uv->reason = msg->reason;
+			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
+					       irq_flags);
+			wakeup_hb_checker++;
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+			struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_chctl_closerequest_uv
+			    *)msg_hdr;
+			args = &part->remote_openclose_args[msg->ch_number];
+			args->reason = msg->reason;
+
+			spin_lock_irqsave(&part->chctl_lock, irq_flags);
+			part->chctl.flags[msg->ch_number] |=
+			    XPC_CHCTL_CLOSEREQUEST;
+			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+			xpc_wakeup_channel_mgr(part);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+			struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_chctl_closereply_uv *)
+			    msg_hdr;
+
+			spin_lock_irqsave(&part->chctl_lock, irq_flags);
+			part->chctl.flags[msg->ch_number] |=
+			    XPC_CHCTL_CLOSEREPLY;
+			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+			xpc_wakeup_channel_mgr(part);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+			struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_chctl_openrequest_uv
+			    *)msg_hdr;
+			args = &part->remote_openclose_args[msg->ch_number];
+			args->msg_size = msg->msg_size;
+			args->local_nentries = msg->local_nentries;
+
+			spin_lock_irqsave(&part->chctl_lock, irq_flags);
+			part->chctl.flags[msg->ch_number] |=
+			    XPC_CHCTL_OPENREQUEST;
+			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+			xpc_wakeup_channel_mgr(part);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+			struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+			msg = (struct xpc_activate_mq_msg_chctl_openreply_uv *)
+			    msg_hdr;
+			args = &part->remote_openclose_args[msg->ch_number];
+			args->remote_nentries = msg->remote_nentries;
+			args->local_nentries = msg->local_nentries;
+			args->local_msgqueue_pa = msg->local_notify_mq_gpa;
+
+			spin_lock_irqsave(&part->chctl_lock, irq_flags);
+			part->chctl.flags[msg->ch_number] |=
+			    XPC_CHCTL_OPENREPLY;
+			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+			xpc_wakeup_channel_mgr(part);
+			break;
+		}
+		case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags |= XPC_P_ENGAGED_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			break;
+
+		case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_ENGAGED_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			break;
+
+		default:
+			dev_err(xpc_part, "received unknown activate_mq msg "
+				"type=%d from partition=%d\n", msg_hdr->type,
+				partid);
+		}
+
+		if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+		    part->remote_rp_ts_jiffies != 0) {
+			/*
+			 * ??? Does what we do here need to be sensitive to
+			 * ??? act_state or remote_act_state?
+			 */
+			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
+					  irq_flags);
+			if (part_uv->act_state_req == 0)
+				xpc_activate_IRQ_rcvd++;
+			part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
+					       irq_flags);
+			wakeup_hb_checker++;
+		}
+
+		gru_free_message(xpc_activate_mq_uv, msg_hdr);
+	}
+
+	if (wakeup_hb_checker)
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+			 int msg_type)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+
+	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+	msg_hdr->type = msg_type;
+	msg_hdr->partid = XPC_PARTID(part);
+	msg_hdr->act_state = part->act_state;
+	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
+	return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg,
+				msg_size);
+}
+
+static void
+xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
+			      size_t msg_size, int msg_type)
+{
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess))
+		XPC_DEACTIVATE_PARTITION(part, ret);
+}
+
+static void
+xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
+			 void *msg, size_t msg_size, int msg_type)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess)) {
+		if (irq_flags != NULL)
+			spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+
+		if (irq_flags != NULL)
+			spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+}
+
+static void
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
+{
+	unsigned long irq_flags;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+
 	/*
 	 * !!! Make our side think that the remote parition sent an activate
-	 * !!! message our way. Also do what the activate IRQ handler would
+	 * !!! message our way by doing what the activate IRQ handler would
 	 * !!! do had one really been sent.
 	 */
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	if (part_uv->act_state_req == 0)
+		xpc_activate_IRQ_rcvd++;
+	part_uv->act_state_req = act_state_req;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
 }
 
 static enum xp_retval
-xpc_rsvd_page_init_uv(struct xpc_rsvd_page *rp)
+xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
+				  size_t *len)
 {
-	/* !!! need to have established xpc_activate_mq earlier */
-	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq);
-	return xpSuccess;
+	/* !!! call the UV version of sn_partition_reserved_page_pa() */
+	return xpUnsupported;
+}
+
+static int
+xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
+{
+	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv);
+	return 0;
+}
+
+static void
+xpc_send_heartbeat_uv(int msg_type)
+{
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_activate_mq_msg_heartbeat_req_uv msg;
+
+	/*
+	 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
+	 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
+	 * !!! seconds. This is an increase in numalink traffic.
+	 * ??? Is this good?
+	 */
+
+	msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
+
+	partid = find_first_bit(xpc_heartbeating_to_mask_uv,
+				XP_MAX_NPARTITIONS_UV);
+
+	while (partid < XP_MAX_NPARTITIONS_UV) {
+		part = &xpc_partitions[partid];
+
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					      msg_type);
+
+		partid = find_next_bit(xpc_heartbeating_to_mask_uv,
+				       XP_MAX_NPARTITIONS_UV, partid + 1);
+	}
 }
 
 static void
 xpc_increment_heartbeat_uv(void)
 {
-	/* !!! send heartbeat msg to xpc_heartbeating_to_mask partids */
+	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
+}
+
+static void
+xpc_offline_heartbeat_uv(void)
+{
+	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+}
+
+static void
+xpc_online_heartbeat_uv(void)
+{
+	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
 }
 
 static void
 xpc_heartbeat_init_uv(void)
 {
+	atomic64_set(&xpc_heartbeat_uv, 0);
 	bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
 	xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
 }
@@ -56,48 +543,94 @@ xpc_heartbeat_init_uv(void)
 static void
 xpc_heartbeat_exit_uv(void)
 {
-	/* !!! send heartbeat_offline msg to xpc_heartbeating_to_mask partids */
+	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	enum xp_retval ret = xpNoHeartbeat;
+
+	if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
+	    part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
+
+		if (part_uv->heartbeat != part->last_heartbeat ||
+		    (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
+
+			part->last_heartbeat = part_uv->heartbeat;
+			ret = xpSuccess;
+		}
+	}
+	return ret;
 }
 
 static void
 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
-				    unsigned long remote_rp_pa, int nasid)
+				    unsigned long remote_rp_gpa, int nasid)
 {
 	short partid = remote_rp->SAL_partid;
 	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_activate_mq_msg_activate_req_uv msg;
 
-/*
- * !!! Setup part structure with the bits of info we can glean from the rp:
- * !!!	part->remote_rp_pa = remote_rp_pa;
- * !!!	part->sn.uv.activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
- */
+	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
+	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+	part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
+		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+		msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
+	}
 
-	xpc_send_local_activate_IRQ_uv(part);
+	if (part->act_state == XPC_P_AS_INACTIVE)
+		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
 }
 
 static void
 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
 {
-	xpc_send_local_activate_IRQ_uv(part);
+	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_deactivation_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_deactivate_req_uv msg;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
+	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
+
+		msg.reason = part->reason;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
+	}
 }
 
 /*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
+ * Setup the channel structures that are uv specific.
  */
 static enum xp_retval
-xpc_setup_infrastructure_uv(struct xpc_partition *part)
+xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
 {
 	/* !!! this function needs fleshing out */
 	return xpUnsupported;
 }
 
 /*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
+ * Teardown the channel structures that are uv specific.
  */
 static void
-xpc_teardown_infrastructure_uv(struct xpc_partition *part)
+xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
 {
 	/* !!! this function needs fleshing out */
 	return;
@@ -106,15 +639,163 @@ xpc_teardown_infrastructure_uv(struct xpc_partition *part)
 static enum xp_retval
 xpc_make_first_contact_uv(struct xpc_partition *part)
 {
-	/* !!! this function needs fleshing out */
-	return xpUnsupported;
+	struct xpc_activate_mq_msg_uv msg;
+
+	/*
+	 * We send a sync msg to get the remote partition's remote_act_state
+	 * updated to our current act_state which at this point should
+	 * be XPC_P_AS_ACTIVATING.
+	 */
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
+
+	while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) {
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
 }
 
 static u64
 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
 {
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
+
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+	return chctl.all_flags;
+}
+
+static enum xp_retval
+xpc_setup_msg_structures_uv(struct xpc_channel *ch)
+{
+	/* !!! this function needs fleshing out */
+	return xpUnsupported;
+}
+
+static void
+xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	ch_uv->remote_notify_mq_gpa = 0;
+
 	/* !!! this function needs fleshing out */
-	return 0UL;
+}
+
+static void
+xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.reason = ch->reason;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
+}
+
+static void
+xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.msg_size = ch->msg_size;
+	msg.local_nentries = ch->local_nentries;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.local_nentries = ch->local_nentries;
+	msg.remote_nentries = ch->remote_nentries;
+	msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv);
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
+}
+
+static void
+xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
+			       unsigned long msgqueue_pa)
+{
+	ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa;
+}
+
+static void
+xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
+}
+
+static void
+xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
+}
+
+static void
+xpc_assume_partition_disengaged_uv(short partid)
+{
+	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+	part_uv->flags &= ~XPC_P_ENGAGED_UV;
+	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+}
+
+static int
+xpc_partition_engaged_uv(short partid)
+{
+	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
+}
+
+static int
+xpc_any_partition_engaged_uv(void)
+{
+	struct xpc_partition_uv *part_uv;
+	short partid;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
+			return 1;
+	}
+	return 0;
 }
 
 static struct xpc_msg *
@@ -124,24 +805,64 @@ xpc_get_deliverable_msg_uv(struct xpc_channel *ch)
 	return NULL;
 }
 
-void
+int
 xpc_init_uv(void)
 {
-	xpc_rsvd_page_init = xpc_rsvd_page_init_uv;
+	xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
+	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
+	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
+	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
+	xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
+	xpc_online_heartbeat = xpc_online_heartbeat_uv;
 	xpc_heartbeat_init = xpc_heartbeat_init_uv;
 	xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
+	xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
+
 	xpc_request_partition_activation = xpc_request_partition_activation_uv;
 	xpc_request_partition_reactivation =
 	    xpc_request_partition_reactivation_uv;
-	xpc_setup_infrastructure = xpc_setup_infrastructure_uv;
-	xpc_teardown_infrastructure = xpc_teardown_infrastructure_uv;
+	xpc_request_partition_deactivation =
+	    xpc_request_partition_deactivation_uv;
+
+	xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
+	xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
+
 	xpc_make_first_contact = xpc_make_first_contact_uv;
+
 	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
+	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
+	xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
+	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
+	xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
+
+	xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
+
+	xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
+	xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
+
+	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
+	xpc_indicate_partition_disengaged =
+	    xpc_indicate_partition_disengaged_uv;
+	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
+	xpc_partition_engaged = xpc_partition_engaged_uv;
+	xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
+
 	xpc_get_deliverable_msg = xpc_get_deliverable_msg_uv;
+
+	/* ??? The cpuid argument's value is 0, is that what we want? */
+	/* !!! The irq argument's value isn't correct. */
+	xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0,
+						  xpc_handle_activate_IRQ_uv);
+	if (xpc_activate_mq_uv == NULL)
+		return -ENOMEM;
+
+	return 0;
 }
 
 void
 xpc_exit_uv(void)
 {
+	/* !!! The irq argument's value isn't correct. */
+	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0);
 }
-- 
cgit v1.2.3


From bd3e64c1759e4930315ebf022611468ee9621486 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 29 Jul 2008 22:34:19 -0700
Subject: sgi-xp: setup the notify GRU message queue

Setup the notify GRU message queue that is used for sending user messages
on UV systems.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xp.h          |  45 +-
 drivers/misc/sgi-xp/xp_main.c     |   7 +-
 drivers/misc/sgi-xp/xpc.h         | 140 ++++--
 drivers/misc/sgi-xp/xpc_channel.c |  63 ++-
 drivers/misc/sgi-xp/xpc_main.c    |  21 +-
 drivers/misc/sgi-xp/xpc_sn2.c     | 178 +++----
 drivers/misc/sgi-xp/xpc_uv.c      | 951 ++++++++++++++++++++++++++++++--------
 drivers/misc/sgi-xp/xpnet.c       |  11 +-
 8 files changed, 1032 insertions(+), 384 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 9ac5758f4d0..859a5281c61 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -87,39 +87,18 @@
 #endif
 
 /*
- * The format of an XPC message is as follows:
- *
- *      +-------+--------------------------------+
- *      | flags |////////////////////////////////|
- *      +-------+--------------------------------+
- *      |             message #                  |
- *      +----------------------------------------+
- *      |     payload (user-defined message)     |
- *      |                                        |
- *         		:
- *      |                                        |
- *      +----------------------------------------+
- *
- * The size of the payload is defined by the user via xpc_connect(). A user-
- * defined message resides in the payload area.
- *
- * The size of a message entry (within a message queue) must be a cacheline
- * sized multiple in order to facilitate the BTE transfer of messages from one
- * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
+ * A macro, XPC_MSG_SIZE(), is provided for the user
  * that wants to fit as many msg entries as possible in a given memory size
  * (e.g. a memory page).
  */
-struct xpc_msg {
-	u8 flags;		/* FOR XPC INTERNAL USE ONLY */
-	u8 reserved[7];		/* FOR XPC INTERNAL USE ONLY */
-	s64 number;		/* FOR XPC INTERNAL USE ONLY */
-
-	u64 payload;		/* user defined portion of message */
-};
+#define XPC_MSG_MAX_SIZE	128
+#define XPC_MSG_HDR_MAX_SIZE	16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
 
-#define XPC_MSG_PAYLOAD_OFFSET	(u64) (&((struct xpc_msg *)0)->payload)
 #define XPC_MSG_SIZE(_payload_size) \
-		L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
+				ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+				      is_uv() ? 64 : 128)
+
 
 /*
  * Define the return values and values passed to user's callout functions.
@@ -210,7 +189,10 @@ enum xp_retval {
 	xpGruCopyError,		/* 58: gru_copy_gru() returned error */
 	xpGruSendMqError,	/* 59: gru send message queue related error */
 
-	xpUnknownReason		/* 60: unknown reason - must be last in enum */
+	xpBadChannelNumber,	/* 60: invalid channel number */
+	xpBadMsgType,		/* 61: invalid message type */
+
+	xpUnknownReason		/* 62: unknown reason - must be last in enum */
 };
 
 /*
@@ -261,6 +243,9 @@ typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
  * calling xpc_received().
  *
  * All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ *       and thus cannot block.
  */
 typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
 				 int ch_number, void *key);
@@ -284,7 +269,7 @@ struct xpc_registration {
 	xpc_channel_func func;	/* function to call */
 	void *key;		/* pointer to user's key */
 	u16 nentries;		/* #of msg entries in local msg queue */
-	u16 msg_size;		/* message queue's message size */
+	u16 entry_size;		/* message queue's message entry size */
 	u32 assigned_limit;	/* limit on #of assigned kthreads */
 	u32 idle_limit;		/* limit on #of idle kthreads */
 } ____cacheline_aligned;
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index f86ad3af26b..66a1d19e08a 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -154,6 +154,9 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
 	DBUG_ON(func == NULL);
 	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
 
+	if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+		return xpPayloadTooBig;
+
 	registration = &xpc_registrations[ch_number];
 
 	if (mutex_lock_interruptible(&registration->mutex) != 0)
@@ -166,7 +169,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
 	}
 
 	/* register the channel for connection */
-	registration->msg_size = XPC_MSG_SIZE(payload_size);
+	registration->entry_size = XPC_MSG_SIZE(payload_size);
 	registration->nentries = nentries;
 	registration->assigned_limit = assigned_limit;
 	registration->idle_limit = idle_limit;
@@ -220,7 +223,7 @@ xpc_disconnect(int ch_number)
 	registration->func = NULL;
 	registration->key = NULL;
 	registration->nentries = 0;
-	registration->msg_size = 0;
+	registration->entry_size = 0;
 	registration->assigned_limit = 0;
 	registration->idle_limit = 0;
 
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 4c26181deff..619208d6186 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -181,8 +181,8 @@ struct xpc_vars_part_sn2 {
 				  xpc_nasid_mask_nlongs))
 
 /*
- * The activate_mq is used to send/receive messages that affect XPC's heartbeat,
- * partition active state, and channel state. This is UV only.
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * heartbeat, partition active state, and channel state. This is UV only.
  */
 struct xpc_activate_mq_msghdr_uv {
 	short partid;		/* sender's partid */
@@ -209,45 +209,45 @@ struct xpc_activate_mq_msghdr_uv {
 #define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV		11
 
 struct xpc_activate_mq_msg_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 };
 
 struct xpc_activate_mq_msg_heartbeat_req_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	u64 heartbeat;
 };
 
 struct xpc_activate_mq_msg_activate_req_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	unsigned long rp_gpa;
 	unsigned long activate_mq_gpa;
 };
 
 struct xpc_activate_mq_msg_deactivate_req_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	enum xp_retval reason;
 };
 
 struct xpc_activate_mq_msg_chctl_closerequest_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	short ch_number;
 	enum xp_retval reason;
 };
 
 struct xpc_activate_mq_msg_chctl_closereply_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	short ch_number;
 };
 
 struct xpc_activate_mq_msg_chctl_openrequest_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	short ch_number;
-	short msg_size;		/* size of notify_mq's messages */
+	short entry_size;	/* size of notify_mq's GRU messages */
 	short local_nentries;	/* ??? Is this needed? What is? */
 };
 
 struct xpc_activate_mq_msg_chctl_openreply_uv {
-	struct xpc_activate_mq_msghdr_uv header;
+	struct xpc_activate_mq_msghdr_uv hdr;
 	short ch_number;
 	short remote_nentries;	/* ??? Is this needed? What is? */
 	short local_nentries;	/* ??? Is this needed? What is? */
@@ -284,7 +284,7 @@ struct xpc_gp_sn2 {
  */
 struct xpc_openclose_args {
 	u16 reason;		/* reason why channel is closing */
-	u16 msg_size;		/* sizeof each message entry */
+	u16 entry_size;		/* sizeof each message entry */
 	u16 remote_nentries;	/* #of message entries in remote msg queue */
 	u16 local_nentries;	/* #of message entries in local msg queue */
 	unsigned long local_msgqueue_pa; /* phys addr of local message queue */
@@ -294,22 +294,79 @@ struct xpc_openclose_args {
 	      L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
 	      XPC_MAX_NCHANNELS)
 
-/* struct xpc_msg flags */
 
-#define	XPC_M_DONE		0x01	/* msg has been received/consumed */
-#define	XPC_M_READY		0x02	/* msg is ready to be sent */
-#define	XPC_M_INTERRUPT		0x04	/* send interrupt when msg consumed */
+/*
+ * Structures to define a fifo singly-linked list.
+ */
+
+struct xpc_fifo_entry_uv {
+	struct xpc_fifo_entry_uv *next;
+};
+
+struct xpc_fifo_head_uv {
+	struct xpc_fifo_entry_uv *first;
+	struct xpc_fifo_entry_uv *last;
+	spinlock_t lock;
+	int n_entries;
+};
+
+/*
+ * Define a sn2 styled message.
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message entry (within a message queue) must be a 128-byte
+ * cacheline sized multiple in order to facilitate the BTE transfer of messages
+ * from one message queue to another.
+ */
+struct xpc_msg_sn2 {
+	u8 flags;		/* FOR XPC INTERNAL USE ONLY */
+	u8 reserved[7];		/* FOR XPC INTERNAL USE ONLY */
+	s64 number;		/* FOR XPC INTERNAL USE ONLY */
+
+	u64 payload;		/* user defined portion of message */
+};
+
+/* struct xpc_msg_sn2 flags */
+
+#define	XPC_M_SN2_DONE		0x01	/* msg has been received/consumed */
+#define	XPC_M_SN2_READY		0x02	/* msg is ready to be sent */
+#define	XPC_M_SN2_INTERRUPT	0x04	/* send interrupt when msg consumed */
+
+/*
+ * The format of a uv XPC notify_mq GRU message is as follows:
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message (payload and header) sent via the GRU must be either 1
+ * or 2 GRU_CACHE_LINE_BYTES in length.
+ */
 
-#define XPC_MSG_ADDRESS(_payload) \
-		((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
+struct xpc_notify_mq_msghdr_uv {
+	union {
+		unsigned int gru_msg_hdr;	/* FOR GRU INTERNAL USE ONLY */
+		struct xpc_fifo_entry_uv next;	/* FOR XPC INTERNAL USE ONLY */
+	} u;
+	short partid;		/* FOR XPC INTERNAL USE ONLY */
+	u8 ch_number;		/* FOR XPC INTERNAL USE ONLY */
+	u8 size;		/* FOR XPC INTERNAL USE ONLY */
+	unsigned int msg_slot_number;	/* FOR XPC INTERNAL USE ONLY */
+};
+
+struct xpc_notify_mq_msg_uv {
+	struct xpc_notify_mq_msghdr_uv hdr;
+	unsigned long payload;
+};
 
 /*
- * Defines notify entry.
+ * Define sn2's notify entry.
  *
  * This is used to notify a message's sender that their message was received
  * and consumed by the intended recipient.
  */
-struct xpc_notify {
+struct xpc_notify_sn2 {
 	u8 type;		/* type of notification */
 
 	/* the following two fields are only used if type == XPC_N_CALL */
@@ -317,9 +374,20 @@ struct xpc_notify {
 	void *key;		/* pointer to user's key */
 };
 
-/* struct xpc_notify type of notification */
+/* struct xpc_notify_sn2 type of notification */
 
-#define	XPC_N_CALL		0x01	/* notify function provided by user */
+#define	XPC_N_CALL	0x01	/* notify function provided by user */
+
+/*
+ * Define uv's version of the notify entry. It additionally is used to allocate
+ * a msg slot on the remote partition into which is copied a sent message.
+ */
+struct xpc_send_msg_slot_uv {
+	struct xpc_fifo_entry_uv next;
+	unsigned int msg_slot_number;
+	xpc_notify_func func;	/* user's notify function */
+	void *key;		/* pointer to user's key */
+};
 
 /*
  * Define the structure that manages all the stuff required by a channel. In
@@ -409,14 +477,14 @@ struct xpc_channel_sn2 {
 					     /* opening or closing of channel */
 
 	void *local_msgqueue_base;	/* base address of kmalloc'd space */
-	struct xpc_msg *local_msgqueue;	/* local message queue */
+	struct xpc_msg_sn2 *local_msgqueue;	/* local message queue */
 	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
-	struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
-					 /* local message queue */
+	struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
+					   /* partition's local message queue */
 	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
 					  /* local message queue */
 
-	struct xpc_notify *notify_queue;    /* notify queue for messages sent */
+	struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
 
 	/* various flavors of local and remote Get/Put values */
 
@@ -432,6 +500,12 @@ struct xpc_channel_sn2 {
 struct xpc_channel_uv {
 	unsigned long remote_notify_mq_gpa;	/* gru phys address of remote */
 						/* partition's notify mq */
+
+	struct xpc_send_msg_slot_uv *send_msg_slots;
+	struct xpc_notify_mq_msg_uv *recv_msg_slots;
+
+	struct xpc_fifo_head_uv msg_slot_free_list;
+	struct xpc_fifo_head_uv recv_msg_list;	/* deliverable payloads */
 };
 
 struct xpc_channel {
@@ -444,7 +518,7 @@ struct xpc_channel {
 
 	u16 number;		/* channel # */
 
-	u16 msg_size;		/* sizeof each msg entry */
+	u16 entry_size;		/* sizeof each msg entry */
 	u16 local_nentries;	/* #of msg entries in local msg queue */
 	u16 remote_nentries;	/* #of msg entries in remote msg queue */
 
@@ -733,8 +807,8 @@ extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
 extern void (*xpc_teardown_msg_structures) (struct xpc_channel *);
 extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
 extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
-extern int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *);
-extern struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *);
+extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *);
+extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *);
 extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *,
 						 unsigned long, int);
 extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
@@ -762,9 +836,9 @@ extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
 extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
 					    unsigned long);
 
-extern enum xp_retval (*xpc_send_msg) (struct xpc_channel *, u32, void *, u16,
-				       u8, xpc_notify_func, void *);
-extern void (*xpc_received_msg) (struct xpc_channel *, struct xpc_msg *);
+extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *,
+					   u16, u8, xpc_notify_func, void *);
+extern void (*xpc_received_payload) (struct xpc_channel *, void *);
 
 /* found in xpc_sn2.c */
 extern int xpc_init_sn2(void);
@@ -805,7 +879,7 @@ extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
 extern void xpc_initiate_received(short, int, void *);
 extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
 extern void xpc_connected_callout(struct xpc_channel *);
-extern void xpc_deliver_msg(struct xpc_channel *);
+extern void xpc_deliver_payload(struct xpc_channel *);
 extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 				   enum xp_retval, unsigned long *);
 extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 73df9fb5ee6..9cd2ebe2a3b 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -139,7 +139,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	ch->func = NULL;
 	ch->key = NULL;
-	ch->msg_size = 0;
+	ch->entry_size = 0;
 	ch->local_nentries = 0;
 	ch->remote_nentries = 0;
 	ch->kthreads_assigned_limit = 0;
@@ -315,9 +315,9 @@ again:
 
 	if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
 
-		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (msg_size=%d, "
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
 			"local_nentries=%d) received from partid=%d, "
-			"channel=%d\n", args->msg_size, args->local_nentries,
+			"channel=%d\n", args->entry_size, args->local_nentries,
 			ch->partid, ch->number);
 
 		if (part->act_state == XPC_P_AS_DEACTIVATING ||
@@ -338,10 +338,10 @@ again:
 
 		/*
 		 * The meaningful OPENREQUEST connection state fields are:
-		 *      msg_size = size of channel's messages in bytes
+		 *      entry_size = size of channel's messages in bytes
 		 *      local_nentries = remote partition's local_nentries
 		 */
-		if (args->msg_size == 0 || args->local_nentries == 0) {
+		if (args->entry_size == 0 || args->local_nentries == 0) {
 			/* assume OPENREQUEST was delayed by mistake */
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
@@ -351,14 +351,14 @@ again:
 		ch->remote_nentries = args->local_nentries;
 
 		if (ch->flags & XPC_C_OPENREQUEST) {
-			if (args->msg_size != ch->msg_size) {
+			if (args->entry_size != ch->entry_size) {
 				XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
 						       &irq_flags);
 				spin_unlock_irqrestore(&ch->lock, irq_flags);
 				return;
 			}
 		} else {
-			ch->msg_size = args->msg_size;
+			ch->entry_size = args->entry_size;
 
 			XPC_SET_REASON(ch, 0, 0);
 			ch->flags &= ~XPC_C_DISCONNECTED;
@@ -473,7 +473,7 @@ xpc_connect_channel(struct xpc_channel *ch)
 	ch->local_nentries = registration->nentries;
 
 	if (ch->flags & XPC_C_ROPENREQUEST) {
-		if (registration->msg_size != ch->msg_size) {
+		if (registration->entry_size != ch->entry_size) {
 			/* the local and remote sides aren't the same */
 
 			/*
@@ -492,7 +492,7 @@ xpc_connect_channel(struct xpc_channel *ch)
 			return xpUnequalMsgSizes;
 		}
 	} else {
-		ch->msg_size = registration->msg_size;
+		ch->entry_size = registration->entry_size;
 
 		XPC_SET_REASON(ch, 0, 0);
 		ch->flags &= ~XPC_C_DISCONNECTED;
@@ -859,8 +859,8 @@ xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
 	DBUG_ON(payload == NULL);
 
 	if (xpc_part_ref(part)) {
-		ret = xpc_send_msg(&part->channels[ch_number], flags, payload,
-				   payload_size, 0, NULL, NULL);
+		ret = xpc_send_payload(&part->channels[ch_number], flags,
+				       payload, payload_size, 0, NULL, NULL);
 		xpc_part_deref(part);
 	}
 
@@ -911,23 +911,24 @@ xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
 	DBUG_ON(func == NULL);
 
 	if (xpc_part_ref(part)) {
-		ret = xpc_send_msg(&part->channels[ch_number], flags, payload,
-				   payload_size, XPC_N_CALL, func, key);
+		ret = xpc_send_payload(&part->channels[ch_number], flags,
+				       payload, payload_size, XPC_N_CALL, func,
+				       key);
 		xpc_part_deref(part);
 	}
 	return ret;
 }
 
 /*
- * Deliver a message to its intended recipient.
+ * Deliver a message's payload to its intended recipient.
  */
 void
-xpc_deliver_msg(struct xpc_channel *ch)
+xpc_deliver_payload(struct xpc_channel *ch)
 {
-	struct xpc_msg *msg;
+	void *payload;
 
-	msg = xpc_get_deliverable_msg(ch);
-	if (msg != NULL) {
+	payload = xpc_get_deliverable_payload(ch);
+	if (payload != NULL) {
 
 		/*
 		 * This ref is taken to protect the payload itself from being
@@ -939,18 +940,16 @@ xpc_deliver_msg(struct xpc_channel *ch)
 		atomic_inc(&ch->kthreads_active);
 
 		if (ch->func != NULL) {
-			dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				msg, (signed long)msg->number, ch->partid,
+			dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
 				ch->number);
 
 			/* deliver the message to its intended recipient */
-			ch->func(xpMsgReceived, ch->partid, ch->number,
-				 &msg->payload, ch->key);
+			ch->func(xpMsgReceived, ch->partid, ch->number, payload,
+				 ch->key);
 
-			dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				msg, (signed long)msg->number, ch->partid,
+			dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
 				ch->number);
 		}
 
@@ -959,14 +958,11 @@ xpc_deliver_msg(struct xpc_channel *ch)
 }
 
 /*
- * Acknowledge receipt of a delivered message.
- *
- * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
- * that sent the message.
+ * Acknowledge receipt of a delivered message's payload.
  *
  * This function, although called by users, does not call xpc_part_ref() to
  * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
  *
  * Arguments:
  *
@@ -980,14 +976,13 @@ xpc_initiate_received(short partid, int ch_number, void *payload)
 {
 	struct xpc_partition *part = &xpc_partitions[partid];
 	struct xpc_channel *ch;
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
 
 	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
 
 	ch = &part->channels[ch_number];
-	xpc_received_msg(ch, msg);
+	xpc_received_payload(ch, payload);
 
-	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg()  */
+	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload()  */
 	xpc_msgqueue_deref(ch);
 }
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 13ec4792899..46325fc8481 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -188,8 +188,8 @@ u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
 enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
 void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
 void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
-int (*xpc_n_of_deliverable_msgs) (struct xpc_channel *ch);
-struct xpc_msg *(*xpc_get_deliverable_msg) (struct xpc_channel *ch);
+int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
+void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
 
 void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
 					  unsigned long remote_rp_pa,
@@ -220,10 +220,11 @@ void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
 void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
 				     unsigned long msgqueue_pa);
 
-enum xp_retval (*xpc_send_msg) (struct xpc_channel *ch, u32 flags,
-				void *payload, u16 payload_size, u8 notify_type,
-				xpc_notify_func func, void *key);
-void (*xpc_received_msg) (struct xpc_channel *ch, struct xpc_msg *msg);
+enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
+				    void *payload, u16 payload_size,
+				    u8 notify_type, xpc_notify_func func,
+				    void *key);
+void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
 
 /*
  * Timer function to enforce the timelimit on the partition disengage.
@@ -714,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 	do {
 		/* deliver messages to their intended recipients */
 
-		while (xpc_n_of_deliverable_msgs(ch) > 0 &&
+		while (xpc_n_of_deliverable_payloads(ch) > 0 &&
 		       !(ch->flags & XPC_C_DISCONNECTING)) {
-			xpc_deliver_msg(ch);
+			xpc_deliver_payload(ch);
 		}
 
 		if (atomic_inc_return(&ch->kthreads_idle) >
@@ -730,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 			"wait_event_interruptible_exclusive()\n");
 
 		(void)wait_event_interruptible_exclusive(ch->idle_wq,
-				(xpc_n_of_deliverable_msgs(ch) > 0 ||
+				(xpc_n_of_deliverable_payloads(ch) > 0 ||
 				 (ch->flags & XPC_C_DISCONNECTING)));
 
 		atomic_dec(&ch->kthreads_idle);
@@ -775,7 +776,7 @@ xpc_kthread_start(void *args)
 			 * additional kthreads to help deliver them. We only
 			 * need one less than total #of messages to deliver.
 			 */
-			n_needed = xpc_n_of_deliverable_msgs(ch) - 1;
+			n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
 			if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
 				xpc_activate_kthreads(ch, n_needed);
 
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 8b4b0653d9e..b4882ccf634 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -408,7 +408,7 @@ xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
 
-	args->msg_size = ch->msg_size;
+	args->entry_size = ch->entry_size;
 	args->local_nentries = ch->local_nentries;
 	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
 }
@@ -1531,14 +1531,14 @@ xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
 
 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
 
-		nbytes = nentries * ch->msg_size;
+		nbytes = nentries * ch->entry_size;
 		ch_sn2->local_msgqueue =
 		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
 						  &ch_sn2->local_msgqueue_base);
 		if (ch_sn2->local_msgqueue == NULL)
 			continue;
 
-		nbytes = nentries * sizeof(struct xpc_notify);
+		nbytes = nentries * sizeof(struct xpc_notify_sn2);
 		ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
 		if (ch_sn2->notify_queue == NULL) {
 			kfree(ch_sn2->local_msgqueue_base);
@@ -1578,7 +1578,7 @@ xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
 
 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
 
-		nbytes = nentries * ch->msg_size;
+		nbytes = nentries * ch->entry_size;
 		ch_sn2->remote_msgqueue =
 		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
 						  remote_msgqueue_base);
@@ -1632,9 +1632,6 @@ xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
 /*
  * Free up message queues and other stuff that were allocated for the specified
  * channel.
- *
- * Note: ch->reason and ch->reason_line are left set for debugging purposes,
- * they're cleared when XPC_C_DISCONNECTED is cleared.
  */
 static void
 xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
@@ -1674,7 +1671,7 @@ xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
 static void
 xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
 {
-	struct xpc_notify *notify;
+	struct xpc_notify_sn2 *notify;
 	u8 notify_type;
 	s64 get = ch->sn.sn2.w_remote_GP.get - 1;
 
@@ -1699,17 +1696,16 @@ xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
 		atomic_dec(&ch->n_to_notify);
 
 		if (notify->func != NULL) {
-			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+				"msg_number=%ld partid=%d channel=%d\n",
 				(void *)notify, get, ch->partid, ch->number);
 
 			notify->func(reason, ch->partid, ch->number,
 				     notify->key);
 
-			dev_dbg(xpc_chan, "notify->func() returned, "
-				"notify=0x%p, msg_number=%ld, partid=%d, "
-				"channel=%d\n", (void *)notify, get,
-				ch->partid, ch->number);
+			dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+				" msg_number=%ld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
 		}
 	}
 }
@@ -1727,14 +1723,14 @@ static inline void
 xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	s64 get;
 
 	get = ch_sn2->w_remote_GP.get;
 	do {
-		msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
-					 (get % ch->local_nentries) *
-					 ch->msg_size);
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+					     (get % ch->local_nentries) *
+					     ch->entry_size);
 		msg->flags = 0;
 	} while (++get < ch_sn2->remote_GP.get);
 }
@@ -1746,24 +1742,30 @@ static inline void
 xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	s64 put;
 
 	put = ch_sn2->w_remote_GP.put;
 	do {
-		msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
-					 (put % ch->remote_nentries) *
-					 ch->msg_size);
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+					     (put % ch->remote_nentries) *
+					     ch->entry_size);
 		msg->flags = 0;
 	} while (++put < ch_sn2->remote_GP.put);
 }
 
+static int
+xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
+{
+	return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
 static void
 xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
 {
 	struct xpc_channel *ch = &part->channels[ch_number];
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	int nmsgs_sent;
+	int npayloads_sent;
 
 	ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
 
@@ -1835,7 +1837,7 @@ xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
 	if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
 		/*
 		 * Clear msg->flags in previously received messages, so that
-		 * they're ready for xpc_get_deliverable_msg().
+		 * they're ready for xpc_get_deliverable_payload_sn2().
 		 */
 		xpc_clear_remote_msgqueue_flags_sn2(ch);
 
@@ -1845,27 +1847,27 @@ xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
 			"channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
 			ch->number);
 
-		nmsgs_sent = ch_sn2->w_remote_GP.put - ch_sn2->w_local_GP.get;
-		if (nmsgs_sent > 0) {
+		npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+		if (npayloads_sent > 0) {
 			dev_dbg(xpc_chan, "msgs waiting to be copied and "
 				"delivered=%d, partid=%d, channel=%d\n",
-				nmsgs_sent, ch->partid, ch->number);
+				npayloads_sent, ch->partid, ch->number);
 
 			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
-				xpc_activate_kthreads(ch, nmsgs_sent);
+				xpc_activate_kthreads(ch, npayloads_sent);
 		}
 	}
 
 	xpc_msgqueue_deref(ch);
 }
 
-static struct xpc_msg *
+static struct xpc_msg_sn2 *
 xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	unsigned long remote_msg_pa;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	u32 msg_index;
 	u32 nmsgs;
 	u64 msg_offset;
@@ -1889,13 +1891,13 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 			nmsgs = ch->remote_nentries - msg_index;
 		}
 
-		msg_offset = msg_index * ch->msg_size;
-		msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
+		msg_offset = msg_index * ch->entry_size;
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
 		    msg_offset);
 		remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
 
 		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
-						     nmsgs * ch->msg_size);
+						     nmsgs * ch->entry_size);
 		if (ret != xpSuccess) {
 
 			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
@@ -1915,26 +1917,21 @@ xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
 	mutex_unlock(&ch_sn2->msg_to_pull_mutex);
 
 	/* return the message we were looking for */
-	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
-	msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+	msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
 
 	return msg;
 }
 
-static int
-xpc_n_of_deliverable_msgs_sn2(struct xpc_channel *ch)
-{
-	return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
-}
-
 /*
- * Get a message to be delivered.
+ * Get the next deliverable message's payload.
  */
-static struct xpc_msg *
-xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg = NULL;
+	struct xpc_msg_sn2 *msg;
+	void *payload = NULL;
 	s64 get;
 
 	do {
@@ -1965,15 +1962,21 @@ xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
 			msg = xpc_pull_remote_msg_sn2(ch, get);
 
 			DBUG_ON(msg != NULL && msg->number != get);
-			DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
-			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
+			DBUG_ON(msg != NULL && (msg->flags & XPC_M_SN2_DONE));
+			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_SN2_READY));
 
+			/*
+			 * msg may be NULL (the checks above allow for it);
+			 * leave payload NULL rather than a bogus address.
+			 */
+			if (msg != NULL)
+				payload = &msg->payload;
 			break;
 		}
 
 	} while (1);
 
-	return msg;
+	return payload;
 }
 
 /*
@@ -1985,7 +1983,7 @@ static void
 xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	s64 put = initial_put + 1;
 	int send_msgrequest = 0;
 
@@ -1995,11 +1993,12 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 			if (put == ch_sn2->w_local_GP.put)
 				break;
 
-			msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
-						 (put % ch->local_nentries) *
-						 ch->msg_size);
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     local_msgqueue + (put %
+						     ch->local_nentries) *
+						     ch->entry_size);
 
-			if (!(msg->flags & XPC_M_READY))
+			if (!(msg->flags & XPC_M_SN2_READY))
 				break;
 
 			put++;
@@ -2026,7 +2025,7 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
 
 		/*
 		 * We need to ensure that the message referenced by
-		 * local_GP->put is not XPC_M_READY or that local_GP->put
+		 * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
 		 * equals w_local_GP.put, so we'll go have a look.
 		 */
 		initial_put = put;
@@ -2042,10 +2041,10 @@ xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
  */
 static enum xp_retval
 xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
-		     struct xpc_msg **address_of_msg)
+		     struct xpc_msg_sn2 **address_of_msg)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	enum xp_retval ret;
 	s64 put;
 
@@ -2097,8 +2096,9 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
 	}
 
 	/* get the message's address and initialize it */
-	msg = (struct xpc_msg *)((u64)ch_sn2->local_msgqueue +
-				 (put % ch->local_nentries) * ch->msg_size);
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+				     (put % ch->local_nentries) *
+				     ch->entry_size);
 
 	DBUG_ON(msg->flags != 0);
 	msg->number = put;
@@ -2117,20 +2117,20 @@ xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
  * partition the message is being sent to.
  */
 static enum xp_retval
-xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
-		 u16 payload_size, u8 notify_type, xpc_notify_func func,
-		 void *key)
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+		     u16 payload_size, u8 notify_type, xpc_notify_func func,
+		     void *key)
 {
 	enum xp_retval ret = xpSuccess;
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg = msg;
-	struct xpc_notify *notify = notify;
+	struct xpc_msg_sn2 *msg = msg;
+	struct xpc_notify_sn2 *notify = notify;
 	s64 msg_number;
 	s64 put;
 
 	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
 
-	if (XPC_MSG_SIZE(payload_size) > ch->msg_size)
+	if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
 		return xpPayloadTooBig;
 
 	xpc_msgqueue_ref(ch);
@@ -2155,7 +2155,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 		 * Tell the remote side to send an ACK interrupt when the
 		 * message has been delivered.
 		 */
-		msg->flags |= XPC_M_INTERRUPT;
+		msg->flags |= XPC_M_SN2_INTERRUPT;
 
 		atomic_inc(&ch->n_to_notify);
 
@@ -2185,7 +2185,7 @@ xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
 
 	memcpy(&msg->payload, payload, payload_size);
 
-	msg->flags |= XPC_M_READY;
+	msg->flags |= XPC_M_SN2_READY;
 
 	/*
 	 * The preceding store of msg->flags must occur before the following
@@ -2208,12 +2208,15 @@ out_1:
  * Now we actually acknowledge the messages that have been delivered and ack'd
  * by advancing the cached remote message queue's Get value and if requested
  * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
  */
 static void
 xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
-	struct xpc_msg *msg;
+	struct xpc_msg_sn2 *msg;
 	s64 get = initial_get + 1;
 	int send_msgrequest = 0;
 
@@ -2223,11 +2226,12 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 			if (get == ch_sn2->w_local_GP.get)
 				break;
 
-			msg = (struct xpc_msg *)((u64)ch_sn2->remote_msgqueue +
-						 (get % ch->remote_nentries) *
-						 ch->msg_size);
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     remote_msgqueue + (get %
+						     ch->remote_nentries) *
+						     ch->entry_size);
 
-			if (!(msg->flags & XPC_M_DONE))
+			if (!(msg->flags & XPC_M_SN2_DONE))
 				break;
 
 			msg_flags |= msg->flags;
@@ -2251,11 +2255,11 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
 			"channel=%d\n", get, ch->partid, ch->number);
 
-		send_msgrequest = (msg_flags & XPC_M_INTERRUPT);
+		send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
 
 		/*
 		 * We need to ensure that the message referenced by
-		 * local_GP->get is not XPC_M_DONE or that local_GP->get
+		 * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
 		 * equals w_local_GP.get, so we'll go have a look.
 		 */
 		initial_get = get;
@@ -2266,19 +2270,23 @@ xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
 }
 
 static void
-xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
+xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
 {
+	struct xpc_msg_sn2 *msg;
+	s64 msg_number;
 	s64 get;
-	s64 msg_number = msg->number;
+
+	msg = container_of(payload, struct xpc_msg_sn2, payload);
+	msg_number = msg->number;
 
 	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
 		(void *)msg, msg_number, ch->partid, ch->number);
 
-	DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
+	DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->entry_size) !=
 		msg_number % ch->remote_nentries);
-	DBUG_ON(msg->flags & XPC_M_DONE);
+	DBUG_ON(msg->flags & XPC_M_SN2_DONE);
 
-	msg->flags |= XPC_M_DONE;
+	msg->flags |= XPC_M_SN2_DONE;
 
 	/*
 	 * The preceding store of msg->flags must occur before the following
@@ -2337,8 +2345,8 @@ xpc_init_sn2(void)
 
 	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
 	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
-	xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
-	xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
+	xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2;
+	xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2;
 
 	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
 	xpc_indicate_partition_disengaged =
@@ -2347,8 +2355,14 @@ xpc_init_sn2(void)
 	xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
 	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
 
-	xpc_send_msg = xpc_send_msg_sn2;
-	xpc_received_msg = xpc_received_msg_sn2;
+	xpc_send_payload = xpc_send_payload_sn2;
+	xpc_received_payload = xpc_received_payload_sn2;
+
+	if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
+			"larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
 
 	buf_size = max(XPC_RP_VARS_SIZE,
 		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 689cb5c68cc..1ac694c0162 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -66,8 +66,11 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq,
 	mq_order = get_order(mq_size);
 	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				mq_order);
-	if (page == NULL)
+	if (page == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
 		return NULL;
+	}
 
 	mq = page_address(page);
 	ret = gru_create_message_queue(mq, mq_size);
@@ -193,202 +196,226 @@ xpc_process_activate_IRQ_rcvd_uv(void)
 
 }
 
-static irqreturn_t
-xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+static void
+xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
+			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
+			      int *wakeup_hb_checker)
 {
 	unsigned long irq_flags;
-	struct xpc_activate_mq_msghdr_uv *msg_hdr;
-	short partid;
-	struct xpc_partition *part;
-	struct xpc_partition_uv *part_uv;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
 	struct xpc_openclose_args *args;
-	int wakeup_hb_checker = 0;
 
-	while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) {
+	part_uv->remote_act_state = msg_hdr->act_state;
 
-		partid = msg_hdr->partid;
-		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
-			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() invalid"
-				"partid=0x%x passed in message\n", partid);
-			gru_free_message(xpc_activate_mq_uv, msg_hdr);
-			continue;
-		}
-		part = &xpc_partitions[partid];
-		part_uv = &part->sn.uv;
+	switch (msg_hdr->type) {
+	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+		/* syncing of remote_act_state was just done above */
+		break;
 
-		part_uv->remote_act_state = msg_hdr->act_state;
+	case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
+		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
 
-		switch (msg_hdr->type) {
-		case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
-			/* syncing of remote_act_state was just done above */
-			break;
+		msg = container_of(msg_hdr,
+				   struct xpc_activate_mq_msg_heartbeat_req_uv,
+				   hdr);
+		part_uv->heartbeat = msg->heartbeat;
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
+		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+		msg = container_of(msg_hdr,
+				   struct xpc_activate_mq_msg_heartbeat_req_uv,
+				   hdr);
+		part_uv->heartbeat = msg->heartbeat;
+
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
+		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+		msg = container_of(msg_hdr,
+				   struct xpc_activate_mq_msg_heartbeat_req_uv,
+				   hdr);
+		part_uv->heartbeat = msg->heartbeat;
+
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_activate_req_uv *msg;
 
-		case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
-			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+		/*
+		 * ??? Do we deal here with ts_jiffies being different
+		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
+		 * ??? below?
+		 */
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_activate_req_uv, hdr);
 
-			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
-			    msg_hdr;
-			part_uv->heartbeat = msg->heartbeat;
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
-			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
-			    msg_hdr;
-			part_uv->heartbeat = msg->heartbeat;
-			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-			part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
-			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
-			struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-			msg = (struct xpc_activate_mq_msg_heartbeat_req_uv *)
-			    msg_hdr;
-			part_uv->heartbeat = msg->heartbeat;
-			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-			part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
-			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
-			struct xpc_activate_mq_msg_activate_req_uv *msg;
-
-			/*
-			 * ??? Do we deal here with ts_jiffies being different
-			 * ??? if act_state != XPC_P_AS_INACTIVE instead of
-			 * ??? below?
-			 */
-			msg = (struct xpc_activate_mq_msg_activate_req_uv *)
-			    msg_hdr;
-			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
-					  irq_flags);
-			if (part_uv->act_state_req == 0)
-				xpc_activate_IRQ_rcvd++;
-			part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
-			part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
-			part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
-			part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
-			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
-					       irq_flags);
-			wakeup_hb_checker++;
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
-			struct xpc_activate_mq_msg_deactivate_req_uv *msg;
-
-			msg = (struct xpc_activate_mq_msg_deactivate_req_uv *)
-			    msg_hdr;
-			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
-					  irq_flags);
-			if (part_uv->act_state_req == 0)
-				xpc_activate_IRQ_rcvd++;
-			part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
-			part_uv->reason = msg->reason;
-			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
-					       irq_flags);
-			wakeup_hb_checker++;
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
-			struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+		part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
 
-			msg = (struct xpc_activate_mq_msg_chctl_closerequest_uv
-			    *)msg_hdr;
-			args = &part->remote_openclose_args[msg->ch_number];
-			args->reason = msg->reason;
+		(*wakeup_hb_checker)++;
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
 
-			spin_lock_irqsave(&part->chctl_lock, irq_flags);
-			part->chctl.flags[msg->ch_number] |=
-			    XPC_CHCTL_CLOSEREQUEST;
-			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
 
-			xpc_wakeup_channel_mgr(part);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
-			struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = msg->reason;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
 
-			msg = (struct xpc_activate_mq_msg_chctl_closereply_uv *)
-			    msg_hdr;
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closerequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->reason = msg->reason;
 
-			spin_lock_irqsave(&part->chctl_lock, irq_flags);
-			part->chctl.flags[msg->ch_number] |=
-			    XPC_CHCTL_CLOSEREPLY;
-			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
 
-			xpc_wakeup_channel_mgr(part);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
-			struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
 
-			msg = (struct xpc_activate_mq_msg_chctl_openrequest_uv
-			    *)msg_hdr;
-			args = &part->remote_openclose_args[msg->ch_number];
-			args->msg_size = msg->msg_size;
-			args->local_nentries = msg->local_nentries;
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closereply_uv,
+				   hdr);
 
-			spin_lock_irqsave(&part->chctl_lock, irq_flags);
-			part->chctl.flags[msg->ch_number] |=
-			    XPC_CHCTL_OPENREQUEST;
-			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
 
-			xpc_wakeup_channel_mgr(part);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
-			struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
-
-			msg = (struct xpc_activate_mq_msg_chctl_openreply_uv *)
-			    msg_hdr;
-			args = &part->remote_openclose_args[msg->ch_number];
-			args->remote_nentries = msg->remote_nentries;
-			args->local_nentries = msg->local_nentries;
-			args->local_msgqueue_pa = msg->local_notify_mq_gpa;
-
-			spin_lock_irqsave(&part->chctl_lock, irq_flags);
-			part->chctl.flags[msg->ch_number] |=
-			    XPC_CHCTL_OPENREPLY;
-			spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
-
-			xpc_wakeup_channel_mgr(part);
-			break;
-		}
-		case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
-			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-			part_uv->flags |= XPC_P_ENGAGED_UV;
-			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-			break;
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openrequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->entry_size = msg->entry_size;
+		args->local_nentries = msg->local_nentries;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->remote_nentries = msg->remote_nentries;
+		args->local_nentries = msg->local_nentries;
+		args->local_msgqueue_pa = msg->local_notify_mq_gpa;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags &= ~XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	default:
+		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadMsgType;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
 
-		case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
-			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-			part_uv->flags &= ~XPC_P_ENGAGED_UV;
-			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-			break;
+		(*wakeup_hb_checker)++;
+		return;
+	}
 
-		default:
-			dev_err(xpc_part, "received unknown activate_mq msg "
-				"type=%d from partition=%d\n", msg_hdr->type,
-				partid);
-		}
+	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+	    part->remote_rp_ts_jiffies != 0) {
+		/*
+		 * ??? Does what we do here need to be sensitive to
+		 * ??? act_state or remote_act_state?
+		 */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
 
-		if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
-		    part->remote_rp_ts_jiffies != 0) {
-			/*
-			 * ??? Does what we do here need to be sensitive to
-			 * ??? act_state or remote_act_state?
-			 */
-			spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock,
-					  irq_flags);
-			if (part_uv->act_state_req == 0)
-				xpc_activate_IRQ_rcvd++;
-			part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
-			spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock,
-					       irq_flags);
-			wakeup_hb_checker++;
+		(*wakeup_hb_checker)++;
+	}
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr;
+	short partid;
+	struct xpc_partition *part;
+	int wakeup_hb_checker = 0;
+
+	while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) {
+
+		partid = msg_hdr->partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+				"received invalid partid=0x%x in message\n",
+				partid);
+		} else {
+			part = &xpc_partitions[partid];
+			if (xpc_part_ref(part)) {
+				xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+							    &wakeup_hb_checker);
+				xpc_part_deref(part);
+			}
 		}
 
 		gru_free_message(xpc_activate_mq_uv, msg_hdr);
@@ -616,14 +643,82 @@ xpc_request_partition_deactivation_uv(struct xpc_partition *part)
 	}
 }
 
+static void
+xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
+{
+	/* nothing needs to be done */
+	return;
+}
+
+static void
+xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
+{
+	head->first = NULL;
+	head->last = NULL;
+	spin_lock_init(&head->lock);
+	head->n_entries = 0;
+}
+
+static void *
+xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
+{
+	unsigned long irq_flags;
+	struct xpc_fifo_entry_uv *first;
+
+	spin_lock_irqsave(&head->lock, irq_flags);
+	first = head->first;
+	if (head->first != NULL) {	/* an empty FIFO returns NULL */
+		head->first = first->next;
+		if (head->first == NULL)
+			head->last = NULL;
+		head->n_entries--;	/* one entry has left the FIFO */
+		BUG_ON(head->n_entries < 0);
+		first->next = NULL;	/* detach the removed entry */
+	}
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+	return first;
+}
+
+static void
+xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
+		      struct xpc_fifo_entry_uv *last)
+{
+	unsigned long irq_flags;
+
+	last->next = NULL;
+	spin_lock_irqsave(&head->lock, irq_flags);
+	if (head->last != NULL)
+		head->last->next = last;
+	else
+		head->first = last;	/* FIFO was empty */
+	head->last = last;
+	head->n_entries++;	/* one more entry is now queued */
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+}
+
+static int
+xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
+{
+	return head->n_entries;
+}
+
 /*
  * Setup the channel structures that are uv specific.
  */
 static enum xp_retval
 xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
 {
-	/* !!! this function needs fleshing out */
-	return xpUnsupported;
+	struct xpc_channel_uv *ch_uv;
+	int ch_number;
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch_uv = &part->channels[ch_number].sn.uv;
+
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+	}
+
+	return xpSuccess;
 }
 
 /*
@@ -632,7 +727,7 @@ xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
 static void
 xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
 {
-	/* !!! this function needs fleshing out */
+	/* nothing needs to be done */
 	return;
 }
 
@@ -679,21 +774,115 @@ xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
 	return chctl.all_flags;
 }
 
+static enum xp_retval
+xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
+		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->send_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = &ch_uv->send_msg_slots[entry];
+
+			msg_slot->msg_slot_number = entry;
+			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
+					      &msg_slot->next);
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries)
+			ch->local_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+static enum xp_retval
+xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * ch->entry_size;
+		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->recv_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = ch_uv->recv_msg_slots + entry *
+			    ch->entry_size;
+
+			msg_slot->hdr.msg_slot_number = entry;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries)
+			ch->remote_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+/*
+ * Allocate the channel's send and recv msg_slots; on failure none are kept.
+ */
 static enum xp_retval
 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
 {
-	/* !!! this function needs fleshing out */
-	return xpUnsupported;
+	enum xp_retval ret;	/* not static: one per caller */
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ret = xpc_allocate_send_msg_slot_uv(ch);
+	if (ret == xpSuccess) {
+		ret = xpc_allocate_recv_msg_slot_uv(ch);
+		if (ret != xpSuccess) {
+			kfree(ch_uv->send_msg_slots);
+			ch_uv->send_msg_slots = NULL;	/* no stale pointer */
+			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		}
+	}
+	return ret;
 }
 
+/*
+ * Free up msg_slots and clear other stuff that were setup for the specified
+ * channel.
+ */
 static void
 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
 {
 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
 
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
 	ch_uv->remote_notify_mq_gpa = 0;
 
-	/* !!! this function needs fleshing out */
+	if (ch->flags & XPC_C_SETUP) {
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		kfree(ch_uv->send_msg_slots);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+		kfree(ch_uv->recv_msg_slots);
+	}
 }
 
 static void
@@ -723,7 +912,7 @@ xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
 	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
 
 	msg.ch_number = ch->number;
-	msg.msg_size = ch->msg_size;
+	msg.entry_size = ch->entry_size;
 	msg.local_nentries = ch->local_nentries;
 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
@@ -742,6 +931,18 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
 }
 
+static void
+xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
 static void
 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
 			       unsigned long msgqueue_pa)
@@ -798,11 +999,358 @@ xpc_any_partition_engaged_uv(void)
 	return 0;
 }
 
-static struct xpc_msg *
-xpc_get_deliverable_msg_uv(struct xpc_channel *ch)
+static enum xp_retval
+xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
+			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
+{
+	enum xp_retval ret;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	struct xpc_fifo_entry_uv *entry;
+
+	while (1) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
+		if (entry != NULL)
+			break;
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
+	*address_of_msg_slot = msg_slot;
+	return xpSuccess;
+}
+
+static void
+xpc_free_msg_slot_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot)
+{
+	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
+
+	/* wakeup anyone waiting for a free msg slot */
+	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+		wake_up(&ch->msg_allocate_wq);
+}
+
+static void
+xpc_notify_sender_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot,
+		     enum xp_retval reason)
+{
+	xpc_notify_func func = msg_slot->func;
+
+	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
+
+		atomic_dec(&ch->n_to_notify);
+
+		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+
+		func(reason, ch->partid, ch->number, msg_slot->key);
+
+		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+	}
+}
+
+static void
+xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
+
+	msg_slot = &ch->sn.uv.send_msg_slots[entry];
+
+	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
+	msg_slot->msg_slot_number += ch->local_nentries;
+
+	if (msg_slot->func != NULL)
+		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
+
+	xpc_free_msg_slot_uv(ch, msg_slot);
+}
+
+static void
+xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct xpc_channel *ch;
+	struct xpc_channel_uv *ch_uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int ch_number = msg->hdr.ch_number;
+
+	if (unlikely(ch_number >= part->nchannels)) {
+		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
+			"channel number=0x%x in message from partid=%d\n",
+			ch_number, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadChannelNumber;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+		return;
+	}
+
+	ch = &part->channels[ch_number];
+	xpc_msgqueue_ref(ch);
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* see if we're really dealing with an ACK for a previously sent msg */
+	if (msg->hdr.size == 0) {
+		xpc_handle_notify_mq_ack_uv(ch, msg);
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* we're dealing with a normal message sent via the notify_mq */
+	ch_uv = &ch->sn.uv;
+
+	msg_slot = (struct xpc_notify_mq_msg_uv *)((u64)ch_uv->recv_msg_slots +
+		    (msg->hdr.msg_slot_number % ch->remote_nentries) *
+		    ch->entry_size);
+
+	BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number);
+	BUG_ON(msg_slot->hdr.size != 0);
+
+	memcpy(msg_slot, msg, msg->hdr.size);
+
+	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
+
+	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
+		/*
+		 * If there is an existing idle kthread get it to deliver
+		 * the payload, otherwise we'll have to get the channel mgr
+		 * for this partition to create a kthread to do the delivery.
+		 */
+		if (atomic_read(&ch->kthreads_idle) > 0)
+			wake_up_nr(&ch->idle_wq, 1);
+		else
+			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
+	}
+	xpc_msgqueue_deref(ch);
+}
+
+static irqreturn_t
+xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_notify_mq_msg_uv *msg;
+	short partid;
+	struct xpc_partition *part;
+
+	while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) {
+
+		partid = msg->hdr.partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
+				"invalid partid=0x%x in message\n", partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			if (xpc_part_ref(part)) {
+				xpc_handle_notify_mq_msg_uv(part, msg);
+				xpc_part_deref(part);
+			}
+		}
+
+		gru_free_message(xpc_notify_mq_uv, msg);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int
+xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
+{
+	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
+}
+
+static void
+xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	int ndeliverable_payloads;
+
+	xpc_msgqueue_ref(ch);
+
+	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
+
+	if (ndeliverable_payloads > 0 &&
+	    (ch->flags & XPC_C_CONNECTED) &&
+	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
+
+		xpc_activate_kthreads(ch, ndeliverable_payloads);
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static enum xp_retval
+xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
+		    u16 payload_size, u8 notify_type, xpc_notify_func func,
+		    void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_send_msg_slot_uv *msg_slot = NULL;
+	struct xpc_notify_mq_msg_uv *msg;
+	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
+	size_t msg_size;
+
+	DBUG_ON(notify_type != XPC_N_CALL);
+
+	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
+	if (msg_size > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	if (func != NULL) {
+		atomic_inc(&ch->n_to_notify);
+
+		msg_slot->key = key;
+		wmb(); /* a non-NULL func must hit memory after the key */
+		msg_slot->func = func;
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			ret = ch->reason;
+			goto out_2;
+		}
+	}
+
+	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.ch_number = ch->number;
+	msg->hdr.size = msg_size;
+	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+	memcpy(&msg->payload, payload, payload_size);
+
+	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size);
+	if (ret == xpSuccess)
+		goto out_1;
+
+	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+	if (func != NULL) {
+		/*
+		 * Try to NULL the msg_slot's func field. If we fail, then
+		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+		 * case we need to pretend we succeeded to send the message
+		 * since the user will get a callout for the disconnect error
+		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
+		 * error returned here will confuse them. Additionally, since
+		 * in this case the channel is being disconnected we don't need
+		 * to put the msg_slot back on the free list.
+		 */
+		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+			ret = xpSuccess;
+			goto out_1;
+		}
+
+		msg_slot->key = NULL;
+		atomic_dec(&ch->n_to_notify);
+	}
+	xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry;
+
+	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+	for (entry = 0; entry < ch->local_nentries; entry++) {
+
+		if (atomic_read(&ch->n_to_notify) == 0)
+			break;
+
+		msg_slot = &ch->sn.uv.send_msg_slots[entry];
+		if (msg_slot->func != NULL)
+			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+	}
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
+{
+	struct xpc_fifo_entry_uv *entry;
+	struct xpc_notify_mq_msg_uv *msg;
+	void *payload = NULL;
+
+	if (!(ch->flags & XPC_C_DISCONNECTING)) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
+		if (entry != NULL) {
+			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
+					   hdr.u.next);
+			payload = &msg->payload;
+		}
+	}
+	return payload;
+}
+
+static void
+xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
 {
-	/* !!! this function needs fleshing out */
-	return NULL;
+	struct xpc_notify_mq_msg_uv *msg;
+	enum xp_retval ret;
+
+	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
+
+	/* return an ACK to the sender of this message */
+
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
+
+	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg,
+			       sizeof(struct xpc_notify_mq_msghdr_uv));
+	if (ret != xpSuccess)
+		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+
+	msg->hdr.msg_slot_number += ch->remote_nentries;
 }
 
 int
@@ -824,6 +1372,8 @@ xpc_init_uv(void)
 	    xpc_request_partition_reactivation_uv;
 	xpc_request_partition_deactivation =
 	    xpc_request_partition_deactivation_uv;
+	xpc_cancel_partition_deactivation_request =
+	    xpc_cancel_partition_deactivation_request_uv;
 
 	xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
 	xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
@@ -848,7 +1398,18 @@ xpc_init_uv(void)
 	xpc_partition_engaged = xpc_partition_engaged_uv;
 	xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
 
-	xpc_get_deliverable_msg = xpc_get_deliverable_msg_uv;
+	xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
+	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
+	xpc_send_payload = xpc_send_payload_uv;
+	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
+	xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
+	xpc_received_payload = xpc_received_payload_uv;
+
+	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+			XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
 
 	/* ??? The cpuid argument's value is 0, is that what we want? */
 	/* !!! The irq argument's value isn't correct. */
@@ -857,12 +1418,26 @@ xpc_init_uv(void)
 	if (xpc_activate_mq_uv == NULL)
 		return -ENOMEM;
 
+	/* ??? The cpuid argument's value is 0, is that what we want? */
+	/* !!! The irq argument's value isn't correct. */
+	xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0,
+						xpc_handle_notify_IRQ_uv);
+	if (xpc_notify_mq_uv == NULL) {
+		/* !!! The irq argument's value isn't correct. */
+		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv,
+				      XPC_ACTIVATE_MQ_SIZE_UV, 0);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 void
 xpc_exit_uv(void)
 {
+	/* !!! The irq argument's value isn't correct. */
+	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0);
+
 	/* !!! The irq argument's value isn't correct. */
 	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0);
 }
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 4f5d6223011..71513b3af70 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -57,11 +57,10 @@ struct xpnet_message {
  *
  * XPC expects each message to exist in an individual cacheline.
  */
-#define XPNET_MSG_SIZE		(L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
+#define XPNET_MSG_SIZE		XPC_MSG_PAYLOAD_MAX_SIZE
 #define XPNET_MSG_DATA_MAX	\
-		(XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
-#define XPNET_MSG_ALIGNED_SIZE	(L1_CACHE_ALIGN(XPNET_MSG_SIZE))
-#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
+		(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPC_MSG_MAX_SIZE)
 
 #define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
 #define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
@@ -408,6 +407,7 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 {
 	u8 msg_buffer[XPNET_MSG_SIZE];
 	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+	u16 msg_size = sizeof(struct xpnet_message);
 	enum xp_retval ret;
 
 	msg->embedded_bytes = embedded_bytes;
@@ -417,6 +417,7 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 			&msg->data, skb->data, (size_t)embedded_bytes);
 		skb_copy_from_linear_data(skb, &msg->data,
 					  (size_t)embedded_bytes);
+		msg_size += embedded_bytes - 1;
 	} else {
 		msg->version = XPNET_VERSION;
 	}
@@ -435,7 +436,7 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 	atomic_inc(&queued_msg->use_count);
 
 	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
-			      XPNET_MSG_SIZE, xpnet_send_completed, queued_msg);
+			      msg_size, xpnet_send_completed, queued_msg);
 	if (unlikely(ret != xpSuccess))
 		atomic_dec(&queued_msg->use_count);
 }
-- 
cgit v1.2.3


From 3b0de7b364c8b8a975f201fdae2fb394c876eb56 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Fri, 1 Aug 2008 14:19:08 -0500
Subject: add dependency of CONFIG_SGI_XP upon CONFIG_NET

Add a dependency of CONFIG_SGI_XP upon CONFIG_NET to Kconfig.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index fa50e9ede0e..0db06f1f4b5 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -426,6 +426,7 @@ config ENCLOSURE_SERVICES
 
 config SGI_XP
 	tristate "Support communication between SGI SSIs"
+	depends on NET
 	depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
-- 
cgit v1.2.3


From 46bd58eab21650fe820e4e3a27a6a134892cc2eb Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Fri, 1 Aug 2008 09:55:26 -0500
Subject: add reverse dependency of CONFIG_SGI_XP upon CONFIG_SGI_GRU

Add a reverse dependency of CONFIG_SGI_XP upon CONFIG_SGI_GRU to Kconfig.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 0db06f1f4b5..82af385460e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -430,6 +430,7 @@ config SGI_XP
 	depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select SGI_GRU if IA64_GENERIC || IA64_SGI_UV || (X86_64 && SMP)
 	---help---
 	  An SGI machine can be divided into multiple Single System
 	  Images which act independently of each other and have
-- 
cgit v1.2.3


From ee694d6b4106ca09dcf23f839b44efd152a1da82 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 4 Aug 2008 13:39:28 -0700
Subject: [IA64] Fix uniprocessor build w.r.t. SGI_XP and SGI_GRU

The SGI XP and GRU drivers only work on SMP systems ... the Kconfig
file only disallowed them for non-SMP X86.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/misc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 82af385460e..a726f3b01a6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -427,10 +427,10 @@ config ENCLOSURE_SERVICES
 config SGI_XP
 	tristate "Support communication between SGI SSIs"
 	depends on NET
-	depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
+	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
-	select SGI_GRU if IA64_GENERIC || IA64_SGI_UV || (X86_64 && SMP)
+	select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
 	---help---
 	  An SGI machine can be divided into multiple Single System
 	  Images which act independently of each other and have
-- 
cgit v1.2.3


From dc39778f952a820b7da45756a900a4778da343cd Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Tue, 5 Aug 2008 13:01:33 -0700
Subject: drivers/misc/sgi-gru/grutlbpurge.c: removed duplicated #include

Removed duplicated include <linux/delay.h> in
drivers/misc/sgi-gru/grutlbpurge.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Cc: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grutlbpurge.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index bcfd5425e2e..c84496a7769 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -34,7 +34,6 @@
 #include <linux/hugetlb.h>
 #include <linux/delay.h>
 #include <linux/timex.h>
-#include <linux/delay.h>
 #include <linux/srcu.h>
 #include <asm/processor.h>
 #include "gru.h"
-- 
cgit v1.2.3


From 6a4ad39b3de60ad0e75a78098be0f0eb1722b753 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 12 Aug 2008 15:08:51 -0700
Subject: GRU: fix preprocessor symbol for sparse

Fix preprocessor symbol so that sparse sees it and does not generate
errors:

  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"
  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"
  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"
  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"
  drivers/misc/sgi-gru/grutlbpurge.c:185:11: error: undefined identifier 'GRUREGION'
  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"
  drivers/misc/sgi-gru/grutables.h:286:2: error: "Unsupported architecture"

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grutables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index 4251018f70f..a78f70deeb5 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -279,7 +279,7 @@ struct gru_stats_s {
 #if defined CONFIG_IA64
 #define VADDR_HI_BIT		64
 #define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
-#elif defined __x86_64
+#elif defined CONFIG_X86_64
 #define VADDR_HI_BIT		48
 #define GRUREGION(addr)		(0)		/* ZZZ could do better */
 #else
-- 
cgit v1.2.3


From 5c742b45dd5fbbb6cf74d3378341704f4b23c5e8 Mon Sep 17 00:00:00 2001
From: Carlos Corbacho <carlos@strangeworlds.co.uk>
Date: Wed, 6 Aug 2008 19:13:56 +0100
Subject: acer-wmi: Fix wireless and bluetooth on early AMW0 v2 laptops

In the old acer_acpi, I discovered that on some of the newer AMW0 laptops
that supported the WMID methods, they don't work properly for setting the
wireless and bluetooth values.

So for the AMW0 V2 laptops, we want to use both the 'old' AMW0 and the
'new' WMID methods for setting wireless & bluetooth to guarantee we always
enable it.

This was fixed in acer_acpi some time ago, but I forgot to port the patch
over to acer-wmi when it was merged.

(Without this patch, early AMW0 V2 laptops such as the Aspire 5040 won't
work with acer-wmi, where-as they did with the old acer_acpi).

AK: fix compilation

Signed-off-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
CC: stable@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/misc/acer-wmi.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c
index e7a3fe508df..b2d9878dc3f 100644
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/misc/acer-wmi.c
@@ -803,11 +803,30 @@ static acpi_status get_u32(u32 *value, u32 cap)
 
 static acpi_status set_u32(u32 value, u32 cap)
 {
+	acpi_status status;
+
 	if (interface->capability & cap) {
 		switch (interface->type) {
 		case ACER_AMW0:
 			return AMW0_set_u32(value, cap, interface);
 		case ACER_AMW0_V2:
+			if (cap == ACER_CAP_MAILLED)
+				return AMW0_set_u32(value, cap, interface);
+
+			/*
+			 * On some models, some WMID methods don't toggle
+			 * properly. For those cases, we want to run the AMW0
+			 * method afterwards to be certain we've really toggled
+			 * the device state.
+			 */
+			if (cap == ACER_CAP_WIRELESS ||
+				cap == ACER_CAP_BLUETOOTH) {
+				status = WMID_set_u32(value, cap, interface);
+				if (ACPI_FAILURE(status))
+					return status;
+
+				return AMW0_set_u32(value, cap, interface);
+			}
 		case ACER_WMID:
 			return WMID_set_u32(value, cap, interface);
 		default:
-- 
cgit v1.2.3


From f14413184b1de4dcbd5ec3e7c129c3ce2079f543 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Wed, 20 Aug 2008 14:08:57 -0700
Subject: eeepc-laptop: fix use after free

eeepc-laptop uses the hwmon struct after unregistering the device, causing
an oops on module unload.  Flip the ordering to fix.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Corentin Chary <corentincj@iksaif.net>
Cc: Karol Kozimor <sziwan@users.sourceforge.net>
Cc: <stable@kernel.org>		[2.6.26.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/eeepc-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c
index 9e8d79e7e9f..facdb9893c8 100644
--- a/drivers/misc/eeepc-laptop.c
+++ b/drivers/misc/eeepc-laptop.c
@@ -553,9 +553,9 @@ static void eeepc_hwmon_exit(void)
 	hwmon = eeepc_hwmon_device;
 	if (!hwmon)
 		return ;
-	hwmon_device_unregister(hwmon);
 	sysfs_remove_group(&hwmon->kobj,
 			   &hwmon_attribute_group);
+	hwmon_device_unregister(hwmon);
 	eeepc_hwmon_device = NULL;
 }
 
-- 
cgit v1.2.3


From 7a8fc9b248e77a4eab0613acf30a6811799786b3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sun, 17 Aug 2008 17:36:59 +0300
Subject: removed unused #include <linux/version.h>'s

This patch lets the files using linux/version.h match the files that
#include it.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/eeprom_93cx6.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/eeprom_93cx6.c b/drivers/misc/eeprom_93cx6.c
index ea55654e594..15b1780025c 100644
--- a/drivers/misc/eeprom_93cx6.c
+++ b/drivers/misc/eeprom_93cx6.c
@@ -26,7 +26,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/eeprom_93cx6.h>
 
-- 
cgit v1.2.3


From 83097aca8567a0bd593534853b71fe0fa9a75d69 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 23 Aug 2008 21:45:21 -0700
Subject: Fix oops in acer_wmi driver (acer_wmi_init)

The acer_wmi driver does a DMI scan for quirks, and then sets flags into the
"interface" datastructure for some cases. However, the quirks happen real early
before "interface" is per se initialized from NULL.

The patch below 1) adds a NULL pointer check and 2) (re)runs the quirks at the
end, when "interface" has its final value.

Reported-by: kerneloops.org
Acked-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
CC: stable@vger.kernel.org
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/acer-wmi.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c
index b2d9878dc3f..c6c77a505ec 100644
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/misc/acer-wmi.c
@@ -192,6 +192,9 @@ static struct quirk_entry *quirks;
 
 static void set_quirks(void)
 {
+	if (!interface)
+		return;
+
 	if (quirks->mailled)
 		interface->capability |= ACER_CAP_MAILLED;
 
@@ -1237,6 +1240,8 @@ static int __init acer_wmi_init(void)
 		return -ENODEV;
 	}
 
+	set_quirks();
+
 	if (platform_driver_register(&acer_platform_driver)) {
 		printk(ACER_ERR "Unable to register platform driver.\n");
 		goto error_platform_register;
-- 
cgit v1.2.3


From 9662e0802445a1f56cef11bbd0d520b07238424a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 27 Aug 2008 21:04:49 -0700
Subject: ACPI: thinkpad-acpi: wan radio control is not experimental

The WWAN radio control has been working well for over three years,
and is no longer experimental.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/misc/thinkpad_acpi.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index d3eb7903c34..6b9300779a4 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -3086,7 +3086,6 @@ static struct ibm_struct wan_driver_data = {
 	.read = wan_read,
 	.write = wan_write,
 	.exit = wan_exit,
-	.flags.experimental = 1,
 };
 
 /*************************************************************************
-- 
cgit v1.2.3


From 39dbbb4523754df4a822c69191a848a03e556dc7 Mon Sep 17 00:00:00 2001
From: Russ Dill <russ.dill@gmail.com>
Date: Tue, 2 Sep 2008 14:35:40 -0700
Subject: acer-wmi: remove debugfs entries upon unloading

The exit function neglects to remove debugfs entries, leading to a BUG
on reload.

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Russ Dill <Russ.Dill@gmail.com>
Acked-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/acer-wmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c
index c6c77a505ec..d8b0d326e45 100644
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/misc/acer-wmi.c
@@ -1189,7 +1189,7 @@ static int create_debugfs(void)
 	return 0;
 
 error_debugfs:
-		remove_debugfs();
+	remove_debugfs();
 	return -ENOMEM;
 }
 
@@ -1272,6 +1272,7 @@ error_platform_register:
 static void __exit acer_wmi_exit(void)
 {
 	remove_sysfs(acer_platform_device);
+	remove_debugfs();
 	platform_device_del(acer_platform_device);
 	platform_driver_unregister(&acer_platform_driver);
 
-- 
cgit v1.2.3


From 3f6e2f137c5b83c3c2d48fae971e845c3450cc7c Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Tue, 2 Sep 2008 14:36:00 -0700
Subject: hp-wmi: update to match current rfkill semantics

hp-wmi currently changes the RFKill state by altering the struct members
rather than using the dedicated interface, meaning that update events
won't be pushed to userspace.  This patch fixes that, along with fixing
the declared type of the WWAN kill switch.  It also ensures that rfkill
interfaces are only registered for hardware that exists.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Ivo van Doorn <ivdoorn@gmail.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/hp-wmi.c | 81 +++++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index 1dbcbcb323a..c6c6d09b376 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -177,9 +177,9 @@ static int hp_wmi_wifi_state(void)
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x100)
-		return 1;
+		return RFKILL_STATE_UNBLOCKED;
 	else
-		return 0;
+		return RFKILL_STATE_SOFT_BLOCKED;
 }
 
 static int hp_wmi_bluetooth_state(void)
@@ -187,9 +187,9 @@ static int hp_wmi_bluetooth_state(void)
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x10000)
-		return 1;
+		return RFKILL_STATE_UNBLOCKED;
 	else
-		return 0;
+		return RFKILL_STATE_SOFT_BLOCKED;
 }
 
 static int hp_wmi_wwan_state(void)
@@ -197,9 +197,9 @@ static int hp_wmi_wwan_state(void)
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x1000000)
-		return 1;
+		return RFKILL_STATE_UNBLOCKED;
 	else
-		return 0;
+		return RFKILL_STATE_SOFT_BLOCKED;
 }
 
 static ssize_t show_display(struct device *dev, struct device_attribute *attr,
@@ -338,12 +338,14 @@ void hp_wmi_notify(u32 value, void *context)
 			}
 		} else if (eventcode == 0x5) {
 			if (wifi_rfkill)
-				wifi_rfkill->state = hp_wmi_wifi_state();
+				rfkill_force_state(wifi_rfkill,
+						   hp_wmi_wifi_state());
 			if (bluetooth_rfkill)
-				bluetooth_rfkill->state =
-				    hp_wmi_bluetooth_state();
+				rfkill_force_state(bluetooth_rfkill,
+						   hp_wmi_bluetooth_state());
 			if (wwan_rfkill)
-				wwan_rfkill->state = hp_wmi_wwan_state();
+				rfkill_force_state(wwan_rfkill,
+						   hp_wmi_wwan_state());
 		} else
 			printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
 			       eventcode);
@@ -398,6 +400,7 @@ static void cleanup_sysfs(struct platform_device *device)
 static int __init hp_wmi_bios_setup(struct platform_device *device)
 {
 	int err;
+	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	err = device_create_file(&device->dev, &dev_attr_display);
 	if (err)
@@ -412,28 +415,33 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 	if (err)
 		goto add_sysfs_error;
 
-	wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
-	wifi_rfkill->name = "hp-wifi";
-	wifi_rfkill->state = hp_wmi_wifi_state();
-	wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
-	wifi_rfkill->user_claim_unsupported = 1;
-
-	bluetooth_rfkill = rfkill_allocate(&device->dev,
-					   RFKILL_TYPE_BLUETOOTH);
-	bluetooth_rfkill->name = "hp-bluetooth";
-	bluetooth_rfkill->state = hp_wmi_bluetooth_state();
-	bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
-	bluetooth_rfkill->user_claim_unsupported = 1;
-
-	wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
-	wwan_rfkill->name = "hp-wwan";
-	wwan_rfkill->state = hp_wmi_wwan_state();
-	wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
-	wwan_rfkill->user_claim_unsupported = 1;
-
-	rfkill_register(wifi_rfkill);
-	rfkill_register(bluetooth_rfkill);
-	rfkill_register(wwan_rfkill);
+	if (wireless & 0x1) {
+		wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
+		wifi_rfkill->name = "hp-wifi";
+		wifi_rfkill->state = hp_wmi_wifi_state();
+		wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+		wifi_rfkill->user_claim_unsupported = 1;
+		rfkill_register(wifi_rfkill);
+	}
+
+	if (wireless & 0x2) {
+		bluetooth_rfkill = rfkill_allocate(&device->dev,
+						   RFKILL_TYPE_BLUETOOTH);
+		bluetooth_rfkill->name = "hp-bluetooth";
+		bluetooth_rfkill->state = hp_wmi_bluetooth_state();
+		bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+		bluetooth_rfkill->user_claim_unsupported = 1;
+		rfkill_register(bluetooth_rfkill);
+	}
+
+	if (wireless & 0x4) {
+		wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WWAN);
+		wwan_rfkill->name = "hp-wwan";
+		wwan_rfkill->state = hp_wmi_wwan_state();
+		wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+		wwan_rfkill->user_claim_unsupported = 1;
+		rfkill_register(wwan_rfkill);
+	}
 
 	return 0;
 add_sysfs_error:
@@ -445,9 +453,12 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 {
 	cleanup_sysfs(device);
 
-	rfkill_unregister(wifi_rfkill);
-	rfkill_unregister(bluetooth_rfkill);
-	rfkill_unregister(wwan_rfkill);
+	if (wifi_rfkill)
+		rfkill_unregister(wifi_rfkill);
+	if (bluetooth_rfkill)
+		rfkill_unregister(bluetooth_rfkill);
+	if (wwan_rfkill)
+		rfkill_unregister(wwan_rfkill);
 
 	return 0;
 }
-- 
cgit v1.2.3


From a8823aefd142d2a9c4b3661bf8712ccd2da1b220 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Tue, 2 Sep 2008 14:36:03 -0700
Subject: hp-wmi: add proper hotkey support

It turns out that event 0x4 merely indicates that a hotkey has been
pressed, not which one.  A further query is required in order to determine
the actual keypress.  The following patch adds support for that along with
the known keycodes.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/hp-wmi.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index c6c6d09b376..6d407c2a4f9 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -49,6 +49,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
 #define HPWMI_ALS_QUERY 0x3
 #define HPWMI_DOCK_QUERY 0x4
 #define HPWMI_WIRELESS_QUERY 0x5
+#define HPWMI_HOTKEY_QUERY 0xc
 
 static int __init hp_wmi_bios_setup(struct platform_device *device);
 static int __exit hp_wmi_bios_remove(struct platform_device *device);
@@ -69,7 +70,7 @@ struct bios_return {
 
 struct key_entry {
 	char type;		/* See KE_* below */
-	u8 code;
+	u16 code;
 	u16 keycode;
 };
 
@@ -79,7 +80,9 @@ static struct key_entry hp_wmi_keymap[] = {
 	{KE_SW, 0x01, SW_DOCK},
 	{KE_KEY, 0x02, KEY_BRIGHTNESSUP},
 	{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
-	{KE_KEY, 0x04, KEY_HELP},
+	{KE_KEY, 0x20e6, KEY_PROG1},
+	{KE_KEY, 0x2142, KEY_MEDIA},
+	{KE_KEY, 0x231b, KEY_HELP},
 	{KE_END, 0}
 };
 
@@ -318,6 +321,9 @@ void hp_wmi_notify(u32 value, void *context)
 
 	if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) {
 		int eventcode = *((u8 *) obj->buffer.pointer);
+		if (eventcode == 0x4)
+			eventcode = hp_wmi_perform_query(HPWMI_HOTKEY_QUERY, 0,
+							 0);
 		key = hp_wmi_get_entry_by_scancode(eventcode);
 		if (key) {
 			switch (key->type) {
-- 
cgit v1.2.3


From d8196a93b1ce9a5abb410f39f9375912c9e53675 Mon Sep 17 00:00:00 2001
From: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Date: Fri, 29 Aug 2008 11:06:21 +0930
Subject: fujitsu-laptop: fix regression for P8010 in 2.6.27-rc

The following patch (based on a patch from Stephen Gildea) fixes a
regression with the LCD brightness keys on Fujitsu P8010 laptops which was
observed with the 2.6.27-rc series (basically they stopped working due to
changes within the fujitsu-laptop and video modules).  Please apply to
2.6.27-rc and acpi git.

A more complete solution for this laptop will be included in an upcoming
patch, hopefully for 2.6.28.  In the meantime this restores most
functionality for P8010 users.

Signed-off-by: Stephen Gildea <stepheng+fujitsu-laptop@gildea.com>
Signed-off-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/misc/fujitsu-laptop.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c
index 7a1ef6c262d..3e56203e494 100644
--- a/drivers/misc/fujitsu-laptop.c
+++ b/drivers/misc/fujitsu-laptop.c
@@ -463,6 +463,13 @@ static struct dmi_system_id __initdata fujitsu_dmi_table[] = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S6410"),
 		     },
 	 .callback = dmi_check_cb_s6410},
+	{
+	 .ident = "FUJITSU LifeBook P8010",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook P8010"),
+		    },
+	 .callback = dmi_check_cb_s6410},
 	{}
 };
 
-- 
cgit v1.2.3


From 7d964c352b06aabb895e39d3b479e105bd9d1ca0 Mon Sep 17 00:00:00 2001
From: Russ Dill <russ.dill@gmail.com>
Date: Tue, 2 Sep 2008 14:35:40 -0700
Subject: acer-wmi: remove debugfs entries upon unloading

The exit function neglects to remove debugfs entries, leading to a BUG
on reload.

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Russ Dill <Russ.Dill@gmail.com>
Acked-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/misc/acer-wmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c
index e7a3fe508df..9c883f8b389 100644
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/misc/acer-wmi.c
@@ -1167,7 +1167,7 @@ static int create_debugfs(void)
 	return 0;
 
 error_debugfs:
-		remove_debugfs();
+	remove_debugfs();
 	return -ENOMEM;
 }
 
@@ -1248,6 +1248,7 @@ error_platform_register:
 static void __exit acer_wmi_exit(void)
 {
 	remove_sysfs(acer_platform_device);
+	remove_debugfs();
 	platform_device_del(acer_platform_device);
 	platform_driver_unregister(&acer_platform_driver);
 
-- 
cgit v1.2.3


From 8275d102f8dbaa4f437f6b03b00d85bfb4e16025 Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Sat, 13 Sep 2008 02:33:22 -0700
Subject: ia64: fix panic during `modprobe -r xpc'

If you are on ia64 and you modprobe xpc then modprobe -r xpc, you
immediately get a panic.  xpc depends on xp which depends on gru for a
symbol.  That symbol is only used when we are running on UV hardware.

Currently, the GRU driver detects we are not on UV hardware and does no
initializing.  It does not do the same check when unloading.  As a result,
the gru driver attempts to tear down stuff that was not set up.

This is a simple two-line workaround to get us through this release.  Once
2.6.28 is opened, we need to rework the symbols that xp is depending on
from gru so the gru driver can properly fail to load when hardware is not
available.

Signed-off-by: Robin Holt <holt@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grufile.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/misc')

diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 23c91f5f6b6..d61cee796ef 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -445,6 +445,9 @@ static void __exit gru_exit(void)
 	int order = get_order(sizeof(struct gru_state) *
 			      GRU_CHIPLETS_PER_BLADE);
 
+	if (!IS_UV())
+		return;
+
 	for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
 		free_irq(IRQ_GRU + i, NULL);
 
-- 
cgit v1.2.3


From 04dcd84bc79d9f756bf5b9fc16c7df3344823ca8 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Thu, 9 Oct 2008 15:33:57 +0200
Subject: eeepc-laptop: Fix hwmon interface

Creates a name file in the sysfs directory, that
is needed for the libsensors library to work.
Also rename fan1_pwm to pwm1 and scale its value as needed.

This fixes bug #11520:
http://bugzilla.kernel.org/show_bug.cgi?id=11520

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/misc/eeepc-laptop.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'drivers/misc')

diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c
index facdb9893c8..1ee8501e90f 100644
--- a/drivers/misc/eeepc-laptop.c
+++ b/drivers/misc/eeepc-laptop.c
@@ -450,12 +450,14 @@ static int eeepc_get_fan_pwm(void)
 	int value = 0;
 
 	read_acpi_int(NULL, EEEPC_EC_FAN_PWM, &value);
+	value = value * 255 / 100;
 	return (value);
 }
 
 static void eeepc_set_fan_pwm(int value)
 {
-	value = SENSORS_LIMIT(value, 0, 100);
+	value = SENSORS_LIMIT(value, 0, 255);
+	value = value * 100 / 255;
 	ec_write(EEEPC_EC_SC02, value);
 }
 
@@ -520,15 +522,23 @@ static ssize_t show_sys_hwmon(int (*get)(void), char *buf)
 	static SENSOR_DEVICE_ATTR(_name, _mode, show_##_name, store_##_name, 0);
 
 EEEPC_CREATE_SENSOR_ATTR(fan1_input, S_IRUGO, eeepc_get_fan_rpm, NULL);
-EEEPC_CREATE_SENSOR_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
+EEEPC_CREATE_SENSOR_ATTR(pwm1, S_IRUGO | S_IWUSR,
 			 eeepc_get_fan_pwm, eeepc_set_fan_pwm);
 EEEPC_CREATE_SENSOR_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
 			 eeepc_get_fan_ctrl, eeepc_set_fan_ctrl);
 
+static ssize_t
+show_name(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "eeepc\n");
+}
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0);
+
 static struct attribute *hwmon_attributes[] = {
-	&sensor_dev_attr_fan1_pwm.dev_attr.attr,
+	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
+	&sensor_dev_attr_name.dev_attr.attr,
 	NULL
 };
 
-- 
cgit v1.2.3