57 files changed, 2073 insertions, 2155 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3deced637f0..945c15a0722 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -60,6 +60,7 @@ choice
 
 config IA64_GENERIC
 	bool "generic"
+	select ACPI
 	select NUMA
 	select ACPI_NUMA
 	select VIRTUAL_MEM_MAP
@@ -297,11 +298,6 @@ config PREEMPT
 
 source "mm/Kconfig"
 
-config HAVE_DEC_LOCK
-	bool
-	depends on (SMP || PREEMPT)
-	default y
-
 config IA32_SUPPORT
 	bool "Support for Linux/x86 binaries"
 	help
@@ -338,11 +334,6 @@ config IA64_PALINFO
 	  To use this option, you have to ensure that the "/proc file system
 	  support" (CONFIG_PROC_FS) is enabled, too.
 
-config ACPI_DEALLOCATE_IRQ
-	bool
-	depends on IOSAPIC && EXPERIMENTAL
-	default y
-
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
@@ -351,38 +342,10 @@ endmenu
 
 menu "Power management and ACPI"
 
-config PM
-	bool "Power Management support"
-	depends on !IA64_HP_SIM
-	default y
-	help
-	  "Power Management" means that parts of your computer are shut
-	  off or put into a power conserving "sleep" mode if they are not
-	  being used.  There are two competing standards for doing this: APM
-	  and ACPI.  If you want to use either one, say Y here and then also
-	  to the requisite support below.
-
-	  Power Management is most important for battery powered laptop
-	  computers; if you have a laptop, check out the Linux Laptop home
-	  page on the WWW at <http://www.linux-on-laptops.com/> and the
-	  Battery Powered Linux mini-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  Note that, even if you say N here, Linux on the x86 architecture
-	  will issue the hlt instruction if nothing is to be done, thereby
-	  sending the processor to sleep and saving power.
-
-config ACPI
-	bool
-	depends on !IA64_HP_SIM
-	default y
-
-if !IA64_HP_SIM
+source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
-endif
-
 if PM
 
 source "arch/ia64/kernel/cpufreq/Kconfig"
@@ -434,6 +397,11 @@ config GENERIC_IRQ_PROBE
 	bool
 	default y
 
+config GENERIC_PENDING_IRQ
+	bool
+	depends on GENERIC_HARDIRQS && SMP
+	default y
+
 source "arch/ia64/hp/sim/Kconfig"
 
 source "arch/ia64/oprofile/Kconfig"
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f9bd88ada70..67932ad5308 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -82,24 +82,7 @@ unwcheck: vmlinux
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 
-CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader
-
-MRPROPER_FILES += include/asm-ia64/offsets.h
-
-prepare: include/asm-ia64/offsets.h
-
-arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER
-
-include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s
-	$(call filechk,gen-asm-offsets)
-
-arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp
-
-include/asm-ia64/.offsets.h.stamp:
-	mkdir -p include/asm-ia64
-	[ -s include/asm-ia64/offsets.h ] \
-	 || echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h
-	touch $@
+CLEAN_FILES += vmlinux.gz bootloader
 
 boot:	lib/lib.a vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
index b95fcf86ea0..3b65cbb31b1 100644
--- a/arch/ia64/configs/bigsur_defconfig
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -107,18 +107,12 @@ CONFIG_ACPI=y
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
-CONFIG_ACPI_BOOT=y
-CONFIG_ACPI_INTERPRETER=y
 CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_VIDEO=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_BUS=y
 CONFIG_ACPI_POWER=y
-CONFIG_ACPI_PCI=y
 CONFIG_ACPI_SYSTEM=y
 
 #
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index dccf35c60b9..08112ab3846 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -111,7 +111,6 @@ CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-CONFIG_ACPI_DEALLOCATE_IRQ=y
 
 #
 # Firmware Drivers
@@ -130,19 +129,12 @@ CONFIG_ACPI=y
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
-CONFIG_ACPI_BOOT=y
-CONFIG_ACPI_INTERPRETER=y
 # CONFIG_ACPI_BUTTON is not set
-CONFIG_ACPI_VIDEO=m
-CONFIG_ACPI_HOTKEY=m
 # CONFIG_ACPI_FAN is not set
 # CONFIG_ACPI_PROCESSOR is not set
 CONFIG_ACPI_NUMA=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_BUS=y
 CONFIG_ACPI_POWER=y
-CONFIG_ACPI_PCI=y
 CONFIG_ACPI_SYSTEM=y
 # CONFIG_ACPI_CONTAINER is not set
 
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index c853cfcd2d1..d452e18ac49 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -109,7 +109,6 @@ CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-CONFIG_ACPI_DEALLOCATE_IRQ=y
 
 #
 # Firmware Drivers
@@ -128,20 +127,13 @@ CONFIG_ACPI=y
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
-CONFIG_ACPI_BOOT=y
-CONFIG_ACPI_INTERPRETER=y
 CONFIG_ACPI_BUTTON=m
-# CONFIG_ACPI_VIDEO is not set
-# CONFIG_ACPI_HOTKEY is not set
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 # CONFIG_ACPI_HOTPLUG_CPU is not set
 CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_BUS=y
 CONFIG_ACPI_POWER=y
-CONFIG_ACPI_PCI=y
 CONFIG_ACPI_SYSTEM=y
 # CONFIG_ACPI_CONTAINER is not set
 
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
index 88e8867fa8e..80b0e9eb7fb 100644
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -109,7 +109,6 @@ CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-CONFIG_ACPI_DEALLOCATE_IRQ=y
 
 #
 # Firmware Drivers
@@ -128,19 +127,12 @@ CONFIG_ACPI=y
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
-CONFIG_ACPI_BOOT=y
-CONFIG_ACPI_INTERPRETER=y
 CONFIG_ACPI_BUTTON=y
-CONFIG_ACPI_VIDEO=m
-CONFIG_ACPI_HOTKEY=m
 CONFIG_ACPI_FAN=y
 CONFIG_ACPI_PROCESSOR=y
 CONFIG_ACPI_THERMAL=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_BUS=y
 CONFIG_ACPI_POWER=y
-CONFIG_ACPI_PCI=y
 CONFIG_ACPI_SYSTEM=y
 # CONFIG_ACPI_CONTAINER is not set
 
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
index 8444add7638..5da208115ea 100644
--- a/arch/ia64/defconfig
+++ b/arch/ia64/defconfig
@@ -99,7 +99,6 @@ CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-CONFIG_ACPI_DEALLOCATE_IRQ=y
 
 #
 # Firmware Drivers
@@ -118,20 +117,14 @@ CONFIG_ACPI=y
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
-CONFIG_ACPI_BOOT=y
-CONFIG_ACPI_INTERPRETER=y
 CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_VIDEO=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_THERMAL=m
 CONFIG_ACPI_NUMA=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_BUS=y
 CONFIG_ACPI_POWER=y
-CONFIG_ACPI_PCI=y
 CONFIG_ACPI_SYSTEM=y
 CONFIG_ACPI_CONTAINER=m
 
@@ -341,7 +334,7 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
 # CONFIG_SCSI_IPR is not set
-CONFIG_SCSI_QLOGIC_FC=y
+# CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
 CONFIG_SCSI_QLOGIC_1280=y
 # CONFIG_SCSI_QLOGIC_1280_1040 is not set
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
index 1c8c7e6a9a5..a9bd71ac78e 100644
--- a/arch/ia64/hp/sim/boot/boot_head.S
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -4,6 +4,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/pal.h>
 
 	.bss
 	.align 16
@@ -49,7 +50,11 @@ GLOBAL_ENTRY(jmp_to_kernel)
 	br.sptk.few b7
 END(jmp_to_kernel)
 
-
+/*
+ * r28 contains the index of the PAL function
+ * r29--31 the args
+ * Return values in ret0--3 (r8--11)
+ */
 GLOBAL_ENTRY(pal_emulator_static)
 	mov r8=-1
 	mov r9=256
@@ -62,7 +67,7 @@ GLOBAL_ENTRY(pal_emulator_static)
 	cmp.gtu p6,p7=r9,r28
 (p6)	br.cond.sptk.few stacked
 	;;
-static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
+static:	cmp.eq p6,p7=PAL_PTCE_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	;;
 	mov r8=0			/* status = 0 */
@@ -70,21 +75,21 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	movl r10=0x0000000200000003	/* count[0], count[1] */
 	movl r11=0x1000000000002000	/* stride[0], stride[1] */
 	br.cond.sptk.few rp
-1:	cmp.eq p6,p7=14,r28		/* PAL_FREQ_RATIOS */
+1:	cmp.eq p6,p7=PAL_FREQ_RATIOS,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	movl r9 =0x100000064		/* proc_ratio (1/100) */
 	movl r10=0x100000100		/* bus_ratio<<32 (1/256) */
 	movl r11=0x100000064		/* itc_ratio<<32 (1/100) */
 	;;
-1:	cmp.eq p6,p7=19,r28		/* PAL_RSE_INFO */
+1:	cmp.eq p6,p7=PAL_RSE_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	mov r9=96			/* num phys stacked */
 	mov r10=0			/* hints */
 	mov r11=0
 	br.cond.sptk.few rp
-1:	cmp.eq p6,p7=1,r28		/* PAL_CACHE_FLUSH */
+1:	cmp.eq p6,p7=PAL_CACHE_FLUSH,r28		/* PAL_CACHE_FLUSH */
 (p7)	br.cond.sptk.few 1f
 	mov r9=ar.lc
 	movl r8=524288			/* flush 512k million cache lines (16MB) */
@@ -102,7 +107,7 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	mov ar.lc=r9
 	mov r8=r0
 	;;
-1:	cmp.eq p6,p7=15,r28		/* PAL_PERF_MON_INFO */
+1:	cmp.eq p6,p7=PAL_PERF_MON_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	movl r9 =0x08122f04		/* generic=4 width=47 retired=8 cycles=18 */
@@ -138,6 +143,20 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	st8 [r29]=r0,16			/* clear remaining bits  */
 	st8 [r18]=r0,16			/* clear remaining bits  */
 	;;
+1:	cmp.eq p6,p7=PAL_VM_SUMMARY,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0  */
+	movl	r9=0x2044040020F1865	/* num_tc_levels=2, num_unique_tcs=4 */
+					/* max_itr_entry=64, max_dtr_entry=64 */
+					/* hash_tag_id=2, max_pkr=15 */
+					/* key_size=24, phys_add_size=50, vw=1 */
+	movl	r10=0x183C		/* rid_size=24, impl_va_msb=60 */
+	;;
+1:	cmp.eq p6,p7=PAL_MEM_ATTRIB,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0 */
+	mov	r9=0x80|0x01		/* NatPage|WB */
+	;;
 1:	br.cond.sptk.few rp
 stacked:
 	br.ret.sptk.few rp
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 56405dbfd73..a18983a3c93 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -233,6 +233,23 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
 		simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
 }
 
+/* Copy len bytes of buf into sc's flat buffer, or spread them over its sg list. */
+static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
+{
+	int scatterlen = sc->use_sg;	/* 0 selects the flat request_buffer path */
+	struct scatterlist *slp;
+
+	if (scatterlen == 0)
+		memcpy(sc->request_buffer, buf, len);
+	else for (slp = (struct scatterlist *)sc->buffer; scatterlen-- > 0 && len > 0; slp++) {
+		unsigned thislen = min(len, slp->length);
+
+		memcpy(page_address(slp->page) + slp->offset, buf, thislen);
+		buf += thislen;	/* next entry continues the data; slp steps in the for header only */
+		len -= thislen;
+	}
+}
+
 static int
 simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
@@ -240,6 +257,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	char fname[MAX_ROOT_LEN+16];
 	size_t disk_size;
 	char *buf;
+	char localbuf[36];
 #if DEBUG_SIMSCSI
 	register long sp asm ("sp");
 
@@ -263,7 +281,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 				/* disk doesn't exist... */
 				break;
 			}
-			buf = sc->request_buffer;
+			buf = localbuf;
 			buf[0] = 0;	/* magnetic disk */
 			buf[1] = 0;	/* not a removable medium */
 			buf[2] = 2;	/* SCSI-2 compliant device */
@@ -273,6 +291,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[6] = 0;	/* reserved */
 			buf[7] = 0;	/* various flags */
 			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
+			simscsi_fillresult(sc, buf, 36);
 			sc->result = GOOD;
 			break;
 
@@ -304,16 +323,13 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			simscsi_readwrite10(sc, SSC_WRITE);
 			break;
 
-
 		      case READ_CAPACITY:
 			if (desc[target_id] < 0 || sc->request_bufflen < 8) {
 				break;
 			}
-			buf = sc->request_buffer;
-
+			buf = localbuf;
 			disk_size = simscsi_get_disk_size(desc[target_id]);
 
-			/* pretend to be a 1GB disk (partition table contains real stuff): */
 			buf[0] = (disk_size >> 24) & 0xff;
 			buf[1] = (disk_size >> 16) & 0xff;
 			buf[2] = (disk_size >>  8) & 0xff;
@@ -323,13 +339,14 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[5] = 0;
 			buf[6] = 2;
 			buf[7] = 0;
+			simscsi_fillresult(sc, buf, 8);
 			sc->result = GOOD;
 			break;
 
 		      case MODE_SENSE:
 		      case MODE_SENSE_10:
 			/* sd.c uses this to determine whether disk does write-caching. */
-			memset(sc->request_buffer, 0, 128);
+			simscsi_fillresult(sc, (char *)empty_zero_page, sc->request_bufflen);
 			sc->result = GOOD;
 			break;
 
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 7dcb8582ae0..b42ec37be51 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -130,7 +130,7 @@ static void rs_stop(struct tty_struct *tty)
 
 static void rs_start(struct tty_struct *tty)
 {
-#if SIMSERIAL_DEBUG
+#ifdef SIMSERIAL_DEBUG
 	printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
 		tty->stopped, tty->hw_stopped, tty->flow_stopped);
 #endif
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 31de70b7c67..a7280d9f6c1 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -216,12 +216,6 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 	if (!mpnt)
 		return -ENOMEM;
 
-	if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
-				      >> PAGE_SHIFT)) {
-		kmem_cache_free(vm_area_cachep, mpnt);
-		return -ENOMEM;
-	}
-
 	memset(mpnt, 0, sizeof(*mpnt));
 
 	down_write(&current->mm->mmap_sem);
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index 829a6d80711..494fad6bf37 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -1,6 +1,6 @@
 #include <asm/asmmacro.h>
 #include <asm/ia32.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/signal.h>
 #include <asm/thread_info.h>
 
@@ -215,7 +215,7 @@ ia32_syscall_table:
 	data8 sys32_fork
 	data8 sys_read
 	data8 sys_write
-	data8 sys32_open	  /* 5 */
+	data8 compat_sys_open	  /* 5 */
 	data8 sys_close
 	data8 sys32_waitpid
 	data8 sys_creat
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index c1e20d65dd6..3fa67ecebc8 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -2327,7 +2327,7 @@ sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count)
 	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
 	set_fs(old_fs);
 
-	if (!ret && offset && put_user(of, offset))
+	if (offset && put_user(of, offset))
 		return -EFAULT;
 
 	return ret;
@@ -2359,37 +2359,6 @@ sys32_brk (unsigned int brk)
 	return ret;
 }
 
-/*
- * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag.
- */
-asmlinkage long
-sys32_open (const char __user * filename, int flags, int mode)
-{
-	char * tmp;
-	int fd, error;
-
-	tmp = getname(filename);
-	fd = PTR_ERR(tmp);
-	if (!IS_ERR(tmp)) {
-		fd = get_unused_fd();
-		if (fd >= 0) {
-			struct file *f = filp_open(tmp, flags, mode);
-			error = PTR_ERR(f);
-			if (IS_ERR(f))
-				goto out_error;
-			fd_install(fd, f);
-		}
-out:
-		putname(tmp);
-	}
-	return fd;
-
-out_error:
-	put_unused_fd(fd);
-	fd = error;
-	goto out;
-}
-
 /* Structure for ia32 emulation on ia64 */
 struct epoll_event32
 {
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index b242594be55..307514f7a28 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
 obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
 obj-$(CONFIG_IOSAPIC)		+= iosapic.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_SMP)		+= smp.o smpboot.o domain.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index 2623df5e263..13a5b3b49bf 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -17,20 +17,20 @@
 #include <asm/acpi-ext.h>
 
 struct acpi_vendor_descriptor {
-	u8				guid_id;
-	efi_guid_t			guid;
+	u8 guid_id;
+	efi_guid_t guid;
 };
 
 struct acpi_vendor_info {
-	struct acpi_vendor_descriptor	*descriptor;
-	u8				*data;
-	u32				length;
+	struct acpi_vendor_descriptor *descriptor;
+	u8 *data;
+	u32 length;
 };
 
 acpi_status
 acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 {
-	struct acpi_vendor_info *info = (struct acpi_vendor_info *) context;
+	struct acpi_vendor_info *info = (struct acpi_vendor_info *)context;
 	struct acpi_resource_vendor *vendor;
 	struct acpi_vendor_descriptor *descriptor;
 	u32 length;
@@ -38,8 +38,8 @@ acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 	if (resource->id != ACPI_RSTYPE_VENDOR)
 		return AE_OK;
 
-	vendor = (struct acpi_resource_vendor *) &resource->data;
-	descriptor = (struct acpi_vendor_descriptor *) vendor->reserved;
+	vendor = (struct acpi_resource_vendor *)&resource->data;
+	descriptor = (struct acpi_vendor_descriptor *)vendor->reserved;
 	if (vendor->length <= sizeof(*info->descriptor) ||
 	    descriptor->guid_id != info->descriptor->guid_id ||
 	    efi_guidcmp(descriptor->guid, info->descriptor->guid))
@@ -50,21 +50,24 @@ acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 	if (!info->data)
 		return AE_NO_MEMORY;
 
-	memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length);
+	memcpy(info->data,
+	       vendor->reserved + sizeof(struct acpi_vendor_descriptor),
+	       length);
 	info->length = length;
 	return AE_CTRL_TERMINATE;
 }
 
 acpi_status
-acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
-		u8 **data, u32 *length)
+acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor * id,
+			  u8 ** data, u32 * length)
 {
 	struct acpi_vendor_info info;
 
 	info.descriptor = id;
 	info.data = NULL;
 
-	acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info);
+	acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match,
+			    &info);
 	if (!info.data)
 		return AE_NOT_FOUND;
 
@@ -75,17 +78,19 @@ acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
 
 struct acpi_vendor_descriptor hp_ccsr_descriptor = {
 	.guid_id = 2,
-	.guid    = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
+	.guid =
+	    EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01,
+		     0x37, 0x0e, 0xad)
 };
 
-acpi_status
-hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
+acpi_status hp_acpi_csr_space(acpi_handle obj, u64 * csr_base, u64 * csr_length)
 {
 	acpi_status status;
 	u8 *data;
 	u32 length;
 
-	status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
+	status =
+	    acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
 
 	if (ACPI_FAILURE(status) || length != 16)
 		return AE_NOT_FOUND;
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 9609f243e5d..7e926471e4e 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -74,12 +74,11 @@ unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
 #define MAX_SAPICS 256
-u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
-	{ [0 ... MAX_SAPICS - 1] = -1 };
+u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 };
+
 EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
 
-const char *
-acpi_get_sysname (void)
+const char *acpi_get_sysname(void)
 {
 #ifdef CONFIG_IA64_GENERIC
 	unsigned long rsdp_phys;
@@ -89,27 +88,29 @@ acpi_get_sysname (void)
 
 	rsdp_phys = acpi_find_rsdp();
 	if (!rsdp_phys) {
-		printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP not found, default to \"dig\"\n");
 		return "dig";
 	}
 
-	rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+	rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys);
 	if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
-		printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
 		return "dig";
 	}
 
-	xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address);
 	hdr = &xsdt->header;
 	if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
-		printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
 		return "dig";
 	}
 
 	if (!strcmp(hdr->oem_id, "HP")) {
 		return "hpzx1";
-	}
-	else if (!strcmp(hdr->oem_id, "SGI")) {
+	} else if (!strcmp(hdr->oem_id, "SGI")) {
 		return "sn2";
 	}
 
@@ -131,7 +132,7 @@ acpi_get_sysname (void)
 #endif
 }
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 
 #define ACPI_MAX_PLATFORM_INTERRUPTS	256
 
@@ -146,8 +147,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
  * Interrupt routing API for device drivers.  Provides interrupt vector for
  * a generic platform event.  Currently only CPEI is implemented.
  */
-int
-acpi_request_vector (u32 int_type)
+int acpi_request_vector(u32 int_type)
 {
 	int vector = -1;
 
@@ -155,12 +155,12 @@ acpi_request_vector (u32 int_type)
 		/* corrected platform error interrupt */
 		vector = platform_intr_list[int_type];
 	} else
-		printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
+		printk(KERN_ERR
+		       "acpi_request_vector(): invalid interrupt type\n");
 	return vector;
 }
 
-char *
-__acpi_map_table (unsigned long phys_addr, unsigned long size)
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
 {
 	return __va(phys_addr);
 }
@@ -169,19 +169,18 @@ __acpi_map_table (unsigned long phys_addr, unsigned long size)
                             Boot-time Table Parsing
    -------------------------------------------------------------------------- */
 
-static int			total_cpus __initdata;
-static int			available_cpus __initdata;
-struct acpi_table_madt *	acpi_madt __initdata;
-static u8			has_8259;
-
+static int total_cpus __initdata;
+static int available_cpus __initdata;
+struct acpi_table_madt *acpi_madt __initdata;
+static u8 has_8259;
 
 static int __init
-acpi_parse_lapic_addr_ovr (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+			  const unsigned long end)
 {
 	struct acpi_table_lapic_addr_ovr *lapic;
 
-	lapic = (struct acpi_table_lapic_addr_ovr *) header;
+	lapic = (struct acpi_table_lapic_addr_ovr *)header;
 
 	if (BAD_MADT_ENTRY(lapic, end))
 		return -EINVAL;
@@ -193,22 +192,23 @@ acpi_parse_lapic_addr_ovr (
 	return 0;
 }
 
-
 static int __init
-acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_lsapic *lsapic;
 
-	lsapic = (struct acpi_table_lsapic *) header;
+	lsapic = (struct acpi_table_lsapic *)header;
 
 	if (BAD_MADT_ENTRY(lsapic, end))
 		return -EINVAL;
 
 	if (lsapic->flags.enabled) {
 #ifdef CONFIG_SMP
-		smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    (lsapic->id << 8) | lsapic->eid;
 #endif
-		ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid;
+		ia64_acpiid_to_sapicid[lsapic->acpi_id] =
+		    (lsapic->id << 8) | lsapic->eid;
 		++available_cpus;
 	}
 
@@ -216,13 +216,12 @@ acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-
 static int __init
-acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_lapic_nmi *lacpi_nmi;
 
-	lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+	lacpi_nmi = (struct acpi_table_lapic_nmi *)header;
 
 	if (BAD_MADT_ENTRY(lacpi_nmi, end))
 		return -EINVAL;
@@ -231,13 +230,12 @@ acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-
 static int __init
-acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_iosapic *iosapic;
 
-	iosapic = (struct acpi_table_iosapic *) header;
+	iosapic = (struct acpi_table_iosapic *)header;
 
 	if (BAD_MADT_ENTRY(iosapic, end))
 		return -EINVAL;
@@ -245,15 +243,14 @@ acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
 	return iosapic_init(iosapic->address, iosapic->global_irq_base);
 }
 
-
 static int __init
-acpi_parse_plat_int_src (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_plat_int_src(acpi_table_entry_header * header,
+			const unsigned long end)
 {
 	struct acpi_table_plat_int_src *plintsrc;
 	int vector;
 
-	plintsrc = (struct acpi_table_plat_int_src *) header;
+	plintsrc = (struct acpi_table_plat_int_src *)header;
 
 	if (BAD_MADT_ENTRY(plintsrc, end))
 		return -EINVAL;
@@ -267,8 +264,12 @@ acpi_parse_plat_int_src (
 						plintsrc->iosapic_vector,
 						plintsrc->eid,
 						plintsrc->id,
-						(plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-						(plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+						(plintsrc->flags.polarity ==
+						 1) ? IOSAPIC_POL_HIGH :
+						IOSAPIC_POL_LOW,
+						(plintsrc->flags.trigger ==
+						 1) ? IOSAPIC_EDGE :
+						IOSAPIC_LEVEL);
 
 	platform_intr_list[plintsrc->type] = vector;
 	if (acpi_madt_rev > 1) {
@@ -283,7 +284,6 @@ acpi_parse_plat_int_src (
 	return 0;
 }
 
-
 unsigned int can_cpei_retarget(void)
 {
 	extern int cpe_vector;
@@ -322,29 +322,30 @@ unsigned int get_cpei_target_cpu(void)
 }
 
 static int __init
-acpi_parse_int_src_ovr (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+		       const unsigned long end)
 {
 	struct acpi_table_int_src_ovr *p;
 
-	p = (struct acpi_table_int_src_ovr *) header;
+	p = (struct acpi_table_int_src_ovr *)header;
 
 	if (BAD_MADT_ENTRY(p, end))
 		return -EINVAL;
 
 	iosapic_override_isa_irq(p->bus_irq, p->global_irq,
-				 (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-				 (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+				 (p->flags.polarity ==
+				  1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+				 (p->flags.trigger ==
+				  1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
 	return 0;
 }
 
-
 static int __init
-acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_nmi_src *nmi_src;
 
-	nmi_src = (struct acpi_table_nmi_src*) header;
+	nmi_src = (struct acpi_table_nmi_src *)header;
 
 	if (BAD_MADT_ENTRY(nmi_src, end))
 		return -EINVAL;
@@ -353,11 +354,9 @@ acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-static void __init
-acpi_madt_oem_check (char *oem_id, char *oem_table_id)
+static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (!strncmp(oem_id, "IBM", 3) &&
-	    (!strncmp(oem_table_id, "SERMOW", 6))) {
+	if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) {
 
 		/*
 		 * Unfortunately ITC_DRIFT is not yet part of the
@@ -370,19 +369,18 @@ acpi_madt_oem_check (char *oem_id, char *oem_table_id)
 	}
 }
 
-static int __init
-acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 {
 	if (!phys_addr || !size)
 		return -EINVAL;
 
-	acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+	acpi_madt = (struct acpi_table_madt *)__va(phys_addr);
 
 	acpi_madt_rev = acpi_madt->header.revision;
 
 	/* remember the value for reference after free_initmem() */
 #ifdef CONFIG_ITANIUM
-	has_8259 = 1; /* Firmware on old Itanium systems is broken */
+	has_8259 = 1;		/* Firmware on old Itanium systems is broken */
 #else
 	has_8259 = acpi_madt->flags.pcat_compat;
 #endif
@@ -396,19 +394,18 @@ acpi_parse_madt (unsigned long phys_addr, unsigned long size)
 	printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
 
 	acpi_madt_oem_check(acpi_madt->header.oem_id,
-		acpi_madt->header.oem_table_id);
+			    acpi_madt->header.oem_table_id);
 
 	return 0;
 }
 
-
 #ifdef CONFIG_ACPI_NUMA
 
 #undef SLIT_DEBUG
 
 #define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
 
-static int __initdata srat_num_cpus;			/* number of cpus */
+static int __initdata srat_num_cpus;	/* number of cpus */
 static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
@@ -421,15 +418,15 @@ static struct acpi_table_slit __initdata *slit_table;
  * ACPI 2.0 SLIT (System Locality Information Table)
  * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
  */
-void __init
-acpi_numa_slit_init (struct acpi_table_slit *slit)
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
 	u32 len;
 
 	len = sizeof(struct acpi_table_header) + 8
-		+ slit->localities * slit->localities;
+	    + slit->localities * slit->localities;
 	if (slit->header.length != len) {
-		printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
+		printk(KERN_ERR
+		       "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
 		       len, slit->header.length);
 		memset(numa_slit, 10, sizeof(numa_slit));
 		return;
@@ -438,19 +435,20 @@ acpi_numa_slit_init (struct acpi_table_slit *slit)
 }
 
 void __init
-acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
 {
 	/* record this node in proximity bitmap */
 	pxm_bit_set(pa->proximity_domain);
 
-	node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
+	node_cpuid[srat_num_cpus].phys_id =
+	    (pa->apic_id << 8) | (pa->lsapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
 	srat_num_cpus++;
 }
 
 void __init
-acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 {
 	unsigned long paddr, size;
 	u8 pxm;
@@ -487,8 +485,7 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
 	num_node_memblks++;
 }
 
-void __init
-acpi_numa_arch_fixup (void)
+void __init acpi_numa_arch_fixup(void)
 {
 	int i, j, node_from, node_to;
 
@@ -534,21 +531,24 @@ acpi_numa_arch_fixup (void)
 	for (i = 0; i < srat_num_cpus; i++)
 		node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
 
-	printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes());
-	printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
+	printk(KERN_INFO "Number of logical nodes in system = %d\n",
+	       num_online_nodes());
+	printk(KERN_INFO "Number of memory chunks in system = %d\n",
+	       num_node_memblks);
 
-	if (!slit_table) return;
+	if (!slit_table)
+		return;
 	memset(numa_slit, -1, sizeof(numa_slit));
-	for (i=0; i<slit_table->localities; i++) {
+	for (i = 0; i < slit_table->localities; i++) {
 		if (!pxm_bit_test(i))
 			continue;
 		node_from = pxm_to_nid_map[i];
-		for (j=0; j<slit_table->localities; j++) {
+		for (j = 0; j < slit_table->localities; j++) {
 			if (!pxm_bit_test(j))
 				continue;
 			node_to = pxm_to_nid_map[j];
 			node_distance(node_from, node_to) =
-				slit_table->entry[i*slit_table->localities + j];
+			    slit_table->entry[i * slit_table->localities + j];
 		}
 	}
 
@@ -556,36 +556,41 @@ acpi_numa_arch_fixup (void)
 	printk("ACPI 2.0 SLIT locality table:\n");
 	for_each_online_node(i) {
 		for_each_online_node(j)
-			printk("%03d ", node_distance(i,j));
+		    printk("%03d ", node_distance(i, j));
 		printk("\n");
 	}
 #endif
 }
-#endif /* CONFIG_ACPI_NUMA */
+#endif				/* CONFIG_ACPI_NUMA */
 
-unsigned int
-acpi_register_gsi (u32 gsi, int edge_level, int active_high_low)
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
 {
 	if (has_8259 && gsi < 16)
 		return isa_irq_to_vector(gsi);
 
 	return iosapic_register_intr(gsi,
-			(active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-			(edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+				     (active_high_low ==
+				      ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH :
+				     IOSAPIC_POL_LOW,
+				     (edge_level ==
+				      ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
+				     IOSAPIC_LEVEL);
 }
+
 EXPORT_SYMBOL(acpi_register_gsi);
 
-#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
-void
-acpi_unregister_gsi (u32 gsi)
+void acpi_unregister_gsi(u32 gsi)
 {
 	iosapic_unregister_intr(gsi);
 }
+
 EXPORT_SYMBOL(acpi_unregister_gsi);
-#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
-static int __init
-acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size)
 {
 	struct acpi_table_header *fadt_header;
 	struct fadt_descriptor_rev2 *fadt;
@@ -593,11 +598,11 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
 	if (!phys_addr || !size)
 		return -EINVAL;
 
-	fadt_header = (struct acpi_table_header *) __va(phys_addr);
+	fadt_header = (struct acpi_table_header *)__va(phys_addr);
 	if (fadt_header->revision != 3)
-		return -ENODEV;		/* Only deal with ACPI 2.0 FADT */
+		return -ENODEV;	/* Only deal with ACPI 2.0 FADT */
 
-	fadt = (struct fadt_descriptor_rev2 *) fadt_header;
+	fadt = (struct fadt_descriptor_rev2 *)fadt_header;
 
 	if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
 		acpi_kbd_controller_present = 0;
@@ -609,22 +614,19 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
 	return 0;
 }
 
-
-unsigned long __init
-acpi_find_rsdp (void)
+unsigned long __init acpi_find_rsdp(void)
 {
 	unsigned long rsdp_phys = 0;
 
 	if (efi.acpi20)
 		rsdp_phys = __pa(efi.acpi20);
 	else if (efi.acpi)
-		printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
+		printk(KERN_WARNING PREFIX
+		       "v1.0/r0.71 tables no longer supported\n");
 	return rsdp_phys;
 }
 
-
-int __init
-acpi_boot_init (void)
+int __init acpi_boot_init(void)
 {
 
 	/*
@@ -642,31 +644,43 @@ acpi_boot_init (void)
 
 	/* Local APIC */
 
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing LAPIC address override entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
-		printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
+	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
+	    < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no LAPIC entries\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
+	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0)
+	    < 0)
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
 
 	/* I/O APIC */
 
-	if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
-		printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no IOSAPIC entries\n");
 
 	/* System-Level Interrupt Routing */
 
-	if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
-		printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src,
+	     ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing platform interrupt source entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing interrupt source overrides entry\n");
 
 	if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
-  skip_madt:
+      skip_madt:
 
 	/*
 	 * FADT says whether a legacy keyboard controller is present.
@@ -681,8 +695,9 @@ acpi_boot_init (void)
 	if (available_cpus == 0) {
 		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
 		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
-		available_cpus = 1; /* We've got at least one of these, no? */
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
 	}
 	smp_boot_data.cpu_count = available_cpus;
 
@@ -691,8 +706,10 @@ acpi_boot_init (void)
 	if (srat_num_cpus == 0) {
 		int cpu, i = 1;
 		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
-			if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
-				node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
+			if (smp_boot_data.cpu_phys_id[cpu] !=
+			    hard_smp_processor_id())
+				node_cpuid[i++].phys_id =
+				    smp_boot_data.cpu_phys_id[cpu];
 	}
 # endif
 #endif
@@ -700,12 +717,12 @@ acpi_boot_init (void)
 	build_cpu_to_node_map();
 #endif
 	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
 	return 0;
 }
 
-int
-acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 {
 	int vector;
 
@@ -726,11 +743,10 @@ acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 static
-int
-acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 {
 #ifdef CONFIG_ACPI_NUMA
-	int 			pxm_id;
+	int pxm_id;
 
 	pxm_id = acpi_get_pxm(handle);
 
@@ -738,31 +754,28 @@ acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 	 * Assuming that the container driver would have set the proximity
 	 * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
 	 */
-	node_cpuid[cpu].nid = (pxm_id < 0) ? 0:
-			pxm_to_nid_map[pxm_id];
+	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id];
 
-	node_cpuid[cpu].phys_id =  physid;
+	node_cpuid[cpu].phys_id = physid;
 #endif
-	return(0);
+	return (0);
 }
 
-
-int
-acpi_map_lsapic(acpi_handle handle, int *pcpu)
+int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_table_lsapic *lsapic;
 	cpumask_t tmp_map;
 	long physid;
 	int cpu;
- 
+
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
 
-	if (!buffer.length ||  !buffer.pointer)
+	if (!buffer.length || !buffer.pointer)
 		return -EINVAL;
- 
+
 	obj = buffer.pointer;
 	if (obj->type != ACPI_TYPE_BUFFER ||
 	    obj->buffer.length < sizeof(*lsapic)) {
@@ -778,7 +791,7 @@ acpi_map_lsapic(acpi_handle handle, int *pcpu)
 		return -EINVAL;
 	}
 
-	physid = ((lsapic->id <<8) | (lsapic->eid));
+	physid = ((lsapic->id << 8) | (lsapic->eid));
 
 	acpi_os_free(buffer.pointer);
 	buffer.length = ACPI_ALLOCATE_BUFFER;
@@ -786,50 +799,49 @@ acpi_map_lsapic(acpi_handle handle, int *pcpu)
 
 	cpus_complement(tmp_map, cpu_present_map);
 	cpu = first_cpu(tmp_map);
-	if(cpu >= NR_CPUS)
+	if (cpu >= NR_CPUS)
 		return -EINVAL;
 
 	acpi_map_cpu2node(handle, cpu, physid);
 
- 	cpu_set(cpu, cpu_present_map);
+	cpu_set(cpu, cpu_present_map);
 	ia64_cpu_to_sapicid[cpu] = physid;
 	ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
 
 	*pcpu = cpu;
-	return(0);
+	return (0);
 }
-EXPORT_SYMBOL(acpi_map_lsapic);
 
+EXPORT_SYMBOL(acpi_map_lsapic);
 
-int
-acpi_unmap_lsapic(int cpu)
+int acpi_unmap_lsapic(int cpu)
 {
 	int i;
 
-	for (i=0; i<MAX_SAPICS; i++) {
- 		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
- 			ia64_acpiid_to_sapicid[i] = -1;
- 			break;
- 		}
- 	}
+	for (i = 0; i < MAX_SAPICS; i++) {
+		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
+			ia64_acpiid_to_sapicid[i] = -1;
+			break;
+		}
+	}
 	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu,cpu_present_map);
+	cpu_clear(cpu, cpu_present_map);
 
 #ifdef CONFIG_ACPI_NUMA
 	/* NUMA specific cleanup's */
 #endif
 
-	return(0);
+	return (0);
 }
+
 EXPORT_SYMBOL(acpi_unmap_lsapic);
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
- 
+#endif				/* CONFIG_ACPI_HOTPLUG_CPU */
 
 #ifdef CONFIG_ACPI_NUMA
 acpi_status __devinit
-acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
+acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 {
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_table_iosapic *iosapic;
 	unsigned int gsi_base;
@@ -878,29 +890,29 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
 	map_iosapic_to_node(gsi_base, node);
 	return AE_OK;
 }
-#endif /* CONFIG_NUMA */
+#endif				/* CONFIG_ACPI_NUMA */
 
-int
-acpi_register_ioapic (acpi_handle handle, u64 phys_addr, u32 gsi_base)
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
 	int err;
 
 	if ((err = iosapic_init(phys_addr, gsi_base)))
 		return err;
 
-#if CONFIG_ACPI_NUMA
+#ifdef CONFIG_ACPI_NUMA
 	acpi_map_iosapic(handle, 0, NULL, NULL);
-#endif /* CONFIG_ACPI_NUMA */
+#endif				/* CONFIG_ACPI_NUMA */
 
 	return 0;
 }
+
 EXPORT_SYMBOL(acpi_register_ioapic);
 
-int
-acpi_unregister_ioapic (acpi_handle handle, u32 gsi_base)
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
 	return iosapic_remove(gsi_base);
 }
+
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
-#endif /* CONFIG_ACPI_BOOT */
+#endif				/* CONFIG_ACPI */
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 7d1ae2982c5..77225659e96 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
  * to extract and format the required data.
  */
 
+#define ASM_OFFSETS_C 1
 #include <linux/config.h>
 
 #include <linux/sched.h>
@@ -211,17 +212,41 @@ void foo(void)
 #endif
 
 	BLANK();
-	DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
-	       offsetof (struct ia64_mca_cpu, proc_state_dump));
-	DEFINE(IA64_MCA_CPU_STACK_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stack));
-	DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stackframe));
-	DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
-	       offsetof (struct ia64_mca_cpu, rbstore));
+	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, mca_stack));
 	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
 	       offsetof (struct ia64_mca_cpu, init_stack));
 	BLANK();
+	DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
+	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SIZE,
+	       sizeof (struct ia64_sal_os_state));
+	DEFINE(IA64_PMSA_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_gr));
+	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+	DEFINE(IA64_PMSA_PR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_pr));
+	DEFINE(IA64_PMSA_BR0_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_br0));
+	DEFINE(IA64_PMSA_RSC_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
+	DEFINE(IA64_PMSA_IIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_iip));
+	DEFINE(IA64_PMSA_IPSR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+	DEFINE(IA64_PMSA_IFS_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
+	DEFINE(IA64_PMSA_XIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_xip));
+	BLANK();
+
 	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
 	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
 	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
deleted file mode 100644
index bbb8efe126b..00000000000
--- a/arch/ia64/kernel/domain.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * arch/ia64/kernel/domain.c
- * Architecture specific sched-domains builder.
- *
- * Copyright (C) 2004 Jesse Barnes
- * Copyright (C) 2004 Silicon Graphics, Inc.
- */
-
-#include <linux/sched.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/topology.h>
-#include <linux/nodemask.h>
-
-#define SD_NODES_PER_DOMAIN 16
-
-#ifdef CONFIG_NUMA
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain.  Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, unsigned long *used_nodes)
-{
-	int i, n, val, min_val, best_node = 0;
-
-	min_val = INT_MAX;
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Start at @node */
-		n = (node + i) % MAX_NUMNODES;
-
-		if (!nr_cpus_node(n))
-			continue;
-
-		/* Skip already used nodes */
-		if (test_bit(n, used_nodes))
-			continue;
-
-		/* Simple min distance search */
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	set_bit(best_node, used_nodes);
-	return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span.  It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static cpumask_t sched_domain_node_span(int node)
-{
-	int i;
-	cpumask_t span, nodemask;
-	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
-	cpus_clear(span);
-	bitmap_zero(used_nodes, MAX_NUMNODES);
-
-	nodemask = node_to_cpumask(node);
-	cpus_or(span, span, nodemask);
-	set_bit(node, used_nodes);
-
-	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-		int next_node = find_next_best_node(node, used_nodes);
-		nodemask = node_to_cpumask(next_node);
-		cpus_or(span, span, nodemask);
-	}
-
-	return span;
-}
-#endif
-
-/*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
- */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static struct sched_group sched_group_cpus[NR_CPUS];
-static int cpu_to_cpu_group(int cpu)
-{
-	return cpu;
-}
-#endif
-
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
-static int cpu_to_phys_group(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
-	return first_cpu(cpu_sibling_map[cpu]);
-#else
-	return cpu;
-#endif
-}
-
-#ifdef CONFIG_NUMA
-/*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group *sched_group_nodes[MAX_NUMNODES];
-
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group sched_group_allnodes[MAX_NUMNODES];
-
-static int cpu_to_allnodes_group(int cpu)
-{
-	return cpu_to_node(cpu);
-}
-#endif
-
-/*
- * Build sched domains for a given set of cpus and attach the sched domains
- * to the individual cpus
- */
-void build_sched_domains(const cpumask_t *cpu_map)
-{
-	int i;
-
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
-	for_each_cpu_mask(i, *cpu_map) {
-		int group;
-		struct sched_domain *sd = NULL, *p;
-		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-
-#ifdef CONFIG_NUMA
-		if (num_online_cpus()
-				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
-			sd = &per_cpu(allnodes_domains, i);
-			*sd = SD_ALLNODES_INIT;
-			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
-			sd->groups = &sched_group_allnodes[group];
-			p = sd;
-		} else
-			p = NULL;
-
-		sd = &per_cpu(node_domains, i);
-		*sd = SD_NODE_INIT;
-		sd->span = sched_domain_node_span(cpu_to_node(i));
-		sd->parent = p;
-		cpus_and(sd->span, sd->span, *cpu_map);
-#endif
-
-		p = sd;
-		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
-		*sd = SD_CPU_INIT;
-		sd->span = nodemask;
-		sd->parent = p;
-		sd->groups = &sched_group_phys[group];
-
-#ifdef CONFIG_SCHED_SMT
-		p = sd;
-		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
-		*sd = SD_SIBLING_INIT;
-		sd->span = cpu_sibling_map[i];
-		cpus_and(sd->span, sd->span, *cpu_map);
-		sd->parent = p;
-		sd->groups = &sched_group_cpus[group];
-#endif
-	}
-
-#ifdef CONFIG_SCHED_SMT
-	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = cpu_sibling_map[i];
-		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
-		if (i != first_cpu(this_sibling_map))
-			continue;
-
-		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
-	}
-#endif
-
-	/* Set up physical groups */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
-	}
-
-#ifdef CONFIG_NUMA
-	init_sched_build_groups(sched_group_allnodes, *cpu_map,
-				&cpu_to_allnodes_group);
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Set up node groups */
-		struct sched_group *sg, *prev;
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t domainspan;
-		cpumask_t covered = CPU_MASK_NONE;
-		int j;
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, *cpu_map);
-
-		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, nodemask) {
-			struct sched_domain *sd;
-			sd = &per_cpu(node_domains, j);
-			sd->groups = sg;
-			if (sd->groups == NULL) {
-				/* Turn off balancing if we have no groups */
-				sd->flags = 0;
-			}
-		}
-		if (!sg) {
-			printk(KERN_WARNING
-			"Can not alloc domain group for node %d\n", i);
-			continue;
-		}
-		sg->cpu_power = 0;
-		sg->cpumask = nodemask;
-		cpus_or(covered, covered, nodemask);
-		prev = sg;
-
-		for (j = 0; j < MAX_NUMNODES; j++) {
-			cpumask_t tmp, notcovered;
-			int n = (i + j) % MAX_NUMNODES;
-
-			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, *cpu_map);
-			cpus_and(tmp, tmp, domainspan);
-			if (cpus_empty(tmp))
-				break;
-
-			nodemask = node_to_cpumask(n);
-			cpus_and(tmp, tmp, nodemask);
-			if (cpus_empty(tmp))
-				continue;
-
-			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-			if (!sg) {
-				printk(KERN_WARNING
-				"Can not alloc domain group for node %d\n", j);
-				break;
-			}
-			sg->cpu_power = 0;
-			sg->cpumask = tmp;
-			cpus_or(covered, covered, tmp);
-			prev->next = sg;
-			prev = sg;
-		}
-		prev->next = sched_group_nodes[i];
-	}
-#endif
-
-	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-		power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
-
-		sd = &per_cpu(phys_domains, i);
-		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
-#endif
-	}
-
-#ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
-
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
-#endif
-
-	/* Attach the domains */
-	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-#else
-		sd = &per_cpu(phys_domains, i);
-#endif
-		cpu_attach_domain(sd, i);
-	}
-}
-/*
- * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
- */
-void arch_init_sched_domains(const cpumask_t *cpu_map)
-{
-	cpumask_t cpu_default_map;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	build_sched_domains(&cpu_default_map);
-}
-
-void arch_destroy_sched_domains(const cpumask_t *cpu_map)
-{
-#ifdef CONFIG_NUMA
-	int i;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-		struct sched_group *oldsg, *sg = sched_group_nodes[i];
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		if (sg == NULL)
-			continue;
-		sg = sg->next;
-next_sg:
-		oldsg = sg;
-		sg = sg->next;
-		kfree(oldsg);
-		if (oldsg != sched_group_nodes[i])
-			goto next_sg;
-		sched_group_nodes[i] = NULL;
-	}
-#endif
-}
-
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 9be53e1ea40..0741b066b98 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -37,7 +37,7 @@
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/kregs.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
 #include <asm/percpu.h>
 #include <asm/processor.h>
@@ -204,9 +204,6 @@ GLOBAL_ENTRY(ia64_switch_to)
 (p6)	br.cond.dpnt .map
 	;;
 .done:
-(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
-	;;
-(p6)	srlz.d
 	ld8 sp=[r21]			// load kernel stack pointer of new task
 	mov IA64_KR(CURRENT)=in0	// update "current" application register
 	mov r8=r13			// return pointer to previously running task
@@ -234,6 +231,9 @@ GLOBAL_ENTRY(ia64_switch_to)
 	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
 	;;
 	itr.d dtr[r25]=r23		// wire in new mapping...
+	ssm psr.ic			// reenable the psr.ic bit
+	;;
+	srlz.d
 	br.cond.sptk .done
 END(ia64_switch_to)
 
@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
 	br.cond.sptk.many b7
 END(load_switch_stack)
 
+GLOBAL_ENTRY(prefetch_stack)
+	add r14 = -IA64_SWITCH_STACK_SIZE, sp	// r14 = sp - IA64_SWITCH_STACK_SIZE: first address prefetched on the current stack
+	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0	// r15 = in0 + IA64_TASK_THREAD_KSP_OFFSET (in0 = next task)
+	;;
+	ld8 r16 = [r15]				// load next's stack pointer
+	lfetch.fault.excl [r14], 128		// prefetch at r14 with the .excl hint, then r14 += 128 (1st of 5)
+	;;
+	lfetch.fault.excl [r14], 128		// 2nd prefetch through r14
+	lfetch.fault [r16], 128			// prefetch at next's stack pointer, then r16 += 128 (1st of 5)
+	;;
+	lfetch.fault.excl [r14], 128		// 3rd prefetch through r14
+	lfetch.fault [r16], 128			// 2nd prefetch through r16
+	;;
+	lfetch.fault.excl [r14], 128		// 4th prefetch through r14
+	lfetch.fault [r16], 128			// 3rd prefetch through r16
+	;;
+	lfetch.fault.excl [r14], 128		// 5th and last prefetch through r14 (5 * 128 bytes covered)
+	lfetch.fault [r16], 128			// 4th prefetch through r16
+	;;
+	lfetch.fault [r16], 128			// 5th and last prefetch through r16; it trails r14 by one group because r16 is only valid after the ld8
+	br.ret.sptk.many rp			// return to the caller
+END(prefetch_stack)
+
 GLOBAL_ENTRY(execve)
 	mov r15=__NR_execve			// put syscall number in place
 	break __BREAK_SYSCALL
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 7d7684a369d..2ddbac6f499 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -14,7 +14,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/percpu.h>
 #include <asm/thread_info.h>
 #include <asm/sal.h>
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 86948ce63e4..86064ca9895 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -10,7 +10,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/sigcontext.h>
 #include <asm/system.h>
 #include <asm/unistd.h>
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 8d3a9291b47..bfe65b2e862 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -25,7 +25,7 @@
 #include <asm/fpu.h>
 #include <asm/kregs.h>
 #include <asm/mmu_context.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pal.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 7936b62f7a2..574084f343f 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -561,7 +561,7 @@ static inline int vector_is_shared (int vector)
 	return (iosapic_intr_info[vector].count > 1);
 }
 
-static void
+static int
 register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
 {
@@ -576,7 +576,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	index = find_iosapic(gsi);
 	if (index < 0) {
 		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
-		return;
+		return -ENODEV;
 	}
 
 	iosapic_address = iosapic_lists[index].addr;
@@ -587,7 +587,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 		rte = iosapic_alloc_rte();
 		if (!rte) {
 			printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
-			return;
+			return -ENOMEM;
 		}
 
 		rte_index = gsi - gsi_base;
@@ -603,7 +603,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
 		if (info->trigger != trigger || info->polarity != polarity) {
 			printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
-			return;
+			return -EINVAL;
 		}
 	}
 
@@ -623,6 +623,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 			       __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
 		idesc->handler = irq_type;
 	}
+	return 0;
 }
 
 static unsigned int
@@ -710,7 +711,7 @@ int
 iosapic_register_intr (unsigned int gsi,
 		       unsigned long polarity, unsigned long trigger)
 {
-	int vector, mask = 1;
+	int vector, mask = 1, err;
 	unsigned int dest;
 	unsigned long flags;
 	struct iosapic_rte_info *rte;
@@ -737,8 +738,8 @@ again:
 	vector = assign_irq_vector(AUTO_ASSIGN);
 	if (vector < 0) {
 		vector = iosapic_find_sharable_vector(trigger, polarity);
-		if (vector < 0)
-			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+  		if (vector < 0)
+			return -ENOSPC;
 	}
 
 	spin_lock_irqsave(&irq_descp(vector)->lock, flags);
@@ -753,8 +754,13 @@ again:
 		}
 
 		dest = get_target_cpu(gsi, vector);
-		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
+		err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
 			      polarity, trigger);
+		if (err < 0) {
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			return err;
+		}
 
 		/*
 		 * If the vector is shared and already unmasked for
@@ -776,7 +782,6 @@ again:
 	return vector;
 }
 
-#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
 void
 iosapic_unregister_intr (unsigned int gsi)
 {
@@ -859,7 +864,6 @@ iosapic_unregister_intr (unsigned int gsi)
 	spin_unlock(&iosapic_lock);
 	spin_unlock_irqrestore(&idesc->lock, flags);
 }
-#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
 /*
  * ACPI calls this when it finds an entry for a platform interrupt.
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 28f2aadc38d..205d9802826 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -91,23 +91,8 @@ skip:
 }
 
 #ifdef CONFIG_SMP
-/*
- * This is updated when the user sets irq affinity via /proc
- */
-static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
-static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
-
 static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
 
-/*
- * Arch specific routine for deferred write to iosapic rte to reprogram
- * intr destination.
- */
-void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
-{
-	pending_irq_cpumask[irq] = mask_val;
-}
-
 void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 {
 	cpumask_t mask = CPU_MASK_NONE;
@@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 
 	if (irq < NR_IRQS) {
 		irq_affinity[irq] = mask;
+		set_irq_info(irq, mask);
 		irq_redir[irq] = (char) (redir & 0xff);
 	}
 }
-
-
-void move_irq(int irq)
-{
-	/* note - we hold desc->lock */
-	cpumask_t tmp;
-	irq_desc_t *desc = irq_descp(irq);
-	int redir = test_bit(irq, pending_irq_redir);
-
-	if (unlikely(!desc->handler->set_affinity))
-		return;
-
-	if (!cpus_empty(pending_irq_cpumask[irq])) {
-		cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
-		if (unlikely(!cpus_empty(tmp))) {
-			desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
-						    pending_irq_cpumask[irq]);
-		}
-		cpus_clear(pending_irq_cpumask[irq]);
-	}
-}
-
-
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 3bb3a13c404..c13ca0d49c4 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -44,7 +44,7 @@
 #include <asm/break.h>
 #include <asm/ia32.h>
 #include <asm/kregs.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
@@ -69,7 +69,6 @@
 # define DBG_FAULT(i)
 #endif
 
-#define MINSTATE_VIRT	/* needed by minstate.h */
 #include "minstate.h"
 
 #define FAULT(n)									\
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index b7fa3ccd2b0..2323377e369 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -49,6 +49,7 @@
 	/*
 	 * void jprobe_break(void)
 	 */
+	.section .kprobes.text, "ax"
 ENTRY(jprobe_break)
 	break.m 0x80300
 END(jprobe_break)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 884f5cd27d8..471086b808a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -87,12 +87,25 @@ static enum instruction_type bundle_encoding[32][3] = {
  * is IP relative instruction and update the kprobe
  * inst flag accordingly
  */
-static void update_kprobe_inst_flag(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
+					      uint major_opcode,
+					      unsigned long kprobe_inst,
+					      struct kprobe *p)
 {
 	p->ainsn.inst_flag = 0;
 	p->ainsn.target_br_reg = 0;
 
+	/* Check for Break instruction
+ 	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 33:35 X3 to be zero
+	 */
+	if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) {
+		/* is a break instruction */
+	 	p->ainsn.inst_flag |= INST_FLAG_BREAK_INST;
+		return;
+	}
+
 	if (bundle_encoding[template][slot] == B) {
 		switch (major_opcode) {
 		  case INDIRECT_CALL_OPCODE:
@@ -126,8 +139,10 @@ static void update_kprobe_inst_flag(uint template, uint  slot, uint major_opcode
  * Returns 0 if supported
  * Returns -EINVAL if unsupported
  */
-static int unsupported_inst(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static int __kprobes unsupported_inst(uint template, uint  slot,
+				      uint major_opcode,
+				      unsigned long kprobe_inst,
+				      struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 
@@ -168,8 +183,9 @@ static int unsupported_inst(uint template, uint  slot, uint major_opcode,
  * on which we are inserting kprobe is cmp instruction
  * with ctype as unc.
  */
-static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode,
-unsigned long kprobe_inst)
+static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
+					    uint major_opcode,
+					    unsigned long kprobe_inst)
 {
 	cmp_inst_t cmp_inst;
 	uint ctype_unc = 0;
@@ -201,8 +217,10 @@ out:
  * In this function we override the bundle with
  * the break instruction at the given slot.
  */
-static void prepare_break_inst(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static void __kprobes prepare_break_inst(uint template, uint  slot,
+					 uint major_opcode,
+					 unsigned long kprobe_inst,
+					 struct kprobe *p)
 {
 	unsigned long break_inst = BREAK_INST;
 	bundle_t *bundle = &p->ainsn.insn.bundle;
@@ -271,7 +289,8 @@ static inline int in_ivt_functions(unsigned long addr)
 		&& addr < (unsigned long)__end_ivt_text);
 }
 
-static int valid_kprobe_addr(int template, int slot, unsigned long addr)
+static int __kprobes valid_kprobe_addr(int template, int slot,
+				       unsigned long addr)
 {
 	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
 		printk(KERN_WARNING "Attempting to insert unaligned kprobe "
@@ -323,7 +342,7 @@ static void kretprobe_trampoline(void)
  *    - cleanup by marking the instance as unused
  *    - long jump back to the original return address
  */
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head;
@@ -381,7 +400,8 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         return 1;
 }
 
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+				      struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri;
 
@@ -399,7 +419,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 	}
 }
 
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long) p->addr;
 	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
@@ -430,7 +450,7 @@ int arch_prepare_kprobe(struct kprobe *p)
 	return 0;
 }
 
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
@@ -439,7 +459,7 @@ void arch_arm_kprobe(struct kprobe *p)
 	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
 }
 
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
@@ -449,7 +469,7 @@ void arch_disarm_kprobe(struct kprobe *p)
 	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
 }
 
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 }
 
@@ -461,7 +481,7 @@ void arch_remove_kprobe(struct kprobe *p)
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
  */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
   	unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
   	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
@@ -528,13 +548,16 @@ turn_ss_off:
   	ia64_psr(regs)->ss = 0;
 }
 
-static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 {
 	unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
 	unsigned long slot = (unsigned long)p->addr & 0xf;
 
-	/* Update instruction pointer (IIP) and slot number (IPSR.ri) */
-	regs->cr_iip = bundle_addr & ~0xFULL;
+	/* single step inline if break instruction */
+	if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)
+		regs->cr_iip = (unsigned long)p->addr & ~0xFULL;
+	else
+		regs->cr_iip = bundle_addr & ~0xFULL;
 
 	if (slot > 2)
 		slot = 0;
@@ -545,7 +568,39 @@ static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
 	ia64_psr(regs)->ss = 1;
 }
 
-static int pre_kprobes_handler(struct die_args *args)
+static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
+{
+	unsigned int slot = ia64_psr(regs)->ri;
+	unsigned int template, major_opcode;
+	unsigned long kprobe_inst;
+	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
+	bundle_t bundle;
+
+	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
+	template = bundle.quad0.template;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+  		slot++;
+
+	/* Get the instruction at the given slot of the bundle */
+	get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
+
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 33:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+static int __kprobes pre_kprobes_handler(struct die_args *args)
 {
 	struct kprobe *p;
 	int ret = 0;
@@ -558,7 +613,9 @@ static int pre_kprobes_handler(struct die_args *args)
 	if (kprobe_running()) {
 		p = get_kprobe(addr);
 		if (p) {
-			if (kprobe_status == KPROBE_HIT_SS) {
+			if ( (kprobe_status == KPROBE_HIT_SS) &&
+	 		     (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
+  				ia64_psr(regs)->ss = 0;
 				unlock_kprobes();
 				goto no_kprobe;
 			}
@@ -592,6 +649,19 @@ static int pre_kprobes_handler(struct die_args *args)
 	p = get_kprobe(addr);
 	if (!p) {
 		unlock_kprobes();
+		if (!is_ia64_break_inst(regs)) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+
+		}
+
+		/* Not one of our breaks; let the kernel handle it */
 		goto no_kprobe;
 	}
 
@@ -616,7 +686,7 @@ no_kprobe:
 	return ret;
 }
 
-static int post_kprobes_handler(struct pt_regs *regs)
+static int __kprobes post_kprobes_handler(struct pt_regs *regs)
 {
 	if (!kprobe_running())
 		return 0;
@@ -641,7 +711,7 @@ out:
 	return 1;
 }
 
-static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	if (!kprobe_running())
 		return 0;
@@ -659,8 +729,8 @@ static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 	return 0;
 }
 
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
-			     void *data)
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
 	switch(val) {
@@ -681,7 +751,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
 	return NOTIFY_DONE;
 }
 
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
@@ -703,7 +773,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	*regs = jprobe_saved_regs;
 	return 1;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 4ebbf397438..d0a5106fba2 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -48,6 +48,9 @@
  *            Delete dead variables and functions.
  *            Reorder to remove the need for forward declarations and to consolidate
  *            related code.
+ *
+ * 2005-08-12 Keith Owens <kaos@sgi.com>
+ *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -77,6 +80,8 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "entry.h"
+
 #if defined(IA64_MCA_DEBUG_INFO)
 # define IA64_MCA_DEBUG(fmt...)	printk(fmt)
 #else
@@ -84,9 +89,7 @@
 #endif
 
 /* Used by mca_asm.S */
-ia64_mca_sal_to_os_state_t	ia64_sal_to_os_handoff_state;
-ia64_mca_os_to_sal_state_t	ia64_os_to_sal_handoff_state;
-u64				ia64_mca_serialize;
+u32				ia64_mca_serialize;
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
@@ -95,8 +98,10 @@ DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
 unsigned long __per_cpu_mca[NR_CPUS];
 
 /* In mca_asm.S */
-extern void			ia64_monarch_init_handler (void);
-extern void			ia64_slave_init_handler (void);
+extern void			ia64_os_init_dispatch_monarch (void);
+extern void			ia64_os_init_dispatch_slave (void);
+
+static int monarch_cpu = -1;
 
 static ia64_mc_info_t		ia64_mc_info;
 
@@ -234,7 +239,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
  *  This function retrieves a specified error record type from SAL
  *  and wakes up any processes waiting for error records.
  *
- *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
+ *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
+ *              FIXME: remove MCA and irq_safe.
  */
 static void
 ia64_mca_log_sal_error_record(int sal_info_type)
@@ -242,7 +248,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 	u8 *buffer;
 	sal_log_record_header_t *rh;
 	u64 size;
-	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
+	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
 #ifdef IA64_MCA_DEBUG_INFO
 	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
 #endif
@@ -330,191 +336,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 
 #endif /* CONFIG_ACPI */
 
-static void
-show_min_state (pal_min_state_area_t *minstate)
-{
-	u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
-	u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
-
-	printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
-	printk("pr\t\t%016lx\n", minstate->pmsa_pr);
-	printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
-	printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
-	printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
-	printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
-	printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
-	printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
-	printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
-	printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
-	printk("b1\t\t%016lx ", minstate->pmsa_br1);
-	print_symbol("%s\n", minstate->pmsa_br1);
-
-	printk("\nstatic registers r0-r15:\n");
-	printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
-	       0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
-	printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[3], minstate->pmsa_gr[4],
-	       minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
-	printk(" r8-11 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[7], minstate->pmsa_gr[8],
-	       minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
-	printk("r12-15 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[11], minstate->pmsa_gr[12],
-	       minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
-
-	printk("\nbank 0:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
-	       minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
-	       minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
-	       minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
-	       minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
-
-	printk("\nbank 1:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
-	       minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
-	       minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
-	       minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
-	       minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
-}
-
-static void
-fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
-{
-	u64 *dst_banked, *src_banked, bit, shift, nat_bits;
-	int i;
-
-	/*
-	 * First, update the pt-regs and switch-stack structures with the contents stored
-	 * in the min-state area:
-	 */
-	if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
-		pt->cr_ipsr = ms->pmsa_xpsr;
-		pt->cr_iip = ms->pmsa_xip;
-		pt->cr_ifs = ms->pmsa_xfs;
-	} else {
-		pt->cr_ipsr = ms->pmsa_ipsr;
-		pt->cr_iip = ms->pmsa_iip;
-		pt->cr_ifs = ms->pmsa_ifs;
-	}
-	pt->ar_rsc = ms->pmsa_rsc;
-	pt->pr = ms->pmsa_pr;
-	pt->r1 = ms->pmsa_gr[0];
-	pt->r2 = ms->pmsa_gr[1];
-	pt->r3 = ms->pmsa_gr[2];
-	sw->r4 = ms->pmsa_gr[3];
-	sw->r5 = ms->pmsa_gr[4];
-	sw->r6 = ms->pmsa_gr[5];
-	sw->r7 = ms->pmsa_gr[6];
-	pt->r8 = ms->pmsa_gr[7];
-	pt->r9 = ms->pmsa_gr[8];
-	pt->r10 = ms->pmsa_gr[9];
-	pt->r11 = ms->pmsa_gr[10];
-	pt->r12 = ms->pmsa_gr[11];
-	pt->r13 = ms->pmsa_gr[12];
-	pt->r14 = ms->pmsa_gr[13];
-	pt->r15 = ms->pmsa_gr[14];
-	dst_banked = &pt->r16;		/* r16-r31 are contiguous in struct pt_regs */
-	src_banked = ms->pmsa_bank1_gr;
-	for (i = 0; i < 16; ++i)
-		dst_banked[i] = src_banked[i];
-	pt->b0 = ms->pmsa_br0;
-	sw->b1 = ms->pmsa_br1;
-
-	/* construct the NaT bits for the pt-regs structure: */
-#	define PUT_NAT_BIT(dst, addr)					\
-	do {								\
-		bit = nat_bits & 1; nat_bits >>= 1;			\
-		shift = ((unsigned long) addr >> 3) & 0x3f;		\
-		dst = ((dst) & ~(1UL << shift)) | (bit << shift);	\
-	} while (0)
-
-	/* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */
-	shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
-	nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift));
-
-	PUT_NAT_BIT(sw->caller_unat, &pt->r1);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r2);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r3);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r4);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r5);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r6);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r7);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r8);	PUT_NAT_BIT(sw->caller_unat, &pt->r9);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r10);	PUT_NAT_BIT(sw->caller_unat, &pt->r11);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r12);	PUT_NAT_BIT(sw->caller_unat, &pt->r13);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r14);	PUT_NAT_BIT(sw->caller_unat, &pt->r15);
-	nat_bits >>= 16;	/* skip over bank0 NaT bits */
-	PUT_NAT_BIT(sw->caller_unat, &pt->r16);	PUT_NAT_BIT(sw->caller_unat, &pt->r17);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r18);	PUT_NAT_BIT(sw->caller_unat, &pt->r19);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r20);	PUT_NAT_BIT(sw->caller_unat, &pt->r21);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r22);	PUT_NAT_BIT(sw->caller_unat, &pt->r23);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r24);	PUT_NAT_BIT(sw->caller_unat, &pt->r25);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r26);	PUT_NAT_BIT(sw->caller_unat, &pt->r27);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r28);	PUT_NAT_BIT(sw->caller_unat, &pt->r29);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r30);	PUT_NAT_BIT(sw->caller_unat, &pt->r31);
-}
-
-static void
-init_handler_platform (pal_min_state_area_t *ms,
-		       struct pt_regs *pt, struct switch_stack *sw)
-{
-	struct unw_frame_info info;
-
-	/* if a kernel debugger is available call it here else just dump the registers */
-
-	/*
-	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
-	 * generated via the BMC's command-line interface, but since the console is on the
-	 * same serial line, the user will need some time to switch out of the BMC before
-	 * the dump begins.
-	 */
-	printk("Delaying for 5 seconds...\n");
-	udelay(5*1000000);
-	show_min_state(ms);
-
-	printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
-	fetch_min_state(ms, pt, sw);
-	unw_init_from_interruption(&info, current, pt, sw);
-	ia64_do_show_stack(&info, NULL);
-
-#ifdef CONFIG_SMP
-	/* read_trylock() would be handy... */
-	if (!tasklist_lock.write_lock)
-		read_lock(&tasklist_lock);
-#endif
-	{
-		struct task_struct *g, *t;
-		do_each_thread (g, t) {
-			if (t == current)
-				continue;
-
-			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
-			show_stack(t, NULL);
-		} while_each_thread (g, t);
-	}
-#ifdef CONFIG_SMP
-	if (!tasklist_lock.write_lock)
-		read_unlock(&tasklist_lock);
-#endif
-
-	printk("\nINIT dump complete.  Please reboot now.\n");
-	while (1);			/* hang city if no debugger */
-}
-
 #ifdef CONFIG_ACPI
 /*
  * ia64_mca_register_cpev
@@ -657,42 +478,6 @@ ia64_mca_cmc_vector_enable_keventd(void *unused)
 }
 
 /*
- * ia64_mca_wakeup_ipi_wait
- *
- *	Wait for the inter-cpu interrupt to be sent by the
- *	monarch processor once it is done with handling the
- *	MCA.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-static void
-ia64_mca_wakeup_ipi_wait(void)
-{
-	int	irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6);
-	int	irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f);
-	u64	irr = 0;
-
-	do {
-		switch(irr_num) {
-		      case 0:
-			irr = ia64_getreg(_IA64_REG_CR_IRR0);
-			break;
-		      case 1:
-			irr = ia64_getreg(_IA64_REG_CR_IRR1);
-			break;
-		      case 2:
-			irr = ia64_getreg(_IA64_REG_CR_IRR2);
-			break;
-		      case 3:
-			irr = ia64_getreg(_IA64_REG_CR_IRR3);
-			break;
-		}
-		cpu_relax();
-	} while (!(irr & (1UL << irr_bit))) ;
-}
-
-/*
  * ia64_mca_wakeup
  *
  *	Send an inter-cpu interrupt to wake-up a particular cpu
@@ -757,11 +542,9 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
 	 */
 	ia64_sal_mc_rendez();
 
-	/* Wait for the wakeup IPI from the monarch
-	 * This waiting is done by polling on the wakeup-interrupt
-	 * vector bit in the processor's IRRs
-	 */
-	ia64_mca_wakeup_ipi_wait();
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+	       cpu_relax();	/* spin until monarch leaves */
 
 	/* Enable all interrupts */
 	local_irq_restore(flags);
@@ -789,53 +572,13 @@ ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
 	return IRQ_HANDLED;
 }
 
-/*
- * ia64_return_to_sal_check
- *
- *	This is function called before going back from the OS_MCA handler
- *	to the OS_MCA dispatch code which finally takes the control back
- *	to the SAL.
- *	The main purpose of this routine is to setup the OS_MCA to SAL
- *	return state which can be used by the OS_MCA dispatch code
- *	just before going back to SAL.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-
-static void
-ia64_return_to_sal_check(int recover)
-{
-
-	/* Copy over some relevant stuff from the sal_to_os_mca_handoff
-	 * so that it can be used at the time of os_mca_to_sal_handoff
-	 */
-	ia64_os_to_sal_handoff_state.imots_sal_gp =
-		ia64_sal_to_os_handoff_state.imsto_sal_gp;
-
-	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
-		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
-
-	if (recover)
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
-	else
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
-
-	/* Default = tell SAL to return to same context */
-	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
-
-	ia64_os_to_sal_handoff_state.imots_new_min_state =
-		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
-
-}
-
 /* Function pointer for extra MCA recovery */
 int (*ia64_mca_ucmc_extension)
-	(void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+	(void*,struct ia64_sal_os_state*)
 	= NULL;
 
 int
-ia64_reg_MCA_extension(void *fn)
+ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
 {
 	if (ia64_mca_ucmc_extension)
 		return 1;
@@ -854,8 +597,321 @@ ia64_unreg_MCA_extension(void)
 EXPORT_SYMBOL(ia64_reg_MCA_extension);
 EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
+
+static inline void
+copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+{
+	u64 fslot, tslot, nat;
+	*tr = *fr;
+	fslot = ((unsigned long)fr >> 3) & 63;
+	tslot = ((unsigned long)tr >> 3) & 63;
+	*tnat &= ~(1UL << tslot);
+	nat = (fnat >> fslot) & 1;
+	*tnat |= (nat << tslot);
+}
+
+/* On entry to this routine, we are running on the per cpu stack, see
+ * mca_asm.h.  The original stack has not been touched by this event.  Some of
+ * the original stack's registers will be in the RBS on this stack.  This stack
+ * also contains a partial pt_regs and switch_stack, the rest of the data is in
+ * PAL minstate.
+ *
+ * The first thing to do is modify the original stack to look like a blocked
+ * task so we can run backtrace on the original task.  Also mark the per cpu
+ * stack as current to ensure that we use the correct task state, it also means
+ * that we can do backtrace on the MCA/INIT handler code itself.
+ */
+
+static task_t *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p, comm[sizeof(current->comm)];
+	ia64_va va;
+	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	task_t *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+			sizeof(struct switch_stack) + 16;
+	u64 *old_bspstore, *old_bsp;
+	u64 *new_bspstore, *new_bsp;
+	u64 old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const u64 *bank;
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below.  So ia64_old_stack() must restore the dirty
+	 * registers from the new stack.  The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp.  Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (u64 *)ar_bspstore;
+	old_bsp = (u64 *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		goto no_mod;
+	}
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent r12 and r13";
+		goto no_mod;
+	}
+	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bspstore and r13";
+		goto no_mod;
+	}
+	va.p = old_bspstore;
+	if (va.f.reg < 5) {
+		msg = "old_bspstore is in the wrong region";
+		goto no_mod;
+	}
+	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bsp and r13";
+		goto no_mod;
+	}
+	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+	if (ar_bspstore + size > r12) {
+		msg = "no room for blocked state";
+		goto no_mod;
+	}
+
+	/* Change the comm field on the MCA/INIT task to include the pid that
+	 * was interrupted, it makes for easier debugging.  If that pid was 0
+	 * (swapper or nested MCA/INIT) then use the start of the previous comm
+	 * field suffixed with its cpu.
+	 */
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			previous_current->thread_info->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+
+	/* Make the original task look blocked.  First stack a struct pt_regs,
+	 * describing the state at the time of interrupt.  mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		old_regs->cr_iip = ms->pmsa_iip;
+		old_regs->cr_ipsr = ms->pmsa_ipsr;
+		old_regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		old_regs->cr_iip = ms->pmsa_xip;
+		old_regs->cr_ipsr = ms->pmsa_xpsr;
+		old_regs->cr_ifs = ms->pmsa_xfs;
+	}
+	old_regs->pr = ms->pmsa_pr;
+	old_regs->b0 = ms->pmsa_br0;
+	old_regs->loadrs = loadrs;
+	old_regs->ar_rsc = ms->pmsa_rsc;
+	old_unat = old_regs->ar_unat;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
+	if (ia64_psr(old_regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+
+	/* Next stack a struct switch_stack.  mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
+	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack.  The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT.  It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves.  If any cpu has
+ * not entered rendezvous yet then wait a bit.  The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so.  In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ */
+
+static void
+ia64_wait_for_slaves(int monarch)
+{
+	int c, wait = 0;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(1000);		/* short wait first */
+			wait = 1;
+			break;
+		}
+	}
+	if (!wait)
+		return;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
+			break;
+		}
+	}
+}
+
 /*
- * ia64_mca_ucmc_handler
+ * ia64_mca_handler
  *
  *	This is uncorrectable machine check handler called from OS_MCA
  *	dispatch code which is in turn called from SAL_CHECK().
@@ -866,16 +922,28 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
  *	further MCA logging is enabled by clearing logs.
  *	Monarch also has the duty of sending wakeup-IPIs to pull the
  *	slave processors out of rendezvous spinloop.
- *
- *  Inputs  :   None
- *  Outputs :   None
  */
 void
-ia64_mca_ucmc_handler(void)
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		 struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&ia64_sal_to_os_handoff_state.proc_state_param;
-	int recover; 
+		&sos->proc_state_param;
+	int recover, cpu = smp_processor_id();
+	task_t *previous_current;	/* task handed back to set_curr_task() on exit */
+
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+	monarch_cpu = cpu;	/* reset to -1 on exit; INIT slaves spin on this variable */
+	ia64_wait_for_slaves(cpu);
+
+	/* Wakeup all the processors which are spinning in the rendezvous loop.
+	 * They will leave SAL, then spin in the OS with interrupts disabled
+	 * until this monarch cpu leaves the MCA handler.  That gets control
+	 * back to the OS so we can backtrace the other cpus, backtrace when
+	 * spinning in SAL does not work.
+	 */
+	ia64_mca_wakeup_all();
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -883,25 +951,20 @@ ia64_mca_ucmc_handler(void)
 	/* TLB error is only exist in this SAL error record */
 	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
 	/* other error recovery */
-	   || (ia64_mca_ucmc_extension 
+	   || (ia64_mca_ucmc_extension
 		&& ia64_mca_ucmc_extension(
 			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
-			&ia64_sal_to_os_handoff_state,
-			&ia64_os_to_sal_handoff_state)); 
+			sos));	/* the extension now receives the per-event SAL/OS state */
 
 	if (recover) {
 		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
 		rh->severity = sal_log_severity_corrected;
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;	/* only changed on the recover path */
 	}
-	/*
-	 *  Wakeup all the processors which are spinning in the rendezvous
-	 *  loop.
-	 */
-	ia64_mca_wakeup_all();
 
-	/* Return to SAL */
-	ia64_return_to_sal_check(recover);
+	set_curr_task(cpu, previous_current);	/* put back the task returned by ia64_mca_modify_original_stack() */
+	monarch_cpu = -1;	/* lets cpus spinning in ia64_init_handler() leave */
 }
 
 static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
@@ -953,6 +1016,11 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 
 			cmc_polling_enabled = 1;
 			spin_unlock(&cmc_history_lock);
+			/* If we're being hit with CMC interrupts, we won't
+			 * ever execute the schedule_work() below.  Need to
+			 * disable CMC interrupts on this processor now.
+			 */
+			ia64_mca_cmc_vector_disable(NULL);
 			schedule_work(&cmc_disable_work);
 
 			/*
@@ -1125,34 +1193,114 @@ ia64_mca_cpe_poll (unsigned long dummy)
 /*
  * C portion of the OS INIT handler
  *
- * Called from ia64_monarch_init_handler
- *
- * Inputs: pointer to pt_regs where processor info was saved.
+ * Called from ia64_os_init_dispatch
  *
- * Returns:
- *   0 if SAL must warm boot the System
- *   1 if SAL must return to interrupted context using PAL_MC_RESUME
+ * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
+ * this event.  This code is used for both monarch and slave INIT events, see
+ * sos->monarch.
  *
+ * All INIT events switch to the INIT stack and change the previous process to
+ * blocked status.  If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command.  Use the monarch cpu to dump all
+ * the processes.  The slave INIT events all spin until the monarch cpu
+ * returns.  We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
  */
+
 void
-ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
 {
-	pal_min_state_area_t *ms;
+	static atomic_t slaves;		/* incremented by each cpu that enters with sos->monarch == 0 */
+	static atomic_t monarchs;	/* incremented by each cpu that is, or was promoted to, monarch */
+	task_t *previous_current;
+	int cpu = smp_processor_id(), c;
+	struct task_struct *g, *t;
 
-	oops_in_progress = 1;	/* avoid deadlock in printk, but it makes recovery dodgy */
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
 	console_loglevel = 15;	/* make sure printks make it to console */
 
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
-		ia64_sal_to_os_handoff_state.proc_state_param);
+	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
-	/*
-	 * Address of minstate area provided by PAL is physical,
-	 * uncacheable (bit 63 set). Convert to Linux virtual
-	 * address in region 6.
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;	/* set for monarch and slaves alike */
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves.  The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
 	 */
-	ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		       __FUNCTION__, cpu);
+		atomic_dec(&slaves);	/* this cpu no longer counts as a slave */
+		sos->monarch = 1;
+	}
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			       __FUNCTION__, cpu);
+		atomic_dec(&monarchs);	/* NOTE(review): slaves is not incremented here, yet the slave path below decrements it */
+		sos->monarch = 0;
+	}
 
-	init_handler_platform(ms, pt, sw);	/* call platform specific routines */
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+		while (monarch_cpu == -1)
+		       cpu_relax();	/* spin until monarch enters */
+		while (monarch_cpu != -1)
+		       cpu_relax();	/* spin until monarch leaves */
+		printk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;	/* the slaves spinning above see this and start waiting for -1 */
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000), INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu);
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);	/* cpu c's INIT stack */
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;	/* NULL when no previous task was recorded for cpu c */
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);	/* pid 0: add cpu and task pointer */
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {	/* trylock: the backtraces are skipped if the lock cannot be taken */
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;	/* releases the slaves spinning on monarch_cpu */
+	return;
 }
 
 static int __init
@@ -1202,6 +1350,34 @@ static struct irqaction mca_cpep_irqaction = {
 };
 #endif /* CONFIG_ACPI */
 
+/* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
+ * these stacks can never sleep, cannot return from the kernel to user space
+ * and do not appear in a normal ps listing, so most fields are left zeroed.
+ * mca_data + offset is the stack to format, type becomes comm, cpu its cpu.
+ */
+
+static void
+format_mca_init_stack(void *mca_data, unsigned long offset,
+		const char *type, int cpu)
+{
+	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
+	struct thread_info *ti;
+	memset(p, 0, KERNEL_STACK_SIZE);	/* whole stack area, including the task_struct at its start */
+	ti = (struct thread_info *)((char *)p + IA64_TASK_SIZE);	/* thread_info sits IA64_TASK_SIZE bytes after the task */
+	ti->flags = _TIF_MCA_INIT;
+	ti->preempt_count = 1;
+	ti->task = p;
+	ti->cpu = cpu;
+	p->thread_info = ti;
+	p->state = TASK_UNINTERRUPTIBLE;
+	cpu_set(cpu, p->cpus_allowed);	/* cpumask accessor instead of raw __set_bit() on a cpumask_t */
+	INIT_LIST_HEAD(&p->tasks);
+	p->parent = p->real_parent = p->group_leader = p;	/* self-referencing, no real parent */
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	strncpy(p->comm, type, sizeof(p->comm)-1);	/* last byte stays 0 from the memset above */
+}
+
 /* Do per-CPU MCA-related initialization.  */
 
 void __devinit
@@ -1214,19 +1390,28 @@ ia64_mca_cpu_init(void *cpu_data)
 		int cpu;
 
 		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
-					 * NR_CPUS);
+					 * NR_CPUS + KERNEL_STACK_SIZE);
+		mca_data = (void *)(((unsigned long)mca_data +
+					KERNEL_STACK_SIZE - 1) &
+				(-KERNEL_STACK_SIZE));
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, mca_stack),
+					"MCA", cpu);
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, init_stack),
+					"INIT", cpu);
 			__per_cpu_mca[cpu] = __pa(mca_data);
 			mca_data += sizeof(struct ia64_mca_cpu);
 		}
 	}
 
-        /*
-         * The MCA info structure was allocated earlier and its
-         * physical address saved in __per_cpu_mca[cpu].  Copy that
-         * address * to ia64_mca_data so we can access it as a per-CPU
-         * variable.
-         */
+	/*
+	 * The MCA info structure was allocated earlier and its
+	 * physical address saved in __per_cpu_mca[cpu].  Copy that
+	 * address to ia64_mca_data so we can access it as a per-CPU
+	 * variable.
+	 */
 	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
 
 	/*
@@ -1236,11 +1421,11 @@ ia64_mca_cpu_init(void *cpu_data)
 	__get_cpu_var(ia64_mca_per_cpu_pte) =
 		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
 
-        /*
-         * Also, stash away a copy of the PAL address and the PTE
-         * needed to map it.
-         */
-        pal_vaddr = efi_get_pal_addr();
+	/*
+	 * Also, stash away a copy of the PAL address and the PTE
+	 * needed to map it.
+	 */
+	pal_vaddr = efi_get_pal_addr();
 	if (!pal_vaddr)
 		return;
 	__get_cpu_var(ia64_mca_pal_base) =
@@ -1272,8 +1457,8 @@ ia64_mca_cpu_init(void *cpu_data)
 void __init
 ia64_mca_init(void)
 {
-	ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
-	ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
 	s64 rc;
@@ -1351,9 +1536,9 @@ ia64_mca_init(void)
 	 * XXX - disable SAL checksum by setting size to 0, should be
 	 * size of the actual init handler in mca_asm.S.
 	 */
-	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(mon_init_ptr->fp);
+	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(init_hldlr_ptr_monarch->fp);
 	ia64_mc_info.imi_monarch_init_handler_size	= 0;
-	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(slave_init_ptr->fp);
+	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
 	ia64_mc_info.imi_slave_init_handler_size	= 0;
 
 	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index ef3fd7265b6..db32fc1d393 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -16,6 +16,9 @@
 // 04/11/12 Russ Anderson <rja@sgi.com>
 //		   Added per cpu MCA/INIT stack save areas.
 //
+// 12/08/05 Keith Owens <kaos@sgi.com>
+//		   Use per cpu MCA/INIT stacks for all data.
+//
 #include <linux/config.h>
 #include <linux/threads.h>
 
@@ -25,96 +28,23 @@
 #include <asm/mca_asm.h>
 #include <asm/mca.h>
 
-/*
- * When we get a machine check, the kernel stack pointer is no longer
- * valid, so we need to set a new stack pointer.
- */
-#define	MINSTATE_PHYS	/* Make sure stack access is physical for MINSTATE */
-
-/*
- * Needed for return context to SAL
- */
-#define IA64_MCA_SAME_CONTEXT	0
-#define IA64_MCA_COLD_BOOT	-2
-
-#include "minstate.h"
-
-/*
- * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
- *		1. GR1 = OS GP
- *		2. GR8 = PAL_PROC physical address
- *		3. GR9 = SAL_PROC physical address
- *		4. GR10 = SAL GP (physical)
- *		5. GR11 = Rendez state
- *		6. GR12 = Return address to location within SAL_CHECK
- */
-#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp)		\
-	LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
-	st8	[_tmp]=r1,0x08;;			\
-	st8	[_tmp]=r8,0x08;;			\
-	st8	[_tmp]=r9,0x08;;			\
-	st8	[_tmp]=r10,0x08;;			\
-	st8	[_tmp]=r11,0x08;;			\
-	st8	[_tmp]=r12,0x08;;			\
-	st8	[_tmp]=r17,0x08;;			\
-	st8	[_tmp]=r18,0x08
-
-/*
- * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
- * (p6) is executed if we never entered virtual mode (TLB error)
- * (p7) is executed if we entered virtual mode as expected (normal case)
- *	1. GR8 = OS_MCA return status
- *	2. GR9 = SAL GP (physical)
- *	3. GR10 = 0/1 returning same/new context
- *	4. GR22 = New min state save area pointer
- *	returns ptr to SAL rtn save loc in _tmp
- */
-#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp)	\
-	movl	_tmp=ia64_os_to_sal_handoff_state;;	\
-	DATA_VA_TO_PA(_tmp);;				\
-	ld8	r8=[_tmp],0x08;;			\
-	ld8	r9=[_tmp],0x08;;			\
-	ld8	r10=[_tmp],0x08;;			\
-	ld8	r22=[_tmp],0x08;;
-	// now _tmp is pointing to SAL rtn save location
-
-/*
- * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state
- *	imots_os_status=IA64_MCA_COLD_BOOT
- *	imots_sal_gp=SAL GP
- *	imots_context=IA64_MCA_SAME_CONTEXT
- *	imots_new_min_state=Min state save area pointer
- *	imots_sal_check_ra=Return address to location within SAL_CHECK
- *
- */
-#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
-	movl	tmp=IA64_MCA_COLD_BOOT;					\
-	movl	sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state);	\
-	movl	os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);;	\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],48;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	movl	tmp=IA64_MCA_SAME_CONTEXT;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],-8;;				\
-	st8     [os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff];;				\
-	st8     [os_to_sal_handoff]=tmp;;
+#include "entry.h"
 
 #define GET_IA64_MCA_DATA(reg)						\
 	GET_THIS_PADDR(reg, ia64_mca_data)				\
 	;;								\
 	ld8 reg=[reg]
 
-	.global ia64_os_mca_dispatch
-	.global ia64_os_mca_dispatch_end
-	.global ia64_sal_to_os_handoff_state
-	.global	ia64_os_to_sal_handoff_state
 	.global ia64_do_tlb_purge
+	.global ia64_os_mca_dispatch
+	.global ia64_os_init_dispatch_monarch
+	.global ia64_os_init_dispatch_slave
 
 	.text
 	.align 16
 
+//StartMain////////////////////////////////////////////////////////////////////
+
 /*
  * Just the TLB purge part is moved to a separate function
  * so we can re-use the code for cpu hotplug code as well
@@ -207,34 +137,31 @@ ia64_do_tlb_purge:
 	br.sptk.many b1
 	;;
 
-ia64_os_mca_dispatch:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
 
+ia64_os_mca_dispatch:
 	// Serialize all MCA processing
 	mov	r3=1;;
 	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
 ia64_os_mca_spin:
-	xchg8	r4=[r2],r3;;
+	xchg4	r4=[r2],r3;;
 	cmp.ne	p6,p0=r4,r0
 (p6)	br ia64_os_mca_spin
 
-	// Save the SAL to OS MCA handoff state as defined
-	// by SAL SPEC 3.0
-	// NOTE : The order in which the state gets saved
-	//	  is dependent on the way the C-structure
-	//	  for ia64_mca_sal_to_os_state_t has been
-	//	  defined in include/asm/mca.h
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
-	;;
-
-	// LOG PROCESSOR STATE INFO FROM HERE ON..
-begin_os_mca_dump:
-	br	ia64_os_mca_proc_state_dump;;
-
-ia64_os_mca_done_dump:
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	mov r19=1				// All MCA events are treated as monarch (for now)
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
 
-	LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
+	GET_IA64_MCA_DATA(r2)
+	// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
+	;;
+	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2
 	;;
-	ld8 r18=[r16]		// Get processor state parameter on existing PALE_CHECK.
+	ld8 r18=[r3]				// Get processor state parameter on existing PALE_CHECK.
 	;;
 	tbit.nz p6,p7=r18,60
 (p7)	br.spnt done_tlb_purge_and_reload
@@ -323,624 +250,849 @@ ia64_reload_tr:
 	itr.d dtr[r20]=r16
 	;;
 	srlz.d
-	;;
-	br.sptk.many done_tlb_purge_and_reload
-err:
-	COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
-	br.sptk.many ia64_os_mca_done_restore
 
 done_tlb_purge_and_reload:
 
-	// Setup new stack frame for OS_MCA handling
-	GET_IA64_MCA_DATA(r2)
-	;;
-	add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
-	;;
-	rse_switch_context(r6,r3,r2);;	// RSC management in this new context
+	// switch to per cpu MCA stack
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
 
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2
-	;;
-	mov r12=r2		// establish new stack-pointer
+	mov r7=r2
 
         // Enter virtual mode from physical mode
 	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
-ia64_os_mca_virtual_begin:
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_mca_virtual_begin be a global function.
+ENTRY(ia64_os_mca_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
+	;;
 
 	// Call virtual mode handler
-	movl		r2=ia64_mca_ucmc_handler;;
-	mov		b6=r2;;
-	br.call.sptk.many    b0=b6;;
-.ret0:
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_mca_handler
+
 	// Revert back to physical mode before going back to SAL
 	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
 ia64_os_mca_virtual_end:
 
-	// restore the original stack frame here
+END(ia64_os_mca_virtual_begin)
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the MCA handler frame
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
+
+	// release lock
+	LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
+	st4.rel		[r3]=r0
+
+	br		b0
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+//
+// SAL to OS entry point for INIT on all processors.  This has been defined for
+// registration purposes with SAL as a part of ia64_mca_init.  Monarch and
+// slave INIT have identical processing, except for the value of the
+// sos->monarch flag in r19.
+//
+
+ia64_os_init_dispatch_monarch:
+	mov r19=1				// monarch flag for sos->monarch.  Bow, bow, ye lower middle classes!
+	br.sptk ia64_os_init_dispatch		// skip the slave entry point below
+
+ia64_os_init_dispatch_slave:
+	mov r19=0				// slave flag, falls through.  <igor>yeth, mathter</igor>
+
+ia64_os_init_dispatch:
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
+
+	// switch to per cpu INIT stack
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
+
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	;;
-	movl    r4=IA64_PSR_MC
+	mov r7=r2				// copy kept in r7, r2 is handed to VIRTUAL_MODE_ENTER below
+
+        // Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4)
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_init_virtual_begin be a global function.
+ENTRY(ia64_os_init_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
 	;;
-	rse_return_context(r4,r3,r2)	// switch from interrupt context for RSE
 
-	// let us restore all the registers from our PSI structure
-	mov	r8=gp
+	// Call virtual mode handler
+	alloc r14=ar.pfs,0,0,3,0		// frame with three outputs, out0-out2
 	;;
-begin_os_mca_restore:
-	br	ia64_os_mca_proc_state_restore;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2		// arg 1: struct pt_regs *regs
+	add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2	// arg 2: struct switch_stack *sw
+	add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2		// arg 3: struct ia64_sal_os_state *sos
+	br.call.sptk.many    b0=ia64_init_handler
 
-ia64_os_mca_done_restore:
-	OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
-	// branch back to SALE_CHECK
-	ld8		r3=[r2];;
-	mov		b0=r3;;		// SAL_CHECK return address
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4)
+ia64_os_init_virtual_end:
 
-	// release lock
-	movl		r3=ia64_mca_serialize;;
-	DATA_VA_TO_PA(r3);;
-	st8.rel		[r3]=r0
+END(ia64_os_init_virtual_begin)
 
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the INIT handler frame
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov		b0=r12			// SAL return address (comment said SAL_CHECK; this is the INIT path)
 	br		b0
-	;;
-ia64_os_mca_dispatch_end:
+
 //EndMain//////////////////////////////////////////////////////////////////////
 
+// common defines for the stubs
+#define	ms		r4
+#define	regs		r5
+#define	temp1		r2	/* careful, it overlaps with input registers */
+#define	temp2		r3	/* careful, it overlaps with input registers */
+#define	temp3		r7
+#define	temp4		r14
+
 
 //++
 // Name:
-//      ia64_os_mca_proc_state_dump()
+//	ia64_state_save()
 //
 // Stub Description:
 //
-//       This stub dumps the processor state during MCHK to a data area
+//	Save the state that is not in minstate.  This is sensitive to the layout of
+//	struct ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	The OS to SAL section of struct ia64_sal_os_state is set to a default
+//	value of cold boot (MCA) or warm boot (INIT) and return to the same
+//	context.  ia64_sal_os_state is also used to hold some registers that
+//	need to be saved and restored across the stack switches.
+//
+//	Most input registers to this stub come from PAL/SAL
+//	r1  os gp, physical
+//	r8  pal_proc entry point
+//	r9  sal_proc entry point
+//	r10 sal gp
+//	r11 MCA - rendezvous state, INIT - reason code
+//	r12 sal return address
+//	r17 pal min_state
+//	r18 processor state parameter
+//	r19 monarch flag, set by the caller of this routine
+//
+//	In addition to the SAL to OS state, this routine saves all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those that are already in the PAL minstate area.  This
+//	results in a partial pt_regs and switch_stack, the C code copies the
+//	remaining registers from PAL minstate to pt_regs and switch_stack.  The
+//	resulting structures contain all the state of the original process when
+//	MCA/INIT occurred.
 //
 //--
 
-ia64_os_mca_proc_state_dump:
-// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
-//  to virtual addressing mode.
-	GET_IA64_MCA_DATA(r2)
+ia64_state_save:
+	add regs=MCA_SOS_OFFSET, r3
+	add ms=MCA_SOS_OFFSET+8, r3
+	mov b0=r2		// save return address
+	cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add temp1=temp2, regs	// struct ia64_sal_os_state on MCA or INIT stack
+	add temp2=temp2, ms	// struct ia64_sal_os_state+8 on MCA or INIT stack
+	;;
+	mov regs=temp1		// save the start of sos
+	st8 [temp1]=r1,16	// os_gp
+	st8 [temp2]=r8,16	// pal_proc
+	;;
+	st8 [temp1]=r9,16	// sal_proc
+	st8 [temp2]=r11,16	// rv_rc
+	mov r11=cr.iipa
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-	;;
-// save ar.NaT
-	mov		r5=ar.unat                  // ar.unat
-
-// save banked GRs 16-31 along with NaT bits
-	bsw.1;;
-	st8.spill	[r2]=r16,8;;
-	st8.spill	[r2]=r17,8;;
-	st8.spill	[r2]=r18,8;;
-	st8.spill	[r2]=r19,8;;
-	st8.spill	[r2]=r20,8;;
-	st8.spill	[r2]=r21,8;;
-	st8.spill	[r2]=r22,8;;
-	st8.spill	[r2]=r23,8;;
-	st8.spill	[r2]=r24,8;;
-	st8.spill	[r2]=r25,8;;
-	st8.spill	[r2]=r26,8;;
-	st8.spill	[r2]=r27,8;;
-	st8.spill	[r2]=r28,8;;
-	st8.spill	[r2]=r29,8;;
-	st8.spill	[r2]=r30,8;;
-	st8.spill	[r2]=r31,8;;
-
-	mov		r4=ar.unat;;
-	st8		[r2]=r4,8                // save User NaT bits for r16-r31
-	mov		ar.unat=r5                  // restore original unat
-	bsw.0;;
-
-//save BRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=b0
-	mov		r5=b1
-	mov		r7=b2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b3
-	mov		r5=b4
-	mov		r7=b5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b6
-	mov		r5=b7;;
-	st8		[r2]=r3,2*8
-	st8		[r4]=r5,2*8;;
-
-cSaveCRs:
-// save CRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=cr.dcr
-	mov		r5=cr.itm
-	mov		r7=cr.iva;;
-
-	st8		[r2]=r3,8*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;            // 48 byte rements
-
-	mov		r3=cr.pta;;
-	st8		[r2]=r3,8*8;;            // 64 byte rements
-
-// if PSR.ic=0, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-begin_skip_intr_regs:
-(p6)	br		SkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=cr.ipsr
-	mov		r5=cr.isr
-	mov		r7=r0;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iip
-	mov		r5=cr.ifa
-	mov		r7=cr.itir;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iipa
-	mov		r5=cr.ifs
-	mov		r7=cr.iim;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr25;;                   // cr.iha
-	st8		[r2]=r3,160;;               // 160 byte rement
-
-SkipIntrRegs:
-	st8		[r2]=r0,152;;               // another 152 byte .
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2                   // duplicate r2 in r6
-
-	mov		r3=cr.lid
-//	mov		r5=cr.ivr                     // cr.ivr, don't read it
-	mov		r7=cr.tpr;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.eoi => cr67
-	mov		r5=r0                       // cr.irr0 => cr68
-	mov		r7=r0;;                     // cr.irr1 => cr69
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.irr2 => cr70
-	mov		r5=r0                       // cr.irr3 => cr71
-	mov		r7=cr.itv;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.pmv
-	mov		r5=cr.cmcv;;
-	st8		[r2]=r3,7*8
-	st8		[r4]=r5,7*8;;
-
-	mov		r3=r0                       // cr.lrr0 => cr80
-	mov		r5=r0;;                     // cr.lrr1 => cr81
-	st8		[r2]=r3,23*8
-	st8		[r4]=r5,23*8;;
-
-	adds		r2=25*8,r2;;
-
-cSaveARs:
-// save ARs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=ar.k0
-	mov		r5=ar.k1
-	mov		r7=ar.k2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k3
-	mov		r5=ar.k4
-	mov		r7=ar.k5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k6
-	mov		r5=ar.k7
-	mov		r7=r0;;                     // ar.kr8
-	st8		[r2]=r3,10*8
-	st8		[r4]=r5,10*8
-	st8		[r6]=r7,10*8;;           // rement by 72 bytes
-
-	mov		r3=ar.rsc
-	mov		ar.rsc=r0			    // put RSE in enforced lazy mode
-	mov		r5=ar.bsp
-	;;
-	mov		r7=ar.bspstore;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.rnat;;
-	st8		[r2]=r3,8*13             // increment by 13x8 bytes
-
-	mov		r3=ar.ccv;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.unat;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.fpsr;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.itc;;
-	st8		[r2]=r3,160                 // 160
-
-	mov		r3=ar.pfs;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.lc;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.ec;;
-	st8		[r2]=r3
-	add		r2=8*62,r2               //padding
-
-// save RRs
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-
-cStRR:
-	dep.z		r5=r4,61,3;;
-	mov		r3=rr[r5];;
-	st8		[r2]=r3,8
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRR
-	;;
-end_os_mca_dump:
-	br	ia64_os_mca_done_dump;;
+	st8 [temp1]=r18,16	// proc_state_param
+	st8 [temp2]=r19,16	// monarch
+	mov r6=IA64_KR(CURRENT)
+	;;
+	st8 [temp1]=r12,16	// sal_ra
+	st8 [temp2]=r10,16	// sal_gp
+	mov r12=cr.isr
+	;;
+	st8 [temp1]=r17,16	// pal_min_state
+	st8 [temp2]=r6,16	// prev_IA64_KR_CURRENT
+	mov r6=IA64_KR(CURRENT_STACK)
+	;;
+	st8 [temp1]=r6,16	// prev_IA64_KR_CURRENT_STACK
+	st8 [temp2]=r0,16	// prev_task, starts off as NULL
+	mov r6=cr.ifa
+	;;
+	st8 [temp1]=r12,16	// cr.isr
+	st8 [temp2]=r6,16	// cr.ifa
+	mov r12=cr.itir
+	;;
+	st8 [temp1]=r12,16	// cr.itir
+	st8 [temp2]=r11,16	// cr.iipa
+	mov r12=cr.iim
+	;;
+	st8 [temp1]=r12,16	// cr.iim
+(p1)	mov r12=IA64_MCA_COLD_BOOT
+(p2)	mov r12=IA64_INIT_WARM_BOOT
+	mov r6=cr.iha
+	;;
+	st8 [temp2]=r6,16	// cr.iha, temp2 now addresses the slot after os_status
+	st8 [temp1]=r12		// os_status, default is cold boot (MCA) or warm boot (INIT)
+	mov r6=IA64_MCA_SAME_CONTEXT
+	;;
+	st8 [temp2]=r6		// context, default is same context; via temp1 it would overwrite os_status
+
+	// Save the pt_regs data that is not in minstate.  The previous code
+	// left regs at sos.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	mov temp3=b6
+	mov temp4=b7
+	add temp2=PT(B7), regs
+	;;
+	st8 [temp1]=temp3,PT(AR_CSD)-PT(B6)		// save b6
+	st8 [temp2]=temp4,PT(AR_SSD)-PT(B7)		// save b7
+	mov temp3=ar.csd
+	mov temp4=ar.ssd
+	cover						// must be last in group
+	;;
+	st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD)	// save ar.csd
+	st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD)		// save ar.ssd
+	mov temp3=ar.unat
+	mov temp4=ar.pfs
+	;;
+	st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT)	// save ar.unat
+	st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS)	// save ar.pfs
+	mov temp3=ar.rnat
+	mov temp4=ar.bspstore
+	;;
+	st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT)	// save ar.rnat
+	st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE)	// save ar.bspstore
+	mov temp3=ar.bsp
+	;;
+	sub temp3=temp3, temp4	// ar.bsp - ar.bspstore
+	mov temp4=ar.fpsr
+	;;
+	shl temp3=temp3,16	// compute ar.rsc to be used for "loadrs"
+	;;
+	st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS)		// save loadrs
+	st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR)		// save ar.fpsr
+	mov temp3=ar.ccv
+	;;
+	st8 [temp1]=temp3,PT(F7)-PT(AR_CCV)		// save ar.ccv
+	stf.spill [temp2]=f6,PT(F8)-PT(F6)
+	;;
+	stf.spill [temp1]=f7,PT(F9)-PT(F7)
+	stf.spill [temp2]=f8,PT(F10)-PT(F8)
+	;;
+	stf.spill [temp1]=f9,PT(F11)-PT(F9)
+	stf.spill [temp2]=f10
+	;;
+	stf.spill [temp1]=f11
+
+	// Save the switch_stack data that is not in minstate nor pt_regs.  The
+	// previous code left regs at pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	stf.spill [temp1]=f2,32
+	stf.spill [temp2]=f3,32
+	;;
+	stf.spill [temp1]=f4,32
+	stf.spill [temp2]=f5,32
+	;;
+	stf.spill [temp1]=f12,32
+	stf.spill [temp2]=f13,32
+	;;
+	stf.spill [temp1]=f14,32
+	stf.spill [temp2]=f15,32
+	;;
+	stf.spill [temp1]=f16,32
+	stf.spill [temp2]=f17,32
+	;;
+	stf.spill [temp1]=f18,32
+	stf.spill [temp2]=f19,32
+	;;
+	stf.spill [temp1]=f20,32
+	stf.spill [temp2]=f21,32
+	;;
+	stf.spill [temp1]=f22,32
+	stf.spill [temp2]=f23,32
+	;;
+	stf.spill [temp1]=f24,32
+	stf.spill [temp2]=f25,32
+	;;
+	stf.spill [temp1]=f26,32
+	stf.spill [temp2]=f27,32
+	;;
+	stf.spill [temp1]=f28,32
+	stf.spill [temp2]=f29,32
+	;;
+	stf.spill [temp1]=f30,SW(B2)-SW(F30)
+	stf.spill [temp2]=f31,SW(B3)-SW(F31)
+	mov temp3=b2
+	mov temp4=b3
+	;;
+	st8 [temp1]=temp3,16	// save b2
+	st8 [temp2]=temp4,16	// save b3
+	mov temp3=b4
+	mov temp4=b5
+	;;
+	st8 [temp1]=temp3,SW(AR_LC)-SW(B4)	// save b4
+	st8 [temp2]=temp4	// save b5
+	mov temp3=ar.lc
+	;;
+	st8 [temp1]=temp3	// save ar.lc
+
+	// FIXME: Some proms are incorrectly accessing the minstate area as
+	// cached data.  The C code uses region 6, uncached virtual.  Ensure
+	// that there is no cache data lying around for the first 1K of the
+	// minstate area.
+	// Remove this code in September 2006, that gives platforms a year to
+	// fix their proms and get their customers updated.
+
+	add r1=32*1,r17
+	add r2=32*2,r17
+	add r3=32*3,r17
+	add r4=32*4,r17
+	add r5=32*5,r17
+	add r6=32*6,r17
+	add r7=32*7,r17
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
 //++
 // Name:
-//       ia64_os_mca_proc_state_restore()
+//	ia64_state_restore()
 //
 // Stub Description:
 //
-//       This is a stub to restore the saved processor state during MCHK
+//	Restore the SAL/OS state.  This is sensitive to the layout of struct
+//	ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	In addition to the SAL to OS state, this routine restores all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those in the PAL minstate area.
 //
 //--
 
-ia64_os_mca_proc_state_restore:
+ia64_state_restore:
+	// Restore the switch_stack data that is not in minstate nor pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET, r3
+	mov b0=r2		// save return address
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add regs=temp2, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	ldf.fill f2=[temp1],32
+	ldf.fill f3=[temp2],32
+	;;
+	ldf.fill f4=[temp1],32
+	ldf.fill f5=[temp2],32
+	;;
+	ldf.fill f12=[temp1],32
+	ldf.fill f13=[temp2],32
+	;;
+	ldf.fill f14=[temp1],32
+	ldf.fill f15=[temp2],32
+	;;
+	ldf.fill f16=[temp1],32
+	ldf.fill f17=[temp2],32
+	;;
+	ldf.fill f18=[temp1],32
+	ldf.fill f19=[temp2],32
+	;;
+	ldf.fill f20=[temp1],32
+	ldf.fill f21=[temp2],32
+	;;
+	ldf.fill f22=[temp1],32
+	ldf.fill f23=[temp2],32
+	;;
+	ldf.fill f24=[temp1],32
+	ldf.fill f25=[temp2],32
+	;;
+	ldf.fill f26=[temp1],32
+	ldf.fill f27=[temp2],32
+	;;
+	ldf.fill f28=[temp1],32
+	ldf.fill f29=[temp2],32
+	;;
+	ldf.fill f30=[temp1],SW(B2)-SW(F30)
+	ldf.fill f31=[temp2],SW(B3)-SW(F31)
+	;;
+	ld8 temp3=[temp1],16	// restore b2
+	ld8 temp4=[temp2],16	// restore b3
+	;;
+	mov b2=temp3
+	mov b3=temp4
+	ld8 temp3=[temp1],SW(AR_LC)-SW(B4)	// restore b4
+	ld8 temp4=[temp2]	// restore b5
+	;;
+	mov b4=temp3
+	mov b5=temp4
+	ld8 temp3=[temp1]	// restore ar.lc
+	;;
+	mov ar.lc=temp3
 
-// Restore bank1 GR16-31
-	GET_IA64_MCA_DATA(r2)
+	// Restore the pt_regs data that is not in minstate.  The previous code
+	// left regs at switch_stack.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	add temp2=PT(B7), regs
+	;;
+	ld8 temp3=[temp1],PT(AR_CSD)-PT(B6)		// restore b6
+	ld8 temp4=[temp2],PT(AR_SSD)-PT(B7)		// restore b7
+	;;
+	mov b6=temp3
+	mov b7=temp4
+	ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD)	// restore ar.csd
+	ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD)		// restore ar.ssd
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-
-restore_GRs:                                    // restore bank-1 GRs 16-31
-	bsw.1;;
-	add		r3=16*8,r2;;                // to get to NaT of GR 16-31
-	ld8		r3=[r3];;
-	mov		ar.unat=r3;;                // first restore NaT
-
-	ld8.fill	r16=[r2],8;;
-	ld8.fill	r17=[r2],8;;
-	ld8.fill	r18=[r2],8;;
-	ld8.fill	r19=[r2],8;;
-	ld8.fill	r20=[r2],8;;
-	ld8.fill	r21=[r2],8;;
-	ld8.fill	r22=[r2],8;;
-	ld8.fill	r23=[r2],8;;
-	ld8.fill	r24=[r2],8;;
-	ld8.fill	r25=[r2],8;;
-	ld8.fill	r26=[r2],8;;
-	ld8.fill	r27=[r2],8;;
-	ld8.fill	r28=[r2],8;;
-	ld8.fill	r29=[r2],8;;
-	ld8.fill	r30=[r2],8;;
-	ld8.fill	r31=[r2],8;;
-
-	ld8		r3=[r2],8;;              // increment to skip NaT
-	bsw.0;;
-
-restore_BRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b0=r3
-	mov		b1=r5
-	mov		b2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b3=r3
-	mov		b4=r5
-	mov		b5=r7;;
-
-	ld8		r3=[r2],2*8
-	ld8		r5=[r4],2*8;;
-	mov		b6=r3
-	mov		b7=r5;;
-
-restore_CRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],8*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;            // 48 byte increments
-	mov		cr.dcr=r3
-	mov		cr.itm=r5
-	mov		cr.iva=r7;;
-
-	ld8		r3=[r2],8*8;;            // 64 byte increments
-//      mov		cr.pta=r3
-
-
-// if PSR.ic=1, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-
-begin_rskip_intr_regs:
-(p6)	br		rSkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.ipsr=r3
-//	mov		cr.isr=r5                   // cr.isr is read only
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iip=r3
-	mov		cr.ifa=r5
-	mov		cr.itir=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iipa=r3
-	mov		cr.ifs=r5
-	mov		cr.iim=r7
-
-	ld8		r3=[r2],160;;               // 160 byte increment
-	mov		cr.iha=r3
-
-rSkipIntrRegs:
-	ld8		r3=[r2],152;;               // another 152 byte inc.
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2;;                 // duplicate r2 in r6
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-	mov		cr.lid=r3
-//	mov		cr.ivr=r5                   // cr.ivr is read only
-	mov		cr.tpr=r7;;
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.eoi=r3
-//	mov		cr.irr0=r5                  // cr.irr0 is read only
-//	mov		cr.irr1=r7;;                // cr.irr1 is read only
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.irr2=r3                  // cr.irr2 is read only
-//	mov		cr.irr3=r5                  // cr.irr3 is read only
-	mov		cr.itv=r7;;
-
-	ld8		r3=[r2],8*7
-	ld8		r5=[r4],8*7;;
-	mov		cr.pmv=r3
-	mov		cr.cmcv=r5;;
-
-	ld8		r3=[r2],8*23
-	ld8		r5=[r4],8*23;;
-	adds		r2=8*23,r2
-	adds		r4=8*23,r4;;
-//	mov		cr.lrr0=r3
-//	mov		cr.lrr1=r5
-
-	adds		r2=8*2,r2;;
-
-restore_ARs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k0=r3
-	mov		ar.k1=r5
-	mov		ar.k2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k3=r3
-	mov		ar.k4=r5
-	mov		ar.k5=r7;;
-
-	ld8		r3=[r2],10*8
-	ld8		r5=[r4],10*8
-	ld8		r7=[r6],10*8;;
-	mov		ar.k6=r3
-	mov		ar.k7=r5
-	;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-//	mov		ar.rsc=r3
-//	mov		ar.bsp=r5                   // ar.bsp is read only
-	mov		ar.rsc=r0			    // make sure that RSE is in enforced lazy mode
-	;;
-	mov		ar.bspstore=r7;;
-
-	ld8		r9=[r2],8*13;;
-	mov		ar.rnat=r9
-
-	mov		ar.rsc=r3
-	ld8		r3=[r2],8*4;;
-	mov		ar.ccv=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.unat=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.fpsr=r3
-
-	ld8		r3=[r2],160;;               // 160
-//      mov		ar.itc=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.pfs=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.lc=r3
-
-	ld8		r3=[r2];;
-	mov		ar.ec=r3
-	add		r2=8*62,r2;;             // padding
-
-restore_RRs:
-	mov		r5=ar.lc
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-cStRRr:
-	dep.z		r7=r4,61,3
-	ld8		r3=[r2],8;;
-	mov		rr[r7]=r3                   // what are its access previledges?
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRRr
-	;;
-	mov		ar.lc=r5
-	;;
-end_os_mca_restore:
-	br	ia64_os_mca_done_restore;;
+	mov ar.csd=temp3
+	mov ar.ssd=temp4
+	ld8 temp3=[temp1]				// restore ar.unat
+	add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1
+	ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS)	// restore ar.pfs
+	;;
+	mov ar.unat=temp3
+	mov ar.pfs=temp4
+	// ar.rnat, ar.bspstore, loadrs are restored in ia64_old_stack.
+	ld8 temp3=[temp1],PT(F6)-PT(AR_CCV)		// restore ar.ccv
+	ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR)		// restore ar.fpsr
+	;;
+	mov ar.ccv=temp3
+	mov ar.fpsr=temp4
+	ldf.fill f6=[temp1],PT(F8)-PT(F6)
+	ldf.fill f7=[temp2],PT(F9)-PT(F7)
+	;;
+	ldf.fill f8=[temp1],PT(F10)-PT(F8)
+	ldf.fill f9=[temp2],PT(F11)-PT(F9)
+	;;
+	ldf.fill f10=[temp1]
+	ldf.fill f11=[temp2]
+
+	// Restore the SAL to OS state. The previous code left regs at pt_regs.
+	add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs
+	add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs
+	;;
+	ld8 r12=[temp1],16	// sal_ra
+	ld8 r9=[temp2],16	// sal_gp
+	;;
+	ld8 r22=[temp1],16	// pal_min_state, virtual
+	ld8 r21=[temp2],16	// prev_IA64_KR_CURRENT
+	;;
+	ld8 r16=[temp1],16	// prev_IA64_KR_CURRENT_STACK
+	ld8 r20=[temp2],16	// prev_task
+	;;
+	ld8 temp3=[temp1],16	// cr.isr
+	ld8 temp4=[temp2],16	// cr.ifa
+	;;
+	mov cr.isr=temp3
+	mov cr.ifa=temp4
+	ld8 temp3=[temp1],16	// cr.itir
+	ld8 temp4=[temp2],16	// cr.iipa
+	;;
+	mov cr.itir=temp3
+	mov cr.iipa=temp4
+	ld8 temp3=[temp1],16	// cr.iim
+	ld8 temp4=[temp2],16	// cr.iha
+	;;
+	mov cr.iim=temp3
+	mov cr.iha=temp4
+	dep r22=0,r22,62,2	// pal_min_state, physical, uncached
+	mov IA64_KR(CURRENT)=r21
+	ld8 r8=[temp1]		// os_status
+	ld8 r10=[temp2]		// context
+
+	/* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to.  To
+	 * avoid any dependencies on the algorithm in ia64_switch_to(), just
+	 * purge any existing CURRENT_STACK mapping and insert the new one.
+	 *
+	 * r16 contains prev_IA64_KR_CURRENT_STACK, r21 contains
+	 * prev_IA64_KR_CURRENT, these values may have been changed by the C
+	 * code.  Do not use r8, r9, r10, r22, they contain values ready for
+	 * the return to SAL.
+	 */
+
+	mov r15=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r15=r15,IA64_GRANULE_SHIFT
+	;;
+	dep r15=-1,r15,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r15,r18
+	;;
+	srlz.d
+
+	extr.u r19=r21,61,3			// r21 = prev_IA64_KR_CURRENT, extract its region number
+	shl r20=r16,IA64_GRANULE_SHIFT		// r16 = prev_IA64_KR_CURRENT_STACK
+	movl r15=PAGE_KERNEL			// page properties; r15 is dead after the ptr.d, r21 must survive for cr.ifa
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16
+	cmp.ne p6,p0=RGN_KERNEL,r19		// new stack is in the kernel region?
+	or r15=r20,r15				// construct PA | page properties
+(p6)	br.spnt 1f				// the dreaded cpu 0 idle task in region 5:(
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r21				// virtual address to map (prev_IA64_KR_CURRENT), not the PTE value
+	mov r20=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r20]=r15			// insert PA | page properties at the address in cr.ifa
+	;;
+	srlz.d
+1:
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
-// ok, the issue here is that we need to save state information so
-// it can be useable by the kernel debugger and show regs routines.
-// In order to do this, our best bet is save the current state (plus
-// the state information obtain from the MIN_STATE_AREA) into a pt_regs
-// format.  This way we can pass it on in a useable format.
+//++
+// Name:
+//	ia64_new_stack()
 //
-
+// Stub Description:
 //
-// SAL to OS entry point for INIT on the monarch processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+//	Switch to the MCA/INIT stack.
 //
-// When we get here, the following registers have been
-// set by the SAL for our use
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
-//		1. GR1 = OS INIT GP
-//		2. GR8 = PAL_PROC physical address
-//		3. GR9 = SAL_PROC physical address
-//		4. GR10 = SAL GP (physical)
-//		5. GR11 = Init Reason
-//			0 = Received INIT for event other than crash dump switch
-//			1 = Received wakeup at the end of an OS_MCA corrected machine check
-//			2 = Received INIT dude to CrashDump switch assertion
+//	On entry RBS is still on the original stack, this routine switches RBS
+//	to use the MCA/INIT stack.
 //
-//		6. GR12 = Return address to location within SAL_INIT procedure
-
+//	On entry, sos->pal_min_state is physical, on exit it is virtual.
+//
+//--
 
-GLOBAL_ENTRY(ia64_monarch_init_handler)
-	.prologue
-	// stash the information the SAL passed to os
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ia64_new_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp1)
+	invala
 	;;
-	SAVE_MIN_WITH_COVER
+	add temp2=temp2, temp1		// struct ia64_sal_os_state.pal_min_state on MCA or INIT stack
+	add regs=regs, temp1		// struct pt_regs on MCA or INIT stack
 	;;
-	mov r8=cr.ifa
-	mov r9=cr.isr
-	adds r3=8,r2				// set up second base pointer
+	// Address of minstate area provided by PAL is physical, uncacheable.
+	// Convert to Linux virtual address in region 6 for C code.
+	ld8 ms=[temp2]			// pal_min_state, physical
 	;;
-	SAVE_REST
-
-// ok, enough should be saved at this point to be dangerous, and supply
-// information for a dump
-// We need to switch to Virtual mode before hitting the C functions.
+	dep temp1=-1,ms,62,2		// set region 6
+	mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET
+	;;
+	st8 [temp2]=temp1		// pal_min_state, virtual
 
-	movl	r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
-	mov	r3=psr	// get the current psr, minimum enabled at this point
+	add temp4=temp3, regs		// start of bspstore on new stack
 	;;
-	or	r2=r2,r3
+	mov ar.bspstore=temp4		// switch RBS to MCA/INIT stack
 	;;
-	movl	r3=IVirtual_Switch
+	flushrs				// must be first in group
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_old_stack()
+//
+// Stub Description:
+//
+//	Switch to the old stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	On entry, pal_min_state is virtual, on exit it is physical.
+//
+//	On entry RBS is on the MCA/INIT stack, this routine switches RBS
+//	back to the previous stack.
+//
+//	The psr is set to all zeroes.  SAL return requires either all zeroes or
+//	just psr.mc set.  Leaving psr.mc off allows INIT to be issued if this
+//	code does not perform correctly.
+//
+//	The dirty registers at the time of the event were flushed to the
+//	MCA/INIT stack in ia64_pt_regs_save().  Restore the dirty registers
+//	before reverting to the previous bspstore.
+//--
+
+ia64_old_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp2)
+	LOAD_PHYSICAL(p0,temp1,1f)
 	;;
-	mov	cr.iip=r3	// short return to set the appropriate bits
-	mov	cr.ipsr=r2	// need to do an rfi to set appropriate bits
+	mov cr.ipsr=r0
+	mov cr.ifs=r0
+	mov cr.iip=temp1
 	;;
+	invala
 	rfi
+1:
+
+	add regs=regs, temp2		// struct pt_regs on MCA or INIT stack
 	;;
-IVirtual_Switch:
-	//
-	// We should now be running virtual
-	//
-	// Let's call the C handler to get the rest of the state info
-	//
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	add temp1=PT(LOADRS), regs
 	;;
-	adds out0=16,sp				// out0 = pointer to pt_regs
+	ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS)	// restore loadrs
+	;;
+	ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE)	// restore ar.bspstore
+	mov ar.rsc=temp2
+	;;
+	loadrs
+	ld8 temp4=[temp1]		// restore ar.rnat
+	;;
+	mov ar.bspstore=temp3		// back to old stack
+	;;
+	mov ar.rnat=temp4
 	;;
-	DO_SAVE_SWITCH_STACK
-	.body
-	adds out1=16,sp				// out0 = pointer to switch_stack
 
-	br.call.sptk.many rp=ia64_init_handler
-.ret1:
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
 
-return_from_init:
-	br.sptk return_from_init
-END(ia64_monarch_init_handler)
 
+//++
+// Name:
+//	ia64_set_kernel_registers()
+//
+// Stub Description:
+//
+//	Set the registers that are required by the C code in order to run on an
+//	MCA/INIT stack.
 //
-// SAL to OS entry point for INIT on the slave processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
+//--
 
-GLOBAL_ENTRY(ia64_slave_init_handler)
-1:	br.sptk 1b
-END(ia64_slave_init_handler)
+ia64_set_kernel_registers:
+	add temp3=MCA_SP_OFFSET, r3
+	add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3
+	mov b0=r2		// save return address
+	GET_IA64_MCA_DATA(temp1)	// NOTE(review): presumably the physical base of this cpu's MCA data - confirm against the macro
+	;;
+	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
+	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
+	add r13=temp1, r3	// set current to start of MCA/INIT stack
+	add r20=temp1, r3	// physical start of MCA/INIT stack
+	;;
+	ld8 r1=[temp4]		// OS GP from SAL OS state
+	;;
+	DATA_PA_TO_VA(r1,temp1)		// gp, sp and current are converted from physical to virtual
+	DATA_PA_TO_VA(r12,temp2)
+	DATA_PA_TO_VA(r13,temp3)
+	;;
+	mov IA64_KR(CURRENT)=r13	// publish the (virtual) MCA/INIT stack as current
+
+	/* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack.  To avoid
+	 * any dependencies on the algorithm in ia64_switch_to(), just purge
+	 * any existing CURRENT_STACK mapping and insert the new one.
+	 */
+
+	mov r16=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r16=r16,IA64_GRANULE_SHIFT
+	;;
+	dep r16=-1,r16,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r16,r18				// purge the existing CURRENT_STACK data translation
+	;;
+	srlz.d
+
+	shr.u r16=r20,IA64_GRANULE_SHIFT	// r20 = physical start of MCA/INIT stack
+	movl r21=PAGE_KERNEL			// page properties
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16		// record the granule number of the MCA/INIT stack
+	or r21=r20,r21				// construct PA | page properties
+	;;
+	mov cr.itir=r18				// page size for the insert = one granule
+	mov cr.ifa=r13				// virtual address to map = new current (start of MCA/INIT stack)
+	mov r20=IA64_TR_CURRENT_STACK		// translation register slot to fill
+	;;
+	itr.d dtr[r20]=r21			// insert the new CURRENT_STACK data translation
+	;;
+	srlz.d
+
+	br.sptk b0				// return via the address saved from r2
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+#undef	ms
+#undef	regs
+#undef	temp1
+#undef	temp2
+#undef	temp3
+#undef	temp4
+
+
+// Support function for mca.c, it is here to avoid using inline asm.  Given the
+// address of an rnat slot, if that address is below the current ar.bspstore
+// then return the contents of that slot, otherwise return the contents of
+// ar.rnat.
+GLOBAL_ENTRY(ia64_get_rnat)
+	alloc r14=ar.pfs,1,0,0,0	// one input: in0 = address of the rnat slot
+	mov ar.rsc=0			// RSE into enforced lazy mode before reading ar.bspstore/ar.rnat
+	;;
+	mov r14=ar.bspstore		// r14 is reused; the ar.pfs copy from alloc is not needed again here
+	;;
+	cmp.lt p6,p7=in0,r14		// p6: slot is below ar.bspstore, p7: it is not
+	;;
+(p6)	ld8 r8=[in0]			// return the contents of the slot
+(p7)	mov r8=ar.rnat			// otherwise return ar.rnat
+	mov ar.rsc=3			// NOTE(review): presumably the kernel's normal RSE mode - confirm callers never enter with another ar.rsc
+	br.ret.sptk.many rp
+END(ia64_get_rnat)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index abc0113a821..f081c60ab20 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -4,6 +4,8 @@
  *
  * Copyright (C) 2004 FUJITSU LIMITED
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -38,10 +40,6 @@
 /* max size of SAL error record (default) */
 static int sal_rec_max = 10000;
 
-/* from mca.c */
-static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
-static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
-
 /* from mca_drv_asm.S */
 extern void *mca_handler_bhhook(void);
 
@@ -58,8 +56,9 @@ static struct page *page_isolate[MAX_PAGE_ISOLATE];
 static int num_page_isolate = 0;
 
 typedef enum {
-	ISOLATE_NG = 0,
-	ISOLATE_OK = 1
+	ISOLATE_NG,
+	ISOLATE_OK,
+	ISOLATE_NONE
 } isolate_status_t;
 
 /*
@@ -76,7 +75,7 @@ static struct {
  * @paddr:	poisoned memory location
  *
  * Return value:
- *	ISOLATE_OK / ISOLATE_NG
+ *	one of isolate_status_t, ISOLATE_OK/NG/NONE.
  */
 
 static isolate_status_t
@@ -86,23 +85,26 @@ mca_page_isolate(unsigned long paddr)
 	struct page *p;
 
 	/* whether physical address is valid or not */
-	if ( !ia64_phys_addr_valid(paddr) ) 
-		return ISOLATE_NG;
+	if (!ia64_phys_addr_valid(paddr))
+		return ISOLATE_NONE;
+
+	if (!pfn_valid(paddr >> PAGE_SHIFT))	/* pfn_valid() takes a page frame number, not an address */
+		return ISOLATE_NONE;
 
 	/* convert physical address to physical page number */
 	p = pfn_to_page(paddr>>PAGE_SHIFT);
 
 	/* check whether a page number have been already registered or not */
-	for( i = 0; i < num_page_isolate; i++ )
-		if( page_isolate[i] == p )
+	for (i = 0; i < num_page_isolate; i++)
+		if (page_isolate[i] == p)
 			return ISOLATE_OK; /* already listed */
 
 	/* limitation check */
-	if( num_page_isolate == MAX_PAGE_ISOLATE ) 
+	if (num_page_isolate == MAX_PAGE_ISOLATE)
 		return ISOLATE_NG;
 
 	/* kick pages having attribute 'SLAB' or 'Reserved' */
-	if( PageSlab(p) || PageReserved(p) ) 
+	if (PageSlab(p) || PageReserved(p))
 		return ISOLATE_NG;
 
 	/* add attribute 'Reserved' and register the page */
@@ -124,10 +126,15 @@ mca_handler_bh(unsigned long paddr)
 		current->pid, current->comm);
 
 	spin_lock(&mca_bh_lock);
-	if (mca_page_isolate(paddr) == ISOLATE_OK) {
+	switch (mca_page_isolate(paddr)) {
+	case ISOLATE_OK:
 		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
-	} else {
+		break;
+	case ISOLATE_NG:
 		printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr);
+		break;
+	default:
+		break;
 	}
 	spin_unlock(&mca_bh_lock);
 
@@ -141,10 +148,10 @@ mca_handler_bh(unsigned long paddr)
  * @peidx:	pointer to index of processor error section
  */
 
-static void 
+static void
 mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
 {
-	/* 
+	/*
 	 * calculate the start address of
 	 *   "struct cpuid_info" and "sal_processor_static_info_t".
 	 */
@@ -166,7 +173,7 @@ mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
 }
 
 /**
- * mca_make_slidx -  Make index of SAL error record 
+ * mca_make_slidx -  Make index of SAL error record
  * @buffer:	pointer to SAL error record
  * @slidx:	pointer to index of SAL error record
  *
@@ -174,12 +181,12 @@ mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
  *	1 if record has platform error / 0 if not
  */
 #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
-        { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
-          hl->hdr = ptr; \
-          list_add(&hl->list, &(sect)); \
-          slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
+	{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
+	hl->hdr = ptr; \
+	list_add(&hl->list, &(sect)); \
+	slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
 
-static int 
+static int
 mca_make_slidx(void *buffer, slidx_table_t *slidx)
 {
 	int platform_err = 0;
@@ -216,28 +223,36 @@ mca_make_slidx(void *buffer, slidx_table_t *slidx)
 		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
 		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
 			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
-		} else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) {
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_BUS_ERR_SECT_GUID)) {
 			platform_err = 1;
 			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
 		} else {
@@ -255,15 +270,16 @@ mca_make_slidx(void *buffer, slidx_table_t *slidx)
  * Return value:
  *	0 on Success / -ENOMEM on Failure
  */
-static int 
+static int
 init_record_index_pools(void)
 {
 	int i;
 	int rec_max_size;  /* Maximum size of SAL error records */
 	int sect_min_size; /* Minimum size of SAL error sections */
 	/* minimum size table of each section */
-	static int sal_log_sect_min_sizes[] = { 
-		sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t),
+	static int sal_log_sect_min_sizes[] = {
+		sizeof(sal_log_processor_info_t)
+		+ sizeof(sal_processor_static_info_t),
 		sizeof(sal_log_mem_dev_err_info_t),
 		sizeof(sal_log_sel_dev_err_info_t),
 		sizeof(sal_log_pci_bus_err_info_t),
@@ -296,7 +312,8 @@ init_record_index_pools(void)
 
 	/* - 3 - */
 	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
-	slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+	slidx_pool.buffer = (slidx_list_t *)
+		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
 
 	return slidx_pool.buffer ? 0 : -ENOMEM;
 }
@@ -310,24 +327,27 @@ init_record_index_pools(void)
  * is_mca_global - Check whether this MCA is global or not
  * @peidx:	pointer of index of processor error section
  * @pbci:	pointer to pal_bus_check_info_t
+ * @sos:	pointer to hand off struct between SAL and OS
  *
  * Return value:
  *	MCA_IS_LOCAL / MCA_IS_GLOBAL
  */
 
 static mca_type_t
-is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
 {
-	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
 
-	/* 
+	/*
 	 * PAL can request a rendezvous, if the MCA has a global scope.
-	 * If "rz_always" flag is set, SAL requests MCA rendezvous 
+	 * If "rz_always" flag is set, SAL requests MCA rendezvous
 	 * in spite of global MCA.
 	 * Therefore it is local MCA when rendezvous has not been requested.
 	 * Failed to rendezvous, the system must be down.
 	 */
-	switch (sal_to_os_handoff_state->imsto_rendez_state) {
+	switch (sos->rv_rc) {
 		case -1: /* SAL rendezvous unsuccessful */
 			return MCA_IS_GLOBAL;
 		case  0: /* SAL rendezvous not required */
@@ -382,13 +402,16 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
  * @slidx:	pointer of index of SAL error record
  * @peidx:	pointer of index of processor error section
  * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
  *
  * Return value:
  *	1 on Success / 0 on Failure
  */
 
 static int
-recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_read_error(slidx_table_t *slidx,
+			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
 {
 	sal_log_mod_error_info_t *smei;
 	pal_min_state_area_t *pmsa;
@@ -426,7 +449,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
 			 *  setup for resume to bottom half of MCA,
 			 * "mca_handler_bhhook"
 			 */
-			pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
+			pmsa = sos->pal_min_state;
 			/* pass to bhhook as 1st argument (gr8) */
 			pmsa->pmsa_gr[8-1] = smei->target_identifier;
 			/* set interrupted return address (but no use) */
@@ -453,23 +476,28 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
  * @slidx:	pointer of index of SAL error record
  * @peidx:	pointer of index of processor error section
  * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
  *
  * Return value:
  *	1 on Success / 0 on Failure
  */
 
 static int
-recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
+			    pal_bus_check_info_t *pbci,
+			    struct ia64_sal_os_state *sos)
 {
 	int status = 0;
-	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
 
 	if (psp->bc && pbci->eb && pbci->bsi == 0) {
 		switch(pbci->type) {
 		case 1: /* partial read */
 		case 3: /* full line(cpu) read */
 		case 9: /* I/O space read */
-			status = recover_from_read_error(slidx, peidx, pbci);
+			status = recover_from_read_error(slidx, peidx, pbci,
+							 sos);
 			break;
 		case 0: /* unknown */
 		case 2: /* partial write */
@@ -480,7 +508,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
 		case 8: /* write coalescing transactions */
 		case 10: /* I/O space write */
 		case 11: /* inter-processor interrupt message(IPI) */
-		case 12: /* interrupt acknowledge or external task priority cycle */
+		case 12: /* interrupt acknowledge or
+				external task priority cycle */
 		default:
 			break;
 		}
@@ -495,6 +524,7 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
  * @slidx:	pointer of index of SAL error record
  * @peidx:	pointer of index of processor error section
  * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
  *
  * Return value:
  *	1 on Success / 0 on Failure
@@ -508,14 +538,17 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
  */
 
 static int
-recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_processor_error(int platform, slidx_table_t *slidx,
+			     peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			     struct ia64_sal_os_state *sos)
 {
-	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
 
-	/* 
+	/*
 	 * We cannot recover errors with other than bus_check.
 	 */
-	if (psp->cc || psp->rc || psp->uc) 
+	if (psp->cc || psp->rc || psp->uc)
 		return 0;
 
 	/*
@@ -544,10 +577,10 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 	 * (e.g. a load from poisoned memory)
 	 * This means "there are some platform errors".
 	 */
-	if (platform) 
-		return recover_from_platform_error(slidx, peidx, pbci);
-	/* 
-	 * On account of strange SAL error record, we cannot recover. 
+	if (platform)
+		return recover_from_platform_error(slidx, peidx, pbci, sos);
+	/*
+	 * On account of strange SAL error record, we cannot recover.
 	 */
 	return 0;
 }
@@ -555,15 +588,14 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 /**
  * mca_try_to_recover - Try to recover from MCA
  * @rec:	pointer to a SAL error record
+ * @sos:	pointer to hand off struct between SAL and OS
  *
  * Return value:
  *	1 on Success / 0 on Failure
  */
 
 static int
-mca_try_to_recover(void *rec, 
-	ia64_mca_sal_to_os_state_t *sal_to_os_state,
-	ia64_mca_os_to_sal_state_t *os_to_sal_state)
+mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 {
 	int platform_err;
 	int n_proc_err;
@@ -571,10 +603,6 @@ mca_try_to_recover(void *rec,
 	peidx_table_t peidx;
 	pal_bus_check_info_t pbci;
 
-	/* handoff state from/to mca.c */
-	sal_to_os_handoff_state = sal_to_os_state;
-	os_to_sal_handoff_state = os_to_sal_state;
-
 	/* Make index of SAL error record */
 	platform_err = mca_make_slidx(rec, &slidx);
 
@@ -591,17 +619,19 @@ mca_try_to_recover(void *rec,
 	}
 
 	/* Make index of processor error section */
-	mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
+	mca_make_peidx((sal_log_processor_info_t*)
+		slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
 
 	/* Extract Processor BUS_CHECK[0] */
 	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
 
 	/* Check whether MCA is global or not */
-	if (is_mca_global(&peidx, &pbci))
+	if (is_mca_global(&peidx, &pbci, sos))
 		return 0;
 	
 	/* Try to recover a processor error */
-	return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
+	return recover_from_processor_error(platform_err, &slidx, &peidx,
+					    &pbci, sos);
 }
 
 /*
@@ -614,7 +644,7 @@ int __init mca_external_handler_init(void)
 		return -ENOMEM;
 
 	/* register external mca handlers */
-	if (ia64_reg_MCA_extension(mca_try_to_recover)){	
+	if (ia64_reg_MCA_extension(mca_try_to_recover)) {	
 		printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
 		kfree(slidx_pool.buffer);
 		return -EFAULT;
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index 0227b761f2c..e2f6fa1e0ef 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -6,7 +6,7 @@
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
  */
 /*
- * Processor error section: 
+ * Processor error section:
  *
  *  +-sal_log_processor_info_t *info-------------+
  *  | sal_log_section_hdr_t header;              |
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index 2d7e0217638..3f298ee4d00 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -13,45 +13,45 @@
 #include <asm/ptrace.h>
 
 GLOBAL_ENTRY(mca_handler_bhhook)
-	invala						// clear RSE ?
-	;;						//
-	cover						// 
-	;;						//
-	clrrrb						//
+	invala				// invalidate all ALAT entries
+	;;
+	cover
+	;;
+	clrrrb
 	;;						
-	alloc		r16=ar.pfs,0,2,1,0		// make a new frame
+	alloc	r16=ar.pfs,0,2,1,0	// make a new frame
 	;;
-	mov		ar.rsc=0
+	mov	ar.rsc=0
 	;;
-	mov		r13=IA64_KR(CURRENT)		// current task pointer
+	mov	r13=IA64_KR(CURRENT)	// current task pointer
 	;;
-	mov		r2=r13
+	mov	r2=r13
 	;;
-	addl		r22=IA64_RBS_OFFSET,r2
+	addl	r22=IA64_RBS_OFFSET,r2
 	;;
-	mov		ar.bspstore=r22
+	mov	ar.bspstore=r22
 	;;
-	addl		sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
+	addl	sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
 	;;
-	adds		r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	adds	r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
-	st1		[r2]=r0				// clear current->thread.on_ustack flag
-	mov		loc0=r16
-	movl		loc1=mca_handler_bh		// recovery C function
+	st1	[r2]=r0		// clear current->thread.on_ustack flag
+	mov	loc0=r16
+	movl	loc1=mca_handler_bh	// recovery C function
 	;;
-	mov		out0=r8				// poisoned address
-	mov		b6=loc1
+	mov	out0=r8			// poisoned address
+	mov	b6=loc1
 	;;
-	mov		loc1=rp
+	mov	loc1=rp
 	;;
-	ssm		psr.i
+	ssm	psr.i
 	;;
-	br.call.sptk.many    rp=b6			// does not return ...
+	br.call.sptk.many rp=b6		// does not return ...
 	;;
-	mov		ar.pfs=loc0
-	mov 		rp=loc1
+	mov	ar.pfs=loc0
+	mov 	rp=loc1
 	;;
-	mov		r8=r0
+	mov	r8=r0
 	br.ret.sptk.many rp
 	;;
 END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index f6d8a010d99..85ed54179af 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -5,73 +5,6 @@
 #include "entry.h"
 
 /*
- * For ivt.s we want to access the stack virtually so we don't have to disable translation
- * on interrupts.
- *
- *  On entry:
- *	r1:	pointer to current task (ar.k6)
- */
-#define MINSTATE_START_SAVE_MIN_VIRT								\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-	;;											\
-(pUStk)	mov.m r24=ar.rnat;									\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
-(pKStk) mov r1=sp;					/* get sp  */				\
-	;;											\
-(pUStk) lfetch.fault.excl.nt1 [r22];								\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */
-
-#define MINSTATE_END_SAVE_MIN_VIRT								\
-	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
-	;;
-
-/*
- * For mca_asm.S we want to access the stack physically since the state is saved before we
- * go virtual and don't want to destroy the iip or ipsr.
- */
-#define MINSTATE_START_SAVE_MIN_PHYS								\
-(pKStk) mov r3=IA64_KR(PER_CPU_DATA);;								\
-(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
-(pKStk) ld8 r3 = [r3];;										\
-(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
-(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
-	;;											\
-(pUStk)	mov r24=ar.rnat;									\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-(pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-
-#define MINSTATE_END_SAVE_MIN_PHYS								\
-	dep r12=-1,r12,61,3;		/* make sp a kernel virtual address */			\
-	;;
-
-#ifdef MINSTATE_VIRT
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
-#endif
-
-#ifdef MINSTATE_PHYS
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
-#endif
-
-/*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
  * on.
@@ -97,7 +30,7 @@
  * we can pass interruption state as arguments to a handler.
  */
 #define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
-	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
 	mov r27=ar.rsc;			/* M */							\
 	mov r20=r1;			/* A */							\
 	mov r25=ar.unat;		/* M */							\
@@ -118,7 +51,21 @@
 	SAVE_IFS;										\
 	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
 	;;											\
-	MINSTATE_START_SAVE_MIN									\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
 	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
 	adds r16=PT(CR_IPSR),r1;								\
 	;;											\
@@ -181,7 +128,8 @@
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
-	MINSTATE_END_SAVE_MIN
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
 
 /*
  * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 25e7c834456..89faa603c6b 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -307,11 +307,9 @@ vm_info(char *page)
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
-		return 0;
-	}
+	} else {
 
-
-	p += sprintf(p,
+		p += sprintf(p,
 		     "Physical Address Space         : %d bits\n"
 		     "Virtual Address Space          : %d bits\n"
 		     "Protection Key Registers(PKR)  : %d\n"
@@ -319,92 +317,99 @@ vm_info(char *page)
 		     "Hash Tag ID                    : 0x%x\n"
 		     "Size of RR.rid                 : %d\n",
 		     vm_info_1.pal_vm_info_1_s.phys_add_size,
-		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1,
-		     vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id,
+		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1,
+		     vm_info_1.pal_vm_info_1_s.max_pkr+1,
+		     vm_info_1.pal_vm_info_1_s.key_size,
+		     vm_info_1.pal_vm_info_1_s.hash_tag_id,
 		     vm_info_2.pal_vm_info_2_s.rid_size);
+	}
 
-	if (ia64_pal_mem_attrib(&attrib) != 0)
-		return 0;
-
-	p += sprintf(p, "Supported memory attributes    : ");
-	sep = "";
-	for (i = 0; i < 8; i++) {
-		if (attrib & (1 << i)) {
-			p += sprintf(p, "%s%s", sep, mem_attrib[i]);
-			sep = ", ";
+	if (ia64_pal_mem_attrib(&attrib) == 0) {
+		p += sprintf(p, "Supported memory attributes    : ");
+		sep = "";
+		for (i = 0; i < 8; i++) {
+			if (attrib & (1 << i)) {
+				p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+				sep = ", ";
+			}
 		}
+		p += sprintf(p, "\n");
 	}
-	p += sprintf(p, "\n");
 
 	if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
-		return 0;
-	}
-
-	p += sprintf(p,
-		     "\nTLB walker                     : %simplemented\n"
-		     "Number of DTR                  : %d\n"
-		     "Number of ITR                  : %d\n"
-		     "TLB insertable page sizes      : ",
-		     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
-		     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
-		     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
+	} else {
 
+		p += sprintf(p,
+			     "\nTLB walker                     : %simplemented\n"
+			     "Number of DTR                  : %d\n"
+			     "Number of ITR                  : %d\n"
+			     "TLB insertable page sizes      : ",
+			     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+			     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+			     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
 
-	p = bitvector_process(p, tr_pages);
 
-	p += sprintf(p, "\nTLB purgeable page sizes       : ");
+		p = bitvector_process(p, tr_pages);
 
-	p = bitvector_process(p, vw_pages);
+		p += sprintf(p, "\nTLB purgeable page sizes       : ");
 
+		p = bitvector_process(p, vw_pages);
+	}
 	if ((status=ia64_get_ptce(&ptce)) != 0) {
 		printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
-		return 0;
-	}
-
-	p += sprintf(p,
+	} else {
+		p += sprintf(p,
 		     "\nPurge base address             : 0x%016lx\n"
 		     "Purge outer loop count         : %d\n"
 		     "Purge inner loop count         : %d\n"
 		     "Purge outer loop stride        : %d\n"
 		     "Purge inner loop stride        : %d\n",
-		     ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]);
+		     ptce.base, ptce.count[0], ptce.count[1],
+		     ptce.stride[0], ptce.stride[1]);
 
-	p += sprintf(p,
+		p += sprintf(p,
 		     "TC Levels                      : %d\n"
 		     "Unique TC(s)                   : %d\n",
 		     vm_info_1.pal_vm_info_1_s.num_tc_levels,
 		     vm_info_1.pal_vm_info_1_s.max_unique_tcs);
 
-	for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
-		for (j=2; j>0 ; j--) {
-			tc_pages = 0; /* just in case */
+		for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
+			for (j=2; j>0 ; j--) {
+				tc_pages = 0; /* just in case */
 
 
-			/* even without unification, some levels may not be present */
-			if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
-				continue;
-			}
+				/* even without unification, some levels may not be present */
+				if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+					continue;
+				}
 
-			p += sprintf(p,
+				p += sprintf(p,
 				     "\n%s Translation Cache Level %d:\n"
 				     "\tHash sets           : %d\n"
 				     "\tAssociativity       : %d\n"
 				     "\tNumber of entries   : %d\n"
 				     "\tFlags               : ",
-				     cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets,
-				     tc_info.tc_associativity, tc_info.tc_num_entries);
+				     cache_types[j+tc_info.tc_unified], i+1,
+				     tc_info.tc_num_sets,
+				     tc_info.tc_associativity,
+				     tc_info.tc_num_entries);
 
-			if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized ");
-			if (tc_info.tc_unified) p += sprintf(p, "Unified ");
-			if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction");
+				if (tc_info.tc_pf)
+					p += sprintf(p, "PreferredPageSizeOptimized ");
+				if (tc_info.tc_unified)
+					p += sprintf(p, "Unified ");
+				if (tc_info.tc_reduce_tr)
+					p += sprintf(p, "TCReduction");
 
-			p += sprintf(p, "\n\tSupported page sizes: ");
+				p += sprintf(p, "\n\tSupported page sizes: ");
 
-			p = bitvector_process(p, tc_pages);
+				p = bitvector_process(p, tc_pages);
 
-			/* when unified date (j=2) is enough */
-			if (tc_info.tc_unified) break;
+				/* when unified, data (j=2) is enough */
+				if (tc_info.tc_unified)
+					break;
+			}
 		}
 	}
 	p += sprintf(p, "\n");
@@ -440,14 +445,14 @@ register_info(char *page)
 		p += sprintf(p, "\n");
 	}
 
-	if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0;
+	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) {
 
 	p += sprintf(p,
 		     "RSE stacked physical registers   : %ld\n"
 		     "RSE load/store hints             : %ld (%s)\n",
 		     phys_stacked, hints.ph_data,
 		     hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
-
+	}
 	if (ia64_pal_debug_info(&iregs, &dregs))
 		return 0;
 
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f1201ac8a11..d71731ee5b6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -38,6 +38,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/bitops.h>
+#include <linux/rcupdate.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -496,7 +497,7 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
-static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pfm_alt_install_check);
 static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
 
 static struct proc_dir_entry 	*perfmon_dir;
@@ -573,7 +574,7 @@ pfm_protect_ctx_ctxsw(pfm_context_t *x)
 	return 0UL;
 }
 
-static inline unsigned long
+static inline void
 pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 {
 	spin_unlock(&(x)->ctx_lock);
@@ -2217,15 +2218,18 @@ static void
 pfm_free_fd(int fd, struct file *file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 
 	/* 
 	 * there ie no fd_uninstall(), so we do it here
 	 */
 	spin_lock(&files->file_lock);
-        files->fd[fd] = NULL;
+	fdt = files_fdtable(files);
+	rcu_assign_pointer(fdt->fd[fd], NULL);
 	spin_unlock(&files->file_lock);
 
-	if (file) put_filp(file);
+	if (file)
+		put_filp(file);
 	put_unused_fd(fd);
 }
 
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 6f0cc7a6634..ca68e6e44a7 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -22,6 +22,11 @@
  *
  * Dec  5 2004	kaos@sgi.com
  *   Standardize which records are cleared automatically.
+ *
+ * Aug 18 2005	kaos@sgi.com
+ *   mca.c may not pass a buffer, a NULL buffer just indicates that a new
+ *   record is available in SAL.
+ *   Replace some NR_CPUS by for_each_online_cpu/cpu_online, for hotplug cpu.
  */
 
 #include <linux/types.h>
@@ -193,7 +198,7 @@ shift1_data_saved (struct salinfo_data *data, int shift)
  * The buffer passed from mca.c points to the output from ia64_log_get. This is
  * a persistent buffer but its contents can change between the interrupt and
  * when user space processes the record.  Save the record id to identify
- * changes.
+ * changes.  If the buffer is NULL then just update the bitmap.
  */
 void
 salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
@@ -206,27 +211,29 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 
 	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
 
-	if (irqsafe)
-		spin_lock_irqsave(&data_saved_lock, flags);
-	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
-		if (!data_saved->buffer)
-			break;
-	}
-	if (i == saved_size) {
-		if (!data->saved_num) {
-			shift1_data_saved(data, 0);
-			data_saved = data->data_saved + saved_size - 1;
-		} else
-			data_saved = NULL;
-	}
-	if (data_saved) {
-		data_saved->cpu = smp_processor_id();
-		data_saved->id = ((sal_log_record_header_t *)buffer)->id;
-		data_saved->size = size;
-		data_saved->buffer = buffer;
+	if (buffer) {
+		if (irqsafe)
+			spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+			if (!data_saved->buffer)
+				break;
+		}
+		if (i == saved_size) {
+			if (!data->saved_num) {
+				shift1_data_saved(data, 0);
+				data_saved = data->data_saved + saved_size - 1;
+			} else
+				data_saved = NULL;
+		}
+		if (data_saved) {
+			data_saved->cpu = smp_processor_id();
+			data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+			data_saved->size = size;
+			data_saved->buffer = buffer;
+		}
+		if (irqsafe)
+			spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
-	if (irqsafe)
-		spin_unlock_irqrestore(&data_saved_lock, flags);
 
 	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
 		if (irqsafe)
@@ -244,7 +251,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 	int i;
 	if (!data->open)
 		return;
-	for (i = 0; i < NR_CPUS; ++i) {
+	for_each_online_cpu(i) {
 		if (test_bit(i, &data->cpu_event)) {
 			/* double up() is not a problem, user space will see no
 			 * records for the additional "events".
@@ -291,7 +298,7 @@ retry:
 
 	n = data->cpu_check;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (test_bit(n, &data->cpu_event)) {
+		if (test_bit(n, &data->cpu_event) && cpu_online(n)) {
 			cpu = n;
 			break;
 		}
@@ -585,11 +592,10 @@ salinfo_init(void)
 
 		/* we missed any events before now */
 		online = 0;
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j)) {
-				set_bit(j, &data->cpu_event);
-				++online;
-			}
+		for_each_online_cpu(j) {
+			set_bit(j, &data->cpu_event);
+			++online;
+		}
 		sema_init(&data->sem, online);
 
 		*sdir++ = dir;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 84f89da7c64..1f5c26dbe70 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -384,7 +384,7 @@ setup_arch (char **cmdline_p)
 	if (early_console_setup(*cmdline_p) == 0)
 		mark_bsp_online();
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
 # ifdef CONFIG_ACPI_NUMA
@@ -420,7 +420,7 @@ setup_arch (char **cmdline_p)
 
 	cpu_init();	/* initialize the bootstrap CPU */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	acpi_boot_init();
 #endif
 
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 92ff46ad21e..706b7734e19 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
 	parent = &sysfs_nodes[cpu_to_node(num)];
 #endif /* CONFIG_NUMA */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	/*
 	 * If CPEI cannot be re-targetted, and this is
 	 * CPEI target, then dont create the control file
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 4440c8343fa..f970359e7ed 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -15,6 +15,7 @@
 #include <linux/vt_kern.h>		/* For unblank_screen() */
 #include <linux/module.h>       /* for EXPORT_SYMBOL */
 #include <linux/hardirq.h>
+#include <linux/kprobes.h>
 
 #include <asm/fpswa.h>
 #include <asm/ia32.h>
@@ -122,7 +123,7 @@ die_if_kernel (char *str, struct pt_regs *regs, long err)
 }
 
 void
-ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 {
 	siginfo_t siginfo;
 	int sig, code;
@@ -444,7 +445,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
 	return rv;
 }
 
-void
+void __kprobes
 ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 	    unsigned long iim, unsigned long itir, long arg5, long arg6,
 	    long arg7, struct pt_regs regs)
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 3288be47bc7..93d5a3b41f6 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -2020,28 +2020,6 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 }
 
 void
-unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
-			    struct pt_regs *pt, struct switch_stack *sw)
-{
-	unsigned long sof;
-
-	init_frame_info(info, t, sw, pt->r12);
-	info->cfm_loc = &pt->cr_ifs;
-	info->unat_loc = &pt->ar_unat;
-	info->pfs_loc = &pt->ar_pfs;
-	sof = *info->cfm_loc & 0x7f;
-	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof);
-	info->ip = pt->cr_iip + ia64_psr(pt)->ri;
-	info->pt = (unsigned long) pt;
-	UNW_DPRINT(3, "unwind.%s:\n"
-		   "  bsp    0x%lx\n"
-		   "  sof    0x%lx\n"
-		   "  ip     0x%lx\n",
-		   __FUNCTION__, info->bsp, sof, info->ip);
-	find_save_locs(info);
-}
-
-void
 unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
 {
 	unsigned long sol;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index a676e79e068..30d8564e960 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -48,6 +48,7 @@ SECTIONS
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
+	KPROBES_TEXT
 	*(.gnu.linkonce.t*)
     }
   .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index 799407e7726..cb1af597370 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -15,7 +15,6 @@ lib-$(CONFIG_ITANIUM)	+= copy_page.o copy_user.o memcpy.o
 lib-$(CONFIG_MCKINLEY)	+= copy_page_mck.o memcpy_mck.o
 lib-$(CONFIG_PERFMON)	+= carta_random.o
 lib-$(CONFIG_MD_RAID5)	+= xor.o
-lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
 
 AFLAGS___divdi3.o	=
 AFLAGS___udivdi3.o	= -DUNSIGNED
diff --git a/arch/ia64/lib/dec_and_lock.c b/arch/ia64/lib/dec_and_lock.c
deleted file mode 100644
index c7ce92f968f..00000000000
--- a/arch/ia64/lib/dec_and_lock.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2003 Jerome Marchand, Bull S.A.
- *	Cleaned up by David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * This file is released under the GPLv2, or at your option any later version.
- *
- * ia64 version of "atomic_dec_and_lock()" using the atomic "cmpxchg" instruction.  This
- * code is an adaptation of the x86 version of "atomic_dec_and_lock()".
- */
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-
-/*
- * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock.  Both of these
- * operations have to be done atomically, so that the count doesn't drop to zero without
- * acquiring the spinlock first.
- */
-int
-_atomic_dec_and_lock (atomic_t *refcount, spinlock_t *lock)
-{
-	int old, new;
-
-	do {
-		old = atomic_read(refcount);
-		new = old - 1;
-
-		if (unlikely (old == 1)) {
-			/* oops, we may be decrementing to zero, do it the slow way... */
-			spin_lock(lock);
-			if (atomic_dec_and_test(refcount))
-				return 1;
-			spin_unlock(lock);
-			return 0;
-		}
-	} while (cmpxchg(&refcount->counter, old, new) != old);
-	return 0;
-}
-
-EXPORT_SYMBOL(_atomic_dec_and_lock);
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index 3e2cfa2c6d3..2a0d27f2f21 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -20,6 +20,7 @@
 	 *
 	 *	Note: "in0" and "in1" are preserved for debugging purposes.
 	 */
+	.section .kprobes.text,"ax"
 GLOBAL_ENTRY(flush_icache_range)
 
 	.prologue
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
index 6f308e62c13..46c9331e7ab 100644
--- a/arch/ia64/lib/memcpy_mck.S
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -625,8 +625,11 @@ EK(.ex_handler,  (p17)	st8	[dst1]=r39,8);						\
 	clrrrb
 	;;
 	alloc	saved_pfs_stack=ar.pfs,3,3,3,0
+	cmp.lt	p8,p0=A,r0
 	sub	B = dst0, saved_in0	// how many byte copied so far
 	;;
+(p8)	mov	A = 0;			// A shouldn't be negative, cap it
+	;;
 	sub	C = A, B
 	sub	D = saved_in2, A
 	;;
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
index dbc0b3e449c..a604efc7f6c 100644
--- a/arch/ia64/lib/swiotlb.c
+++ b/arch/ia64/lib/swiotlb.c
@@ -123,8 +123,8 @@ swiotlb_init_with_default_size (size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
-					       (1 << IO_TLB_SHIFT));
+	io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs *
+					     (1 << IO_TLB_SHIFT), 0x100000000);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index ff62551eb3a..3c32af910d6 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
+#include <linux/kprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -76,7 +77,7 @@ mapped_kernel_page_is_present (unsigned long address)
 	return pte_present(pte);
 }
 
-void
+void __kprobes
 ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
 {
 	int signal = SIGSEGV, code = SEGV_MAPERR;
@@ -229,9 +230,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 		return;
 	}
 
-	if (ia64_done_with_exception(regs))
-		return;
-
 	/*
 	 * Since we have no vma's for region 5, we might get here even if the address is
 	 * valid, due to the VHPT walker inserting a non present translation that becomes
@@ -242,6 +240,9 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
 		return;
 
+	if (ia64_done_with_exception(regs))
+		return;
+
 	/*
 	 * Oops. The kernel tried to access some bad page. We'll have to terminate things
 	 * with extreme prejudice.
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 65f9958db9f..1281c609ee9 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -382,13 +382,22 @@ ia64_mmu_init (void *my_cpu_data)
 
 	if (impl_va_bits < 51 || impl_va_bits > 61)
 		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+	/*
+	 * mapped_space_bits - PAGE_SHIFT is log2 of the number of ptes we
+	 * need, which must fit into 2^(vmlpt_bits - pte_bits) slots. Second
+	 * half of the test makes sure that our mapped space doesn't overlap
+	 * the unimplemented hole in the middle of the region.
+	 */
+	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+	    (mapped_space_bits > impl_va_bits - 1))
+		panic("Cannot build a big enough virtual-linear page table"
+		      " to cover mapped address space.\n"
+		      " Try using a smaller page size.\n");
+
 
 	/* place the VMLPT at the end of each page-table mapped region: */
 	pta = POW2(61) - POW2(vmlpt_bits);
 
-	if (POW2(mapped_space_bits) >= pta)
-		panic("mm/init: overlap between virtually mapped linear page table and "
-		      "mapped kernel space!");
 	/*
 	 * Set the (virtually mapped linear) page table address.  Bit
 	 * 8 selects between the short and long format, bits 2-7 the
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 9977c122e9f..9b5de589b82 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -498,13 +498,11 @@ pcibios_enable_device (struct pci_dev *dev, int mask)
 	return acpi_pci_irq_enable(dev);
 }
 
-#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
 void
 pcibios_disable_device (struct pci_dev *dev)
 {
 	acpi_pci_irq_disable(dev);
 }
-#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
 void
 pcibios_align_resource (void *data, struct resource *res,
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 4564ed0b5ff..906622d9f93 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -431,7 +431,7 @@ void sn_bus_store_sysdata(struct pci_dev *dev)
 {
 	struct sysdata_el *element;
 
-	element = kcalloc(1, sizeof(struct sysdata_el), GFP_KERNEL);
+	element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
 	if (!element) {
 		dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
 		return;
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 9fc74631ba8..01d18b7b5bb 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -23,7 +23,7 @@ static void force_interrupt(int irq);
 static void register_intr_pda(struct sn_irq_info *sn_irq_info);
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
 
-extern int sn_force_interrupt_flag;
+int sn_force_interrupt_flag = 1;
 extern int sn_ioif_inited;
 static struct list_head **sn_irq_lh;
 static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index a594aca959e..6f8c5883716 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -49,6 +49,7 @@
 #include <asm/sn/clksupport.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/geo.h>
+#include <asm/sn/sn_feature_sets.h>
 #include "xtalk/xwidgetdev.h"
 #include "xtalk/hubdev.h"
 #include <asm/sn/klconfig.h>
@@ -56,7 +57,7 @@
 
 DEFINE_PER_CPU(struct pda_s, pda_percpu);
 
-#define MAX_PHYS_MEMORY		(1UL << 49)	/* 1 TB */
+#define MAX_PHYS_MEMORY		(1UL << IA64_MAX_PHYS_BITS)	/* Max physical address supported */
 
 lboard_t *root_lboard[MAX_COMPACT_NODES];
 
@@ -97,6 +98,7 @@ EXPORT_SYMBOL(sn_region_size);
 int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
 
 short physical_node_map[MAX_PHYSNODE_ID];
+static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
 
 EXPORT_SYMBOL(physical_node_map);
 
@@ -271,7 +273,10 @@ void __init sn_setup(char **cmdline_p)
 	u32 version = sn_sal_rev();
 	extern void sn_cpu_init(void);
 
-	ia64_sn_plat_set_error_handling_features();
+	ia64_sn_plat_set_error_handling_features();	// obsolete
+	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
+	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+
 
 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
 	/*
@@ -314,16 +319,6 @@ void __init sn_setup(char **cmdline_p)
 
 	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
-	/*
-	 * Confirm the SAL we're running on is recent enough...
-	 */
-	if (version < SN_SAL_MIN_VERSION) {
-		printk(KERN_ERR "This kernel needs SGI SAL version >= "
-		       "%x.%02x\n", SN_SAL_MIN_VERSION >> 8,
-		        SN_SAL_MIN_VERSION & 0x00FF);
-		panic("PROM version too old\n");
-	}
-
 	master_nasid = boot_get_nasid();
 
 	status =
@@ -480,6 +475,10 @@ void __init sn_cpu_init(void)
 	if (nodepdaindr[0] == NULL)
 		return;
 
+	for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
+		if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
+			break;
+
 	cpuid = smp_processor_id();
 	cpuphyid = get_sapicid();
 
@@ -651,3 +650,12 @@ nasid_slice_to_cpuid(int nasid, int slice)
 
 	return -1;
 }
+
+/* Test bit 'id' in the PROM feature sets cached by sn_cpu_init(). */
+int sn_prom_feature_available(int id)
+{
+	if (id < 0 || id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
+		return 0;	/* id lies outside the cached bitmap */
+	return test_bit(id, sn_prom_features);
+}
+EXPORT_SYMBOL(sn_prom_feature_available);
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 51bf82720d9..a06719d752a 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -52,7 +52,7 @@ static int licenseID_open(struct inode *inode, struct file *file)
  * the bridge chip.  The hardware will then send an interrupt message if the
  * interrupt line is active.  This mimics a level sensitive interrupt.
  */
-int sn_force_interrupt_flag = 1;
+extern int sn_force_interrupt_flag;
 
 static int sn_force_interrupt_show(struct seq_file *s, void *p)
 {
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 254fe15c064..e0819ec5311 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -183,15 +183,16 @@ int cx_driver_unregister(struct cx_drv *cx_driver)
  * @part_num: device's part number
  * @mfg_num: device's manufacturer number
  * @hubdev: hub info associated with this device
+ * @bt: board type of the device
  *
  */
 int
 cx_device_register(nasid_t nasid, int part_num, int mfg_num,
-		   struct hubdev_info *hubdev)
+		   struct hubdev_info *hubdev, int bt)
 {
 	struct cx_dev *cx_dev;
 
-	cx_dev = kcalloc(1, sizeof(struct cx_dev), GFP_KERNEL);
+	cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL);
 	DBG("cx_dev= 0x%p\n", cx_dev);
 	if (cx_dev == NULL)
 		return -ENOMEM;
@@ -200,6 +201,7 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num,
 	cx_dev->cx_id.mfg_num = mfg_num;
 	cx_dev->cx_id.nasid = nasid;
 	cx_dev->hubdev = hubdev;
+	cx_dev->bt = bt;
 
 	cx_dev->dev.parent = NULL;
 	cx_dev->dev.bus = &tiocx_bus_type;
@@ -238,7 +240,8 @@ static int cx_device_reload(struct cx_dev *cx_dev)
 {
 	cx_device_unregister(cx_dev);
 	return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
-				  cx_dev->cx_id.mfg_num, cx_dev->hubdev);
+				  cx_dev->cx_id.mfg_num, cx_dev->hubdev,
+				  cx_dev->bt);
 }
 
 static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
@@ -365,26 +368,20 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
 	udelay(2000);
 }
 
-static int tiocx_btchar_get(int nasid)
+static int is_fpga_tio(int nasid, int *bt)
 {
-	moduleid_t module_id;
-	geoid_t geoid;
-	int cnodeid;
-
-	cnodeid = nasid_to_cnodeid(nasid);
-	geoid = cnodeid_get_geoid(cnodeid);
-	module_id = geo_module(geoid);
-	return MODULE_GET_BTCHAR(module_id);
-}
+	int ioboard_type;
 
-static int is_fpga_brick(int nasid)
-{
-	switch (tiocx_btchar_get(nasid)) {
+	ioboard_type = ia64_sn_sysctl_ioboard_get(nasid);
+
+	switch (ioboard_type) {
 	case L1_BRICKTYPE_SA:
 	case L1_BRICKTYPE_ATHENA:
-	case L1_BRICKTYPE_DAYTONA:
+	case L1_BOARDTYPE_DAYTONA:
+		*bt = ioboard_type;
 		return 1;
 	}
+
 	return 0;
 }
 
@@ -407,16 +404,22 @@ static int tiocx_reload(struct cx_dev *cx_dev)
 
 	if (bitstream_loaded(nasid)) {
 		uint64_t cx_id;
-
-		cx_id =
-		    *(volatile uint64_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
+		int rv;
+
+		rv = ia64_sn_sysctl_tio_clock_reset(nasid);
+		if (rv) {
+			printk(KERN_ALERT "CX port JTAG reset failed.\n");
+		} else {
+			cx_id = *(volatile uint64_t *)
+				(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
 					  WIDGET_ID);
-		part_num = XWIDGET_PART_NUM(cx_id);
-		mfg_num = XWIDGET_MFG_NUM(cx_id);
-		DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
-		/* just ignore it if it's a CE */
-		if (part_num == TIO_CE_ASIC_PARTNUM)
-			return 0;
+			part_num = XWIDGET_PART_NUM(cx_id);
+			mfg_num = XWIDGET_MFG_NUM(cx_id);
+			DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
+			/* just ignore it if it's a CE */
+			if (part_num == TIO_CE_ASIC_PARTNUM)
+				return 0;
+		}
 	}
 
 	cx_dev->cx_id.part_num = part_num;
@@ -436,10 +439,10 @@ static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *a
 {
 	struct cx_dev *cx_dev = to_cx_dev(dev);
 
-	return sprintf(buf, "0x%x 0x%x 0x%x %d\n",
+	return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n",
 		       cx_dev->cx_id.nasid,
 		       cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
-		       tiocx_btchar_get(cx_dev->cx_id.nasid));
+		       cx_dev->bt);
 }
 
 static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf,
@@ -488,11 +491,12 @@ static int __init tiocx_init(void)
 
 	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
 		nasid_t nasid;
+		int bt;
 
 		if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
 			break;	/* No more nasids .. bail out of loop */
 
-		if ((nasid & 0x1) && is_fpga_brick(nasid)) {
+		if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) {
 			struct hubdev_info *hubdev;
 			struct xwidget_info *widgetp;
 
@@ -512,7 +516,7 @@ static int __init tiocx_init(void)
 
 			if (cx_device_register
 			    (nasid, widgetp->xwi_hwid.part_num,
-			     widgetp->xwi_hwid.mfg_num, hubdev) < 0)
+			     widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0)
 				return -ENXIO;
 			else
 				found_tiocx_device++;
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index bb1d5cf3044..ed7c21586e9 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -885,6 +885,10 @@ xpc_init(void)
 	pid_t pid;
 
 
+	if (!ia64_platform_is("sn2")) {
+		return -ENODEV;
+	}
+
 	/*
 	 * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
 	 * both a partition's reserved page and its XPC variables. Its size was
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index 78c13d676fa..e5c6d3c0a8e 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -130,7 +130,7 @@ struct net_device *xpnet_device;
  */
 static u64 xpnet_broadcast_partitions;
 /* protect above */
-static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
 
 /*
  * Since the Block Transfer Engine (BTE) is being used for the transfer
@@ -636,6 +636,10 @@ xpnet_init(void)
 	int result = -ENOMEM;
 
 
+	if (!ia64_platform_is("sn2")) {
+		return -ENODEV;
+	}
+
 	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
 
 	/*
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 9b8dbce2b7b..46b646a6d34 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -149,7 +149,7 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 	tioca_kern->ca_pcigart_entries =
 	    tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
 	tioca_kern->ca_pcigart_pagemap =
-	    kcalloc(1, tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
+	    kzalloc(tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
 	if (!tioca_kern->ca_pcigart_pagemap) {
 		free_pages((unsigned long)tioca_kern->ca_gart,
 			   get_order(tioca_kern->ca_gart_size));
@@ -392,7 +392,7 @@ tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size)
 	 * allocate a map struct
 	 */
 
-	ca_dmamap = kcalloc(1, sizeof(struct tioca_dmamap), GFP_ATOMIC);
+	ca_dmamap = kzalloc(sizeof(struct tioca_dmamap), GFP_ATOMIC);
 	if (!ca_dmamap)
 		goto map_return;
 
@@ -600,7 +600,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	 * Allocate kernel bus soft and copy from prom.
 	 */
 
-	tioca_common = kcalloc(1, sizeof(struct tioca_common), GFP_KERNEL);
+	tioca_common = kzalloc(sizeof(struct tioca_common), GFP_KERNEL);
 	if (!tioca_common)
 		return NULL;
 
@@ -609,7 +609,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 
 	/* init kernel-private area */
 
-	tioca_kern = kcalloc(1, sizeof(struct tioca_kernel), GFP_KERNEL);
+	tioca_kern = kzalloc(sizeof(struct tioca_kernel), GFP_KERNEL);
 	if (!tioca_kern) {
 		kfree(tioca_common);
 		return NULL;