30 files changed, 2206 insertions, 1779 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index e1fb68ddec2..307514f7a28 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -16,10 +16,11 @@ obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
 obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
 obj-$(CONFIG_IOSAPIC)		+= iosapic.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_SMP)		+= smp.o smpboot.o domain.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
+obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index 2623df5e263..13a5b3b49bf 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -17,20 +17,20 @@
 #include <asm/acpi-ext.h>
 
 struct acpi_vendor_descriptor {
-	u8				guid_id;
-	efi_guid_t			guid;
+	u8 guid_id;
+	efi_guid_t guid;
 };
 
 struct acpi_vendor_info {
-	struct acpi_vendor_descriptor	*descriptor;
-	u8				*data;
-	u32				length;
+	struct acpi_vendor_descriptor *descriptor;
+	u8 *data;
+	u32 length;
 };
 
 acpi_status
 acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 {
-	struct acpi_vendor_info *info = (struct acpi_vendor_info *) context;
+	struct acpi_vendor_info *info = (struct acpi_vendor_info *)context;
 	struct acpi_resource_vendor *vendor;
 	struct acpi_vendor_descriptor *descriptor;
 	u32 length;
@@ -38,8 +38,8 @@ acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 	if (resource->id != ACPI_RSTYPE_VENDOR)
 		return AE_OK;
 
-	vendor = (struct acpi_resource_vendor *) &resource->data;
-	descriptor = (struct acpi_vendor_descriptor *) vendor->reserved;
+	vendor = (struct acpi_resource_vendor *)&resource->data;
+	descriptor = (struct acpi_vendor_descriptor *)vendor->reserved;
 	if (vendor->length <= sizeof(*info->descriptor) ||
 	    descriptor->guid_id != info->descriptor->guid_id ||
 	    efi_guidcmp(descriptor->guid, info->descriptor->guid))
@@ -50,21 +50,24 @@ acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
 	if (!info->data)
 		return AE_NO_MEMORY;
 
-	memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length);
+	memcpy(info->data,
+	       vendor->reserved + sizeof(struct acpi_vendor_descriptor),
+	       length);
 	info->length = length;
 	return AE_CTRL_TERMINATE;
 }
 
 acpi_status
-acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
-		u8 **data, u32 *length)
+acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor * id,
+			  u8 ** data, u32 * length)
 {
 	struct acpi_vendor_info info;
 
 	info.descriptor = id;
 	info.data = NULL;
 
-	acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info);
+	acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match,
+			    &info);
 	if (!info.data)
 		return AE_NOT_FOUND;
 
@@ -75,17 +78,19 @@ acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
 
 struct acpi_vendor_descriptor hp_ccsr_descriptor = {
 	.guid_id = 2,
-	.guid    = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
+	.guid =
+	    EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01,
+		     0x37, 0x0e, 0xad)
 };
 
-acpi_status
-hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
+acpi_status hp_acpi_csr_space(acpi_handle obj, u64 * csr_base, u64 * csr_length)
 {
 	acpi_status status;
 	u8 *data;
 	u32 length;
 
-	status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
+	status =
+	    acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
 
 	if (ACPI_FAILURE(status) || length != 16)
 		return AE_NOT_FOUND;
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 9609f243e5d..28a4529fdd6 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -74,12 +74,11 @@ unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
 #define MAX_SAPICS 256
-u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
-	{ [0 ... MAX_SAPICS - 1] = -1 };
+u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 };
+
 EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
 
-const char *
-acpi_get_sysname (void)
+const char *acpi_get_sysname(void)
 {
 #ifdef CONFIG_IA64_GENERIC
 	unsigned long rsdp_phys;
@@ -89,27 +88,29 @@ acpi_get_sysname (void)
 
 	rsdp_phys = acpi_find_rsdp();
 	if (!rsdp_phys) {
-		printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP not found, default to \"dig\"\n");
 		return "dig";
 	}
 
-	rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+	rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys);
 	if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
-		printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
 		return "dig";
 	}
 
-	xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address);
 	hdr = &xsdt->header;
 	if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
-		printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
+		printk(KERN_ERR
+		       "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
 		return "dig";
 	}
 
 	if (!strcmp(hdr->oem_id, "HP")) {
 		return "hpzx1";
-	}
-	else if (!strcmp(hdr->oem_id, "SGI")) {
+	} else if (!strcmp(hdr->oem_id, "SGI")) {
 		return "sn2";
 	}
 
@@ -131,7 +132,7 @@ acpi_get_sysname (void)
 #endif
 }
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 
 #define ACPI_MAX_PLATFORM_INTERRUPTS	256
 
@@ -146,8 +147,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
  * Interrupt routing API for device drivers.  Provides interrupt vector for
  * a generic platform event.  Currently only CPEI is implemented.
  */
-int
-acpi_request_vector (u32 int_type)
+int acpi_request_vector(u32 int_type)
 {
 	int vector = -1;
 
@@ -155,12 +155,12 @@ acpi_request_vector (u32 int_type)
 		/* corrected platform error interrupt */
 		vector = platform_intr_list[int_type];
 	} else
-		printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
+		printk(KERN_ERR
+		       "acpi_request_vector(): invalid interrupt type\n");
 	return vector;
 }
 
-char *
-__acpi_map_table (unsigned long phys_addr, unsigned long size)
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
 {
 	return __va(phys_addr);
 }
@@ -169,19 +169,18 @@ __acpi_map_table (unsigned long phys_addr, unsigned long size)
                             Boot-time Table Parsing
    -------------------------------------------------------------------------- */
 
-static int			total_cpus __initdata;
-static int			available_cpus __initdata;
-struct acpi_table_madt *	acpi_madt __initdata;
-static u8			has_8259;
-
+static int total_cpus __initdata;
+static int available_cpus __initdata;
+struct acpi_table_madt *acpi_madt __initdata;
+static u8 has_8259;
 
 static int __init
-acpi_parse_lapic_addr_ovr (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+			  const unsigned long end)
 {
 	struct acpi_table_lapic_addr_ovr *lapic;
 
-	lapic = (struct acpi_table_lapic_addr_ovr *) header;
+	lapic = (struct acpi_table_lapic_addr_ovr *)header;
 
 	if (BAD_MADT_ENTRY(lapic, end))
 		return -EINVAL;
@@ -193,22 +192,23 @@ acpi_parse_lapic_addr_ovr (
 	return 0;
 }
 
-
 static int __init
-acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_lsapic *lsapic;
 
-	lsapic = (struct acpi_table_lsapic *) header;
+	lsapic = (struct acpi_table_lsapic *)header;
 
 	if (BAD_MADT_ENTRY(lsapic, end))
 		return -EINVAL;
 
 	if (lsapic->flags.enabled) {
 #ifdef CONFIG_SMP
-		smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    (lsapic->id << 8) | lsapic->eid;
 #endif
-		ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid;
+		ia64_acpiid_to_sapicid[lsapic->acpi_id] =
+		    (lsapic->id << 8) | lsapic->eid;
 		++available_cpus;
 	}
 
@@ -216,13 +216,12 @@ acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-
 static int __init
-acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_lapic_nmi *lacpi_nmi;
 
-	lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+	lacpi_nmi = (struct acpi_table_lapic_nmi *)header;
 
 	if (BAD_MADT_ENTRY(lacpi_nmi, end))
 		return -EINVAL;
@@ -231,13 +230,12 @@ acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-
 static int __init
-acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_iosapic *iosapic;
 
-	iosapic = (struct acpi_table_iosapic *) header;
+	iosapic = (struct acpi_table_iosapic *)header;
 
 	if (BAD_MADT_ENTRY(iosapic, end))
 		return -EINVAL;
@@ -245,15 +243,14 @@ acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
 	return iosapic_init(iosapic->address, iosapic->global_irq_base);
 }
 
-
 static int __init
-acpi_parse_plat_int_src (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_plat_int_src(acpi_table_entry_header * header,
+			const unsigned long end)
 {
 	struct acpi_table_plat_int_src *plintsrc;
 	int vector;
 
-	plintsrc = (struct acpi_table_plat_int_src *) header;
+	plintsrc = (struct acpi_table_plat_int_src *)header;
 
 	if (BAD_MADT_ENTRY(plintsrc, end))
 		return -EINVAL;
@@ -267,8 +264,12 @@ acpi_parse_plat_int_src (
 						plintsrc->iosapic_vector,
 						plintsrc->eid,
 						plintsrc->id,
-						(plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-						(plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+						(plintsrc->flags.polarity ==
+						 1) ? IOSAPIC_POL_HIGH :
+						IOSAPIC_POL_LOW,
+						(plintsrc->flags.trigger ==
+						 1) ? IOSAPIC_EDGE :
+						IOSAPIC_LEVEL);
 
 	platform_intr_list[plintsrc->type] = vector;
 	if (acpi_madt_rev > 1) {
@@ -283,7 +284,6 @@ acpi_parse_plat_int_src (
 	return 0;
 }
 
-
 unsigned int can_cpei_retarget(void)
 {
 	extern int cpe_vector;
@@ -322,29 +322,30 @@ unsigned int get_cpei_target_cpu(void)
 }
 
 static int __init
-acpi_parse_int_src_ovr (
-	acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+		       const unsigned long end)
 {
 	struct acpi_table_int_src_ovr *p;
 
-	p = (struct acpi_table_int_src_ovr *) header;
+	p = (struct acpi_table_int_src_ovr *)header;
 
 	if (BAD_MADT_ENTRY(p, end))
 		return -EINVAL;
 
 	iosapic_override_isa_irq(p->bus_irq, p->global_irq,
-				 (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-				 (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+				 (p->flags.polarity ==
+				  1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+				 (p->flags.trigger ==
+				  1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
 	return 0;
 }
 
-
 static int __init
-acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
 {
 	struct acpi_table_nmi_src *nmi_src;
 
-	nmi_src = (struct acpi_table_nmi_src*) header;
+	nmi_src = (struct acpi_table_nmi_src *)header;
 
 	if (BAD_MADT_ENTRY(nmi_src, end))
 		return -EINVAL;
@@ -353,11 +354,9 @@ acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
 	return 0;
 }
 
-static void __init
-acpi_madt_oem_check (char *oem_id, char *oem_table_id)
+static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (!strncmp(oem_id, "IBM", 3) &&
-	    (!strncmp(oem_table_id, "SERMOW", 6))) {
+	if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) {
 
 		/*
 		 * Unfortunately ITC_DRIFT is not yet part of the
@@ -370,19 +369,18 @@ acpi_madt_oem_check (char *oem_id, char *oem_table_id)
 	}
 }
 
-static int __init
-acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 {
 	if (!phys_addr || !size)
 		return -EINVAL;
 
-	acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+	acpi_madt = (struct acpi_table_madt *)__va(phys_addr);
 
 	acpi_madt_rev = acpi_madt->header.revision;
 
 	/* remember the value for reference after free_initmem() */
 #ifdef CONFIG_ITANIUM
-	has_8259 = 1; /* Firmware on old Itanium systems is broken */
+	has_8259 = 1;		/* Firmware on old Itanium systems is broken */
 #else
 	has_8259 = acpi_madt->flags.pcat_compat;
 #endif
@@ -396,19 +394,18 @@ acpi_parse_madt (unsigned long phys_addr, unsigned long size)
 	printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
 
 	acpi_madt_oem_check(acpi_madt->header.oem_id,
-		acpi_madt->header.oem_table_id);
+			    acpi_madt->header.oem_table_id);
 
 	return 0;
 }
 
-
 #ifdef CONFIG_ACPI_NUMA
 
 #undef SLIT_DEBUG
 
 #define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
 
-static int __initdata srat_num_cpus;			/* number of cpus */
+static int __initdata srat_num_cpus;	/* number of cpus */
 static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
@@ -421,15 +418,15 @@ static struct acpi_table_slit __initdata *slit_table;
  * ACPI 2.0 SLIT (System Locality Information Table)
  * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
  */
-void __init
-acpi_numa_slit_init (struct acpi_table_slit *slit)
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
 	u32 len;
 
 	len = sizeof(struct acpi_table_header) + 8
-		+ slit->localities * slit->localities;
+	    + slit->localities * slit->localities;
 	if (slit->header.length != len) {
-		printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
+		printk(KERN_ERR
+		       "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
 		       len, slit->header.length);
 		memset(numa_slit, 10, sizeof(numa_slit));
 		return;
@@ -438,19 +435,20 @@ acpi_numa_slit_init (struct acpi_table_slit *slit)
 }
 
 void __init
-acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
 {
 	/* record this node in proximity bitmap */
 	pxm_bit_set(pa->proximity_domain);
 
-	node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
+	node_cpuid[srat_num_cpus].phys_id =
+	    (pa->apic_id << 8) | (pa->lsapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
 	srat_num_cpus++;
 }
 
 void __init
-acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 {
 	unsigned long paddr, size;
 	u8 pxm;
@@ -487,8 +485,7 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
 	num_node_memblks++;
 }
 
-void __init
-acpi_numa_arch_fixup (void)
+void __init acpi_numa_arch_fixup(void)
 {
 	int i, j, node_from, node_to;
 
@@ -534,21 +531,24 @@ acpi_numa_arch_fixup (void)
 	for (i = 0; i < srat_num_cpus; i++)
 		node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
 
-	printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes());
-	printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
+	printk(KERN_INFO "Number of logical nodes in system = %d\n",
+	       num_online_nodes());
+	printk(KERN_INFO "Number of memory chunks in system = %d\n",
+	       num_node_memblks);
 
-	if (!slit_table) return;
+	if (!slit_table)
+		return;
 	memset(numa_slit, -1, sizeof(numa_slit));
-	for (i=0; i<slit_table->localities; i++) {
+	for (i = 0; i < slit_table->localities; i++) {
 		if (!pxm_bit_test(i))
 			continue;
 		node_from = pxm_to_nid_map[i];
-		for (j=0; j<slit_table->localities; j++) {
+		for (j = 0; j < slit_table->localities; j++) {
 			if (!pxm_bit_test(j))
 				continue;
 			node_to = pxm_to_nid_map[j];
 			node_distance(node_from, node_to) =
-				slit_table->entry[i*slit_table->localities + j];
+			    slit_table->entry[i * slit_table->localities + j];
 		}
 	}
 
@@ -556,36 +556,41 @@ acpi_numa_arch_fixup (void)
 	printk("ACPI 2.0 SLIT locality table:\n");
 	for_each_online_node(i) {
 		for_each_online_node(j)
-			printk("%03d ", node_distance(i,j));
+		    printk("%03d ", node_distance(i, j));
 		printk("\n");
 	}
 #endif
 }
-#endif /* CONFIG_ACPI_NUMA */
+#endif				/* CONFIG_ACPI_NUMA */
 
-unsigned int
-acpi_register_gsi (u32 gsi, int edge_level, int active_high_low)
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
 {
 	if (has_8259 && gsi < 16)
 		return isa_irq_to_vector(gsi);
 
 	return iosapic_register_intr(gsi,
-			(active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-			(edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+				     (active_high_low ==
+				      ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH :
+				     IOSAPIC_POL_LOW,
+				     (edge_level ==
+				      ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
+				     IOSAPIC_LEVEL);
 }
+
 EXPORT_SYMBOL(acpi_register_gsi);
 
-#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
-void
-acpi_unregister_gsi (u32 gsi)
+void acpi_unregister_gsi(u32 gsi)
 {
 	iosapic_unregister_intr(gsi);
 }
+
 EXPORT_SYMBOL(acpi_unregister_gsi);
-#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
-static int __init
-acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size)
 {
 	struct acpi_table_header *fadt_header;
 	struct fadt_descriptor_rev2 *fadt;
@@ -593,11 +598,11 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
 	if (!phys_addr || !size)
 		return -EINVAL;
 
-	fadt_header = (struct acpi_table_header *) __va(phys_addr);
+	fadt_header = (struct acpi_table_header *)__va(phys_addr);
 	if (fadt_header->revision != 3)
-		return -ENODEV;		/* Only deal with ACPI 2.0 FADT */
+		return -ENODEV;	/* Only deal with ACPI 2.0 FADT */
 
-	fadt = (struct fadt_descriptor_rev2 *) fadt_header;
+	fadt = (struct fadt_descriptor_rev2 *)fadt_header;
 
 	if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
 		acpi_kbd_controller_present = 0;
@@ -609,22 +614,19 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
 	return 0;
 }
 
-
-unsigned long __init
-acpi_find_rsdp (void)
+unsigned long __init acpi_find_rsdp(void)
 {
 	unsigned long rsdp_phys = 0;
 
 	if (efi.acpi20)
 		rsdp_phys = __pa(efi.acpi20);
 	else if (efi.acpi)
-		printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
+		printk(KERN_WARNING PREFIX
+		       "v1.0/r0.71 tables no longer supported\n");
 	return rsdp_phys;
 }
 
-
-int __init
-acpi_boot_init (void)
+int __init acpi_boot_init(void)
 {
 
 	/*
@@ -642,31 +644,43 @@ acpi_boot_init (void)
 
 	/* Local APIC */
 
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing LAPIC address override entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
-		printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
+	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
+	    < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no LAPIC entries\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
+	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0)
+	    < 0)
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
 
 	/* I/O APIC */
 
-	if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
-		printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no IOSAPIC entries\n");
 
 	/* System-Level Interrupt Routing */
 
-	if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
-		printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src,
+	     ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing platform interrupt source entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing interrupt source overrides entry\n");
 
 	if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
-  skip_madt:
+      skip_madt:
 
 	/*
 	 * FADT says whether a legacy keyboard controller is present.
@@ -681,8 +695,9 @@ acpi_boot_init (void)
 	if (available_cpus == 0) {
 		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
 		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
-		available_cpus = 1; /* We've got at least one of these, no? */
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
 	}
 	smp_boot_data.cpu_count = available_cpus;
 
@@ -691,8 +706,10 @@ acpi_boot_init (void)
 	if (srat_num_cpus == 0) {
 		int cpu, i = 1;
 		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
-			if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
-				node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
+			if (smp_boot_data.cpu_phys_id[cpu] !=
+			    hard_smp_processor_id())
+				node_cpuid[i++].phys_id =
+				    smp_boot_data.cpu_phys_id[cpu];
 	}
 # endif
 #endif
@@ -700,12 +717,12 @@ acpi_boot_init (void)
 	build_cpu_to_node_map();
 #endif
 	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
 	return 0;
 }
 
-int
-acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 {
 	int vector;
 
@@ -726,11 +743,10 @@ acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 static
-int
-acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 {
 #ifdef CONFIG_ACPI_NUMA
-	int 			pxm_id;
+	int pxm_id;
 
 	pxm_id = acpi_get_pxm(handle);
 
@@ -738,31 +754,28 @@ acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 	 * Assuming that the container driver would have set the proximity
 	 * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
 	 */
-	node_cpuid[cpu].nid = (pxm_id < 0) ? 0:
-			pxm_to_nid_map[pxm_id];
+	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id];
 
-	node_cpuid[cpu].phys_id =  physid;
+	node_cpuid[cpu].phys_id = physid;
 #endif
-	return(0);
+	return (0);
 }
 
-
-int
-acpi_map_lsapic(acpi_handle handle, int *pcpu)
+int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_table_lsapic *lsapic;
 	cpumask_t tmp_map;
 	long physid;
 	int cpu;
- 
+
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
 
-	if (!buffer.length ||  !buffer.pointer)
+	if (!buffer.length || !buffer.pointer)
 		return -EINVAL;
- 
+
 	obj = buffer.pointer;
 	if (obj->type != ACPI_TYPE_BUFFER ||
 	    obj->buffer.length < sizeof(*lsapic)) {
@@ -778,7 +791,7 @@ acpi_map_lsapic(acpi_handle handle, int *pcpu)
 		return -EINVAL;
 	}
 
-	physid = ((lsapic->id <<8) | (lsapic->eid));
+	physid = ((lsapic->id << 8) | (lsapic->eid));
 
 	acpi_os_free(buffer.pointer);
 	buffer.length = ACPI_ALLOCATE_BUFFER;
@@ -786,50 +799,49 @@ acpi_map_lsapic(acpi_handle handle, int *pcpu)
 
 	cpus_complement(tmp_map, cpu_present_map);
 	cpu = first_cpu(tmp_map);
-	if(cpu >= NR_CPUS)
+	if (cpu >= NR_CPUS)
 		return -EINVAL;
 
 	acpi_map_cpu2node(handle, cpu, physid);
 
- 	cpu_set(cpu, cpu_present_map);
+	cpu_set(cpu, cpu_present_map);
 	ia64_cpu_to_sapicid[cpu] = physid;
 	ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
 
 	*pcpu = cpu;
-	return(0);
+	return (0);
 }
-EXPORT_SYMBOL(acpi_map_lsapic);
 
+EXPORT_SYMBOL(acpi_map_lsapic);
 
-int
-acpi_unmap_lsapic(int cpu)
+int acpi_unmap_lsapic(int cpu)
 {
 	int i;
 
-	for (i=0; i<MAX_SAPICS; i++) {
- 		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
- 			ia64_acpiid_to_sapicid[i] = -1;
- 			break;
- 		}
- 	}
+	for (i = 0; i < MAX_SAPICS; i++) {
+		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
+			ia64_acpiid_to_sapicid[i] = -1;
+			break;
+		}
+	}
 	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu,cpu_present_map);
+	cpu_clear(cpu, cpu_present_map);
 
 #ifdef CONFIG_ACPI_NUMA
 	/* NUMA specific cleanup's */
 #endif
 
-	return(0);
+	return (0);
 }
+
 EXPORT_SYMBOL(acpi_unmap_lsapic);
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
- 
+#endif				/* CONFIG_ACPI_HOTPLUG_CPU */
 
 #ifdef CONFIG_ACPI_NUMA
 acpi_status __devinit
-acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
+acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 {
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_table_iosapic *iosapic;
 	unsigned int gsi_base;
@@ -878,10 +890,9 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
 	map_iosapic_to_node(gsi_base, node);
 	return AE_OK;
 }
-#endif /* CONFIG_NUMA */
+#endif				/* CONFIG_NUMA */
 
-int
-acpi_register_ioapic (acpi_handle handle, u64 phys_addr, u32 gsi_base)
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
 	int err;
 
@@ -890,17 +901,18 @@ acpi_register_ioapic (acpi_handle handle, u64 phys_addr, u32 gsi_base)
 
 #if CONFIG_ACPI_NUMA
 	acpi_map_iosapic(handle, 0, NULL, NULL);
-#endif /* CONFIG_ACPI_NUMA */
+#endif				/* CONFIG_ACPI_NUMA */
 
 	return 0;
 }
+
 EXPORT_SYMBOL(acpi_register_ioapic);
 
-int
-acpi_unregister_ioapic (acpi_handle handle, u32 gsi_base)
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
 	return iosapic_remove(gsi_base);
 }
+
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
-#endif /* CONFIG_ACPI_BOOT */
+#endif				/* CONFIG_ACPI */
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 7d1ae2982c5..f6a23428934 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -211,17 +211,41 @@ void foo(void)
 #endif
 
 	BLANK();
-	DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
-	       offsetof (struct ia64_mca_cpu, proc_state_dump));
-	DEFINE(IA64_MCA_CPU_STACK_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stack));
-	DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stackframe));
-	DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
-	       offsetof (struct ia64_mca_cpu, rbstore));
+	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, mca_stack));
 	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
 	       offsetof (struct ia64_mca_cpu, init_stack));
 	BLANK();
+	DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
+	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SIZE,
+	       sizeof (struct ia64_sal_os_state));
+	DEFINE(IA64_PMSA_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_gr));
+	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+	DEFINE(IA64_PMSA_PR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_pr));
+	DEFINE(IA64_PMSA_BR0_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_br0));
+	DEFINE(IA64_PMSA_RSC_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
+	DEFINE(IA64_PMSA_IIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_iip));
+	DEFINE(IA64_PMSA_IPSR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+	DEFINE(IA64_PMSA_IFS_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
+	DEFINE(IA64_PMSA_XIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_xip));
+	BLANK();
+
 	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
 	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
 	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig
new file mode 100644
index 00000000000..2d9d5279b98
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/Kconfig
@@ -0,0 +1,29 @@
+
+#
+# CPU Frequency scaling
+#
+
+menu "CPU Frequency scaling"
+
+source "drivers/cpufreq/Kconfig"
+
+if CPU_FREQ
+
+comment "CPUFreq processor drivers"
+
+config IA64_ACPI_CPUFREQ
+	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	This driver adds a CPUFreq driver which utilizes the ACPI
+	Processor Performance States.
+
+	For details, take a look at <file:Documentation/cpu-freq/>.
+
+	If in doubt, say N.
+
+endif   # CPU_FREQ
+
+endmenu
+
diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile
new file mode 100644
index 00000000000..f748d34c02f
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_IA64_ACPI_CPUFREQ)		+= acpi-cpufreq.o
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
new file mode 100644
index 00000000000..da4d5cf80a4
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,499 @@
+/*
+ * arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+ * This file provides the ACPI based P-state support. This
+ * module works with generic cpufreq infrastructure. Most of
+ * the code is based on i386 version
+ * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c)
+ *
+ * Copyright (C) 2005 Intel Corp
+ *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pal.h>
+
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+
+MODULE_AUTHOR("Venkatesh Pallipadi");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+
+struct cpufreq_acpi_io {
+	struct acpi_processor_performance	acpi_data;
+	struct cpufreq_frequency_table		*freq_table;
+	unsigned int				resume;
+};
+
+static struct cpufreq_acpi_io	*acpi_io_data[NR_CPUS];
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+
+static int
+processor_set_pstate (
+	u32	value)
+{
+	s64 retval;
+
+	dprintk("processor_set_pstate\n");
+
+	retval = ia64_pal_set_pstate((u64)value);
+
+	if (retval) {
+		dprintk("Failed to set freq to 0x%x, with error 0x%x\n",
+		        value, retval);
+		return -ENODEV;
+	}
+	return (int)retval;
+}
+
+
+static int
+processor_get_pstate (
+	u32	*value)
+{
+	u64	pstate_index = 0;
+	s64 	retval;
+
+	dprintk("processor_get_pstate\n");
+
+	retval = ia64_pal_get_pstate(&pstate_index);
+	*value = (u32) pstate_index;
+
+	if (retval)
+		dprintk("Failed to get current freq with "
+		        "error 0x%x, idx 0x%x\n", retval, *value);
+
+	return (int)retval;
+}
+
+
+/* To be used only after data->acpi_data is initialized */
+static unsigned
+extract_clock (
+	struct cpufreq_acpi_io *data,
+	unsigned value,
+	unsigned int cpu)
+{
+	unsigned long i;
+
+	dprintk("extract_clock\n");
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		if (value >= data->acpi_data.states[i].control)
+			return data->acpi_data.states[i].core_frequency;
+	}
+	return data->acpi_data.states[i-1].core_frequency;
+}
+
+
+static unsigned int
+processor_get_freq (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu)
+{
+	int			ret = 0;
+	u32			value = 0;
+	cpumask_t		saved_mask;
+	unsigned long 		clock_freq;
+
+	dprintk("processor_get_freq\n");
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu) {
+		ret = -EAGAIN;
+		goto migrate_end;
+	}
+
+	/*
+	 * processor_get_pstate gets the average frequency since the
+	 * last get. So, do two PAL_get_freq()...
+	 */
+	ret = processor_get_pstate(&value);
+	ret = processor_get_pstate(&value);
+
+	if (ret) {
+		set_cpus_allowed(current, saved_mask);
+		printk(KERN_WARNING "get performance failed with error %d\n",
+		       ret);
+		ret = -EAGAIN;
+		goto migrate_end;
+	}
+	clock_freq = extract_clock(data, value, cpu);
+	ret = (clock_freq*1000);
+
+migrate_end:
+	set_cpus_allowed(current, saved_mask);
+	return ret;
+}
+
+
+static int
+processor_set_freq (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu,
+	int			state)
+{
+	int			ret = 0;
+	u32			value = 0;
+	struct cpufreq_freqs    cpufreq_freqs;
+	cpumask_t		saved_mask;
+	int			retval;
+
+	dprintk("processor_set_freq\n");
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu) {
+		retval = -EAGAIN;
+		goto migrate_end;
+	}
+
+	if (state == data->acpi_data.state) {
+		if (unlikely(data->resume)) {
+			dprintk("Called after resume, resetting to P%d\n", state);
+			data->resume = 0;
+		} else {
+			dprintk("Already at target state (P%d)\n", state);
+			retval = 0;
+			goto migrate_end;
+		}
+	}
+
+	dprintk("Transitioning from P%d to P%d\n",
+		data->acpi_data.state, state);
+
+	/* cpufreq frequency struct */
+	cpufreq_freqs.cpu = cpu;
+	cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
+	cpufreq_freqs.new = data->freq_table[state].frequency;
+
+	/* notify cpufreq */
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+
+	/*
+	 * First we write the target state's 'control' value to the
+	 * control_register.
+	 */
+
+	value = (u32) data->acpi_data.states[state].control;
+
+	dprintk("Transitioning to state: 0x%08x\n", value);
+
+	ret = processor_set_pstate(value);
+	if (ret) {
+		unsigned int tmp = cpufreq_freqs.new;
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+		cpufreq_freqs.new = cpufreq_freqs.old;
+		cpufreq_freqs.old = tmp;
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+		printk(KERN_WARNING "Transition failed with error %d\n", ret);
+		retval = -ENODEV;
+		goto migrate_end;
+	}
+
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+
+	data->acpi_data.state = state;
+
+	retval = 0;
+
+migrate_end:
+	set_cpus_allowed(current, saved_mask);
+	return (retval);
+}
+
+
+static unsigned int
+acpi_cpufreq_get (
+	unsigned int		cpu)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
+
+	dprintk("acpi_cpufreq_get\n");
+
+	return processor_get_freq(data, cpu);
+}
+
+
+static int
+acpi_cpufreq_target (
+	struct cpufreq_policy   *policy,
+	unsigned int target_freq,
+	unsigned int relation)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+	unsigned int next_state = 0;
+	unsigned int result = 0;
+
+	dprintk("acpi_cpufreq_setpolicy\n");
+
+	result = cpufreq_frequency_table_target(policy,
+			data->freq_table, target_freq, relation, &next_state);
+	if (result)
+		return (result);
+
+	result = processor_set_freq(data, policy->cpu, next_state);
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_verify (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int result = 0;
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_verify\n");
+
+	result = cpufreq_frequency_table_verify(policy,
+			data->freq_table);
+
+	return (result);
+}
+
+
+/*
+ * processor_init_pdc - let BIOS know about the SMP capabilities
+ * of this driver
+ * @perf: processor-specific acpi_io_data struct
+ * @cpu: CPU being initialized
+ *
+ * To avoid issues with legacy OSes, some BIOSes require to be informed of
+ * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC
+ * accordingly. Actual call to _PDC is done in driver/acpi/processor.c
+ */
+static void
+processor_init_pdc (
+		struct acpi_processor_performance *perf,
+		unsigned int cpu,
+		struct acpi_object_list *obj_list
+		)
+{
+	union acpi_object *obj;
+	u32 *buf;
+
+	dprintk("processor_init_pdc\n");
+
+	perf->pdc = NULL;
+	/* Initialize pdc. It will be used later. */
+	if (!obj_list)
+		return;
+
+	if (!(obj_list->count && obj_list->pointer))
+		return;
+
+	obj = obj_list->pointer;
+	if ((obj->buffer.length == 12) && obj->buffer.pointer) {
+		buf = (u32 *)obj->buffer.pointer;
+       		buf[0] = ACPI_PDC_REVISION_ID;
+       		buf[1] = 1;
+       		buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
+		perf->pdc = obj_list;
+	}
+	return;
+}
+
+
+static int
+acpi_cpufreq_cpu_init (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int		i;
+	unsigned int		cpu = policy->cpu;
+	struct cpufreq_acpi_io	*data;
+	unsigned int		result = 0;
+
+	union acpi_object		arg0 = {ACPI_TYPE_BUFFER};
+	u32				arg0_buf[3];
+	struct acpi_object_list 	arg_list = {1, &arg0};
+
+	dprintk("acpi_cpufreq_cpu_init\n");
+	/* setup arg_list for _PDC settings */
+        arg0.buffer.length = 12;
+        arg0.buffer.pointer = (u8 *) arg0_buf;
+
+	data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
+	if (!data)
+		return (-ENOMEM);
+
+	memset(data, 0, sizeof(struct cpufreq_acpi_io));
+
+	acpi_io_data[cpu] = data;
+
+	processor_init_pdc(&data->acpi_data, cpu, &arg_list);
+	result = acpi_processor_register_performance(&data->acpi_data, cpu);
+	data->acpi_data.pdc = NULL;
+
+	if (result)
+		goto err_free;
+
+	/* capability check */
+	if (data->acpi_data.state_count <= 1) {
+		dprintk("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	if ((data->acpi_data.control_register.space_id !=
+					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (data->acpi_data.status_register.space_id !=
+					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		dprintk("Unsupported address space [%d, %d]\n",
+			(u32) (data->acpi_data.control_register.space_id),
+			(u32) (data->acpi_data.status_register.space_id));
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	/* alloc freq_table */
+	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
+	                           (data->acpi_data.state_count + 1),
+	                           GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i=0; i<data->acpi_data.state_count; i++) {
+		if ((data->acpi_data.states[i].transition_latency * 1000) >
+		    policy->cpuinfo.transition_latency) {
+			policy->cpuinfo.transition_latency =
+			    data->acpi_data.states[i].transition_latency * 1000;
+		}
+	}
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+	policy->cur = processor_get_freq(data, policy->cpu);
+
+	/* table init */
+	for (i = 0; i <= data->acpi_data.state_count; i++)
+	{
+		data->freq_table[i].index = i;
+		if (i < data->acpi_data.state_count) {
+			data->freq_table[i].frequency =
+			      data->acpi_data.states[i].core_frequency * 1000;
+		} else {
+			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
+		}
+	}
+
+	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+	if (result) {
+		goto err_freqfree;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
+	       "activated.\n", cpu);
+
+	for (i = 0; i < data->acpi_data.state_count; i++)
+		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
+			(i == data->acpi_data.state?'*':' '), i,
+			(u32) data->acpi_data.states[i].core_frequency,
+			(u32) data->acpi_data.states[i].power,
+			(u32) data->acpi_data.states[i].transition_latency,
+			(u32) data->acpi_data.states[i].bus_master_latency,
+			(u32) data->acpi_data.states[i].status,
+			(u32) data->acpi_data.states[i].control);
+
+	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+	/* the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers. */
+	data->resume = 1;
+
+	return (result);
+
+ err_freqfree:
+	kfree(data->freq_table);
+ err_unreg:
+	acpi_processor_unregister_performance(&data->acpi_data, cpu);
+ err_free:
+	kfree(data);
+	acpi_io_data[cpu] = NULL;
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_cpu_exit (
+	struct cpufreq_policy   *policy)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		cpufreq_frequency_table_put_attr(policy->cpu);
+		acpi_io_data[policy->cpu] = NULL;
+		acpi_processor_unregister_performance(&data->acpi_data,
+		                                      policy->cpu);
+		kfree(data);
+	}
+
+	return (0);
+}
+
+
+static struct freq_attr* acpi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+	.verify 	= acpi_cpufreq_verify,
+	.target 	= acpi_cpufreq_target,
+	.get 		= acpi_cpufreq_get,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr           = acpi_cpufreq_attr,
+};
+
+
+static int __init
+acpi_cpufreq_init (void)
+{
+	dprintk("acpi_cpufreq_init\n");
+
+ 	return cpufreq_register_driver(&acpi_cpufreq_driver);
+}
+
+
+static void __exit
+acpi_cpufreq_exit (void)
+{
+	dprintk("acpi_cpufreq_exit\n");
+
+	cpufreq_unregister_driver(&acpi_cpufreq_driver);
+	return;
+}
+
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
deleted file mode 100644
index bbb8efe126b..00000000000
--- a/arch/ia64/kernel/domain.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * arch/ia64/kernel/domain.c
- * Architecture specific sched-domains builder.
- *
- * Copyright (C) 2004 Jesse Barnes
- * Copyright (C) 2004 Silicon Graphics, Inc.
- */
-
-#include <linux/sched.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/topology.h>
-#include <linux/nodemask.h>
-
-#define SD_NODES_PER_DOMAIN 16
-
-#ifdef CONFIG_NUMA
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain.  Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, unsigned long *used_nodes)
-{
-	int i, n, val, min_val, best_node = 0;
-
-	min_val = INT_MAX;
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Start at @node */
-		n = (node + i) % MAX_NUMNODES;
-
-		if (!nr_cpus_node(n))
-			continue;
-
-		/* Skip already used nodes */
-		if (test_bit(n, used_nodes))
-			continue;
-
-		/* Simple min distance search */
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	set_bit(best_node, used_nodes);
-	return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span.  It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static cpumask_t sched_domain_node_span(int node)
-{
-	int i;
-	cpumask_t span, nodemask;
-	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
-	cpus_clear(span);
-	bitmap_zero(used_nodes, MAX_NUMNODES);
-
-	nodemask = node_to_cpumask(node);
-	cpus_or(span, span, nodemask);
-	set_bit(node, used_nodes);
-
-	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-		int next_node = find_next_best_node(node, used_nodes);
-		nodemask = node_to_cpumask(next_node);
-		cpus_or(span, span, nodemask);
-	}
-
-	return span;
-}
-#endif
-
-/*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
- */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static struct sched_group sched_group_cpus[NR_CPUS];
-static int cpu_to_cpu_group(int cpu)
-{
-	return cpu;
-}
-#endif
-
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
-static int cpu_to_phys_group(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
-	return first_cpu(cpu_sibling_map[cpu]);
-#else
-	return cpu;
-#endif
-}
-
-#ifdef CONFIG_NUMA
-/*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group *sched_group_nodes[MAX_NUMNODES];
-
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group sched_group_allnodes[MAX_NUMNODES];
-
-static int cpu_to_allnodes_group(int cpu)
-{
-	return cpu_to_node(cpu);
-}
-#endif
-
-/*
- * Build sched domains for a given set of cpus and attach the sched domains
- * to the individual cpus
- */
-void build_sched_domains(const cpumask_t *cpu_map)
-{
-	int i;
-
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
-	for_each_cpu_mask(i, *cpu_map) {
-		int group;
-		struct sched_domain *sd = NULL, *p;
-		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-
-#ifdef CONFIG_NUMA
-		if (num_online_cpus()
-				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
-			sd = &per_cpu(allnodes_domains, i);
-			*sd = SD_ALLNODES_INIT;
-			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
-			sd->groups = &sched_group_allnodes[group];
-			p = sd;
-		} else
-			p = NULL;
-
-		sd = &per_cpu(node_domains, i);
-		*sd = SD_NODE_INIT;
-		sd->span = sched_domain_node_span(cpu_to_node(i));
-		sd->parent = p;
-		cpus_and(sd->span, sd->span, *cpu_map);
-#endif
-
-		p = sd;
-		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
-		*sd = SD_CPU_INIT;
-		sd->span = nodemask;
-		sd->parent = p;
-		sd->groups = &sched_group_phys[group];
-
-#ifdef CONFIG_SCHED_SMT
-		p = sd;
-		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
-		*sd = SD_SIBLING_INIT;
-		sd->span = cpu_sibling_map[i];
-		cpus_and(sd->span, sd->span, *cpu_map);
-		sd->parent = p;
-		sd->groups = &sched_group_cpus[group];
-#endif
-	}
-
-#ifdef CONFIG_SCHED_SMT
-	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = cpu_sibling_map[i];
-		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
-		if (i != first_cpu(this_sibling_map))
-			continue;
-
-		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
-	}
-#endif
-
-	/* Set up physical groups */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
-	}
-
-#ifdef CONFIG_NUMA
-	init_sched_build_groups(sched_group_allnodes, *cpu_map,
-				&cpu_to_allnodes_group);
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Set up node groups */
-		struct sched_group *sg, *prev;
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t domainspan;
-		cpumask_t covered = CPU_MASK_NONE;
-		int j;
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, *cpu_map);
-
-		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, nodemask) {
-			struct sched_domain *sd;
-			sd = &per_cpu(node_domains, j);
-			sd->groups = sg;
-			if (sd->groups == NULL) {
-				/* Turn off balancing if we have no groups */
-				sd->flags = 0;
-			}
-		}
-		if (!sg) {
-			printk(KERN_WARNING
-			"Can not alloc domain group for node %d\n", i);
-			continue;
-		}
-		sg->cpu_power = 0;
-		sg->cpumask = nodemask;
-		cpus_or(covered, covered, nodemask);
-		prev = sg;
-
-		for (j = 0; j < MAX_NUMNODES; j++) {
-			cpumask_t tmp, notcovered;
-			int n = (i + j) % MAX_NUMNODES;
-
-			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, *cpu_map);
-			cpus_and(tmp, tmp, domainspan);
-			if (cpus_empty(tmp))
-				break;
-
-			nodemask = node_to_cpumask(n);
-			cpus_and(tmp, tmp, nodemask);
-			if (cpus_empty(tmp))
-				continue;
-
-			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-			if (!sg) {
-				printk(KERN_WARNING
-				"Can not alloc domain group for node %d\n", j);
-				break;
-			}
-			sg->cpu_power = 0;
-			sg->cpumask = tmp;
-			cpus_or(covered, covered, tmp);
-			prev->next = sg;
-			prev = sg;
-		}
-		prev->next = sched_group_nodes[i];
-	}
-#endif
-
-	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-		power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
-
-		sd = &per_cpu(phys_domains, i);
-		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
-#endif
-	}
-
-#ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
-
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
-#endif
-
-	/* Attach the domains */
-	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-#else
-		sd = &per_cpu(phys_domains, i);
-#endif
-		cpu_attach_domain(sd, i);
-	}
-}
-/*
- * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
- */
-void arch_init_sched_domains(const cpumask_t *cpu_map)
-{
-	cpumask_t cpu_default_map;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	build_sched_domains(&cpu_default_map);
-}
-
-void arch_destroy_sched_domains(const cpumask_t *cpu_map)
-{
-#ifdef CONFIG_NUMA
-	int i;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-		struct sched_group *oldsg, *sg = sched_group_nodes[i];
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		if (sg == NULL)
-			continue;
-		sg = sg->next;
-next_sg:
-		oldsg = sg;
-		sg = sg->next;
-		kfree(oldsg);
-		if (oldsg != sched_group_nodes[i])
-			goto next_sg;
-		sched_group_nodes[i] = NULL;
-	}
-#endif
-}
-
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 9be53e1ea40..ba0b6a1f429 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -37,7 +37,7 @@
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/kregs.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
 #include <asm/percpu.h>
 #include <asm/processor.h>
@@ -204,9 +204,6 @@ GLOBAL_ENTRY(ia64_switch_to)
 (p6)	br.cond.dpnt .map
 	;;
 .done:
-(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
-	;;
-(p6)	srlz.d
 	ld8 sp=[r21]			// load kernel stack pointer of new task
 	mov IA64_KR(CURRENT)=in0	// update "current" application register
 	mov r8=r13			// return pointer to previously running task
@@ -234,6 +231,9 @@ GLOBAL_ENTRY(ia64_switch_to)
 	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
 	;;
 	itr.d dtr[r25]=r23		// wire in new mapping...
+	ssm psr.ic			// reenable the psr.ic bit
+	;;
+	srlz.d
 	br.cond.sptk .done
 END(ia64_switch_to)
 
@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
 	br.cond.sptk.many b7
 END(load_switch_stack)
 
+GLOBAL_ENTRY(prefetch_stack)
+	add r14 = -IA64_SWITCH_STACK_SIZE, sp
+	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0
+	;;
+	ld8 r16 = [r15]				// load next's stack pointer
+	lfetch.fault.excl [r14], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault [r16], 128
+	br.ret.sptk.many rp
+END(prefetch_switch_stack)
+
 GLOBAL_ENTRY(execve)
 	mov r15=__NR_execve			// put syscall number in place
 	break __BREAK_SYSCALL
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 7d7684a369d..2ddbac6f499 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -14,7 +14,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/percpu.h>
 #include <asm/thread_info.h>
 #include <asm/sal.h>
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 86948ce63e4..86064ca9895 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -10,7 +10,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/sigcontext.h>
 #include <asm/system.h>
 #include <asm/unistd.h>
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 8d3a9291b47..bfe65b2e862 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -25,7 +25,7 @@
 #include <asm/fpu.h>
 #include <asm/kregs.h>
 #include <asm/mmu_context.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pal.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 7936b62f7a2..574084f343f 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -561,7 +561,7 @@ static inline int vector_is_shared (int vector)
 	return (iosapic_intr_info[vector].count > 1);
 }
 
-static void
+static int
 register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
 {
@@ -576,7 +576,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	index = find_iosapic(gsi);
 	if (index < 0) {
 		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
-		return;
+		return -ENODEV;
 	}
 
 	iosapic_address = iosapic_lists[index].addr;
@@ -587,7 +587,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 		rte = iosapic_alloc_rte();
 		if (!rte) {
 			printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
-			return;
+			return -ENOMEM;
 		}
 
 		rte_index = gsi - gsi_base;
@@ -603,7 +603,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
 		if (info->trigger != trigger || info->polarity != polarity) {
 			printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
-			return;
+			return -EINVAL;
 		}
 	}
 
@@ -623,6 +623,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 			       __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
 		idesc->handler = irq_type;
 	}
+	return 0;
 }
 
 static unsigned int
@@ -710,7 +711,7 @@ int
 iosapic_register_intr (unsigned int gsi,
 		       unsigned long polarity, unsigned long trigger)
 {
-	int vector, mask = 1;
+	int vector, mask = 1, err;
 	unsigned int dest;
 	unsigned long flags;
 	struct iosapic_rte_info *rte;
@@ -737,8 +738,8 @@ again:
 	vector = assign_irq_vector(AUTO_ASSIGN);
 	if (vector < 0) {
 		vector = iosapic_find_sharable_vector(trigger, polarity);
-		if (vector < 0)
-			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+  		if (vector < 0)
+			return -ENOSPC;
 	}
 
 	spin_lock_irqsave(&irq_descp(vector)->lock, flags);
@@ -753,8 +754,13 @@ again:
 		}
 
 		dest = get_target_cpu(gsi, vector);
-		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
+		err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
 			      polarity, trigger);
+		if (err < 0) {
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			return err;
+		}
 
 		/*
 		 * If the vector is shared and already unmasked for
@@ -776,7 +782,6 @@ again:
 	return vector;
 }
 
-#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
 void
 iosapic_unregister_intr (unsigned int gsi)
 {
@@ -859,7 +864,6 @@ iosapic_unregister_intr (unsigned int gsi)
 	spin_unlock(&iosapic_lock);
 	spin_unlock_irqrestore(&idesc->lock, flags);
 }
-#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
 /*
  * ACPI calls this when it finds an entry for a platform interrupt.
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 28f2aadc38d..205d9802826 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -91,23 +91,8 @@ skip:
 }
 
 #ifdef CONFIG_SMP
-/*
- * This is updated when the user sets irq affinity via /proc
- */
-static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
-static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
-
 static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
 
-/*
- * Arch specific routine for deferred write to iosapic rte to reprogram
- * intr destination.
- */
-void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
-{
-	pending_irq_cpumask[irq] = mask_val;
-}
-
 void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 {
 	cpumask_t mask = CPU_MASK_NONE;
@@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 
 	if (irq < NR_IRQS) {
 		irq_affinity[irq] = mask;
+		set_irq_info(irq, mask);
 		irq_redir[irq] = (char) (redir & 0xff);
 	}
 }
-
-
-void move_irq(int irq)
-{
-	/* note - we hold desc->lock */
-	cpumask_t tmp;
-	irq_desc_t *desc = irq_descp(irq);
-	int redir = test_bit(irq, pending_irq_redir);
-
-	if (unlikely(!desc->handler->set_affinity))
-		return;
-
-	if (!cpus_empty(pending_irq_cpumask[irq])) {
-		cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
-		if (unlikely(!cpus_empty(tmp))) {
-			desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
-						    pending_irq_cpumask[irq]);
-		}
-		cpus_clear(pending_irq_cpumask[irq]);
-	}
-}
-
-
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 3bb3a13c404..c13ca0d49c4 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -44,7 +44,7 @@
 #include <asm/break.h>
 #include <asm/ia32.h>
 #include <asm/kregs.h>
-#include <asm/offsets.h>
+#include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
@@ -69,7 +69,6 @@
 # define DBG_FAULT(i)
 #endif
 
-#define MINSTATE_VIRT	/* needed by minstate.h */
 #include "minstate.h"
 
 #define FAULT(n)									\
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index b7fa3ccd2b0..2323377e369 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -49,6 +49,7 @@
 	/*
 	 * void jprobe_break(void)
 	 */
+	.section .kprobes.text, "ax"
 ENTRY(jprobe_break)
 	break.m 0x80300
 END(jprobe_break)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 884f5cd27d8..471086b808a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -87,12 +87,25 @@ static enum instruction_type bundle_encoding[32][3] = {
  * is IP relative instruction and update the kprobe
  * inst flag accordingly
  */
-static void update_kprobe_inst_flag(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
+					      uint major_opcode,
+					      unsigned long kprobe_inst,
+					      struct kprobe *p)
 {
 	p->ainsn.inst_flag = 0;
 	p->ainsn.target_br_reg = 0;
 
+	/* Check for Break instruction
+ 	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) {
+		/* is a break instruction */
+	 	p->ainsn.inst_flag |= INST_FLAG_BREAK_INST;
+		return;
+	}
+
 	if (bundle_encoding[template][slot] == B) {
 		switch (major_opcode) {
 		  case INDIRECT_CALL_OPCODE:
@@ -126,8 +139,10 @@ static void update_kprobe_inst_flag(uint template, uint  slot, uint major_opcode
  * Returns 0 if supported
  * Returns -EINVAL if unsupported
  */
-static int unsupported_inst(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static int __kprobes unsupported_inst(uint template, uint  slot,
+				      uint major_opcode,
+				      unsigned long kprobe_inst,
+				      struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 
@@ -168,8 +183,9 @@ static int unsupported_inst(uint template, uint  slot, uint major_opcode,
  * on which we are inserting kprobe is cmp instruction
  * with ctype as unc.
  */
-static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode,
-unsigned long kprobe_inst)
+static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
+					    uint major_opcode,
+					    unsigned long kprobe_inst)
 {
 	cmp_inst_t cmp_inst;
 	uint ctype_unc = 0;
@@ -201,8 +217,10 @@ out:
  * In this function we override the bundle with
  * the break instruction at the given slot.
  */
-static void prepare_break_inst(uint template, uint  slot, uint major_opcode,
-	unsigned long kprobe_inst, struct kprobe *p)
+static void __kprobes prepare_break_inst(uint template, uint  slot,
+					 uint major_opcode,
+					 unsigned long kprobe_inst,
+					 struct kprobe *p)
 {
 	unsigned long break_inst = BREAK_INST;
 	bundle_t *bundle = &p->ainsn.insn.bundle;
@@ -271,7 +289,8 @@ static inline int in_ivt_functions(unsigned long addr)
 		&& addr < (unsigned long)__end_ivt_text);
 }
 
-static int valid_kprobe_addr(int template, int slot, unsigned long addr)
+static int __kprobes valid_kprobe_addr(int template, int slot,
+				       unsigned long addr)
 {
 	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
 		printk(KERN_WARNING "Attempting to insert unaligned kprobe "
@@ -323,7 +342,7 @@ static void kretprobe_trampoline(void)
  *    - cleanup by marking the instance as unused
  *    - long jump back to the original return address
  */
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head;
@@ -381,7 +400,8 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         return 1;
 }
 
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+				      struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri;
 
@@ -399,7 +419,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 	}
 }
 
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long) p->addr;
 	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
@@ -430,7 +450,7 @@ int arch_prepare_kprobe(struct kprobe *p)
 	return 0;
 }
 
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
@@ -439,7 +459,7 @@ void arch_arm_kprobe(struct kprobe *p)
 	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
 }
 
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
@@ -449,7 +469,7 @@ void arch_disarm_kprobe(struct kprobe *p)
 	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
 }
 
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 }
 
@@ -461,7 +481,7 @@ void arch_remove_kprobe(struct kprobe *p)
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
  */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
   	unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
   	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
@@ -528,13 +548,16 @@ turn_ss_off:
   	ia64_psr(regs)->ss = 0;
 }
 
-static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 {
 	unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
 	unsigned long slot = (unsigned long)p->addr & 0xf;
 
-	/* Update instruction pointer (IIP) and slot number (IPSR.ri) */
-	regs->cr_iip = bundle_addr & ~0xFULL;
+	/* single step inline if break instruction */
+	if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)
+		regs->cr_iip = (unsigned long)p->addr & ~0xFULL;
+	else
+		regs->cr_iip = bundle_addr & ~0xFULL;
 
 	if (slot > 2)
 		slot = 0;
@@ -545,7 +568,39 @@ static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
 	ia64_psr(regs)->ss = 1;
 }
 
-static int pre_kprobes_handler(struct die_args *args)
+static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
+{
+	unsigned int slot = ia64_psr(regs)->ri;
+	unsigned int template, major_opcode;
+	unsigned long kprobe_inst;
+	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
+	bundle_t bundle;
+
+	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
+	template = bundle.quad0.template;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+  		slot++;
+
+	/* Get Kprobe probe instruction at given slot*/
+	get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
+
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+static int __kprobes pre_kprobes_handler(struct die_args *args)
 {
 	struct kprobe *p;
 	int ret = 0;
@@ -558,7 +613,9 @@ static int pre_kprobes_handler(struct die_args *args)
 	if (kprobe_running()) {
 		p = get_kprobe(addr);
 		if (p) {
-			if (kprobe_status == KPROBE_HIT_SS) {
+			if ( (kprobe_status == KPROBE_HIT_SS) &&
+	 		     (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
+  				ia64_psr(regs)->ss = 0;
 				unlock_kprobes();
 				goto no_kprobe;
 			}
@@ -592,6 +649,19 @@ static int pre_kprobes_handler(struct die_args *args)
 	p = get_kprobe(addr);
 	if (!p) {
 		unlock_kprobes();
+		if (!is_ia64_break_inst(regs)) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+
+		}
+
+		/* Not one of our break, let kernel handle it */
 		goto no_kprobe;
 	}
 
@@ -616,7 +686,7 @@ no_kprobe:
 	return ret;
 }
 
-static int post_kprobes_handler(struct pt_regs *regs)
+static int __kprobes post_kprobes_handler(struct pt_regs *regs)
 {
 	if (!kprobe_running())
 		return 0;
@@ -641,7 +711,7 @@ out:
 	return 1;
 }
 
-static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	if (!kprobe_running())
 		return 0;
@@ -659,8 +729,8 @@ static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 	return 0;
 }
 
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
-			     void *data)
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
 	switch(val) {
@@ -681,7 +751,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
 	return NOTIFY_DONE;
 }
 
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
@@ -703,7 +773,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	*regs = jprobe_saved_regs;
 	return 1;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 4ebbf397438..6dc726ad713 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -48,6 +48,9 @@
  *            Delete dead variables and functions.
  *            Reorder to remove the need for forward declarations and to consolidate
  *            related code.
+ *
+ * 2005-08-12 Keith Owens <kaos@sgi.com>
+ *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -77,6 +80,8 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "entry.h"
+
 #if defined(IA64_MCA_DEBUG_INFO)
 # define IA64_MCA_DEBUG(fmt...)	printk(fmt)
 #else
@@ -84,9 +89,7 @@
 #endif
 
 /* Used by mca_asm.S */
-ia64_mca_sal_to_os_state_t	ia64_sal_to_os_handoff_state;
-ia64_mca_os_to_sal_state_t	ia64_os_to_sal_handoff_state;
-u64				ia64_mca_serialize;
+u32				ia64_mca_serialize;
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
@@ -95,8 +98,10 @@ DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
 unsigned long __per_cpu_mca[NR_CPUS];
 
 /* In mca_asm.S */
-extern void			ia64_monarch_init_handler (void);
-extern void			ia64_slave_init_handler (void);
+extern void			ia64_os_init_dispatch_monarch (void);
+extern void			ia64_os_init_dispatch_slave (void);
+
+static int monarch_cpu = -1;
 
 static ia64_mc_info_t		ia64_mc_info;
 
@@ -234,7 +239,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
  *  This function retrieves a specified error record type from SAL
  *  and wakes up any processes waiting for error records.
  *
- *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
+ *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
+ *              FIXME: remove MCA and irq_safe.
  */
 static void
 ia64_mca_log_sal_error_record(int sal_info_type)
@@ -242,7 +248,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 	u8 *buffer;
 	sal_log_record_header_t *rh;
 	u64 size;
-	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
+	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
 #ifdef IA64_MCA_DEBUG_INFO
 	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
 #endif
@@ -330,191 +336,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 
 #endif /* CONFIG_ACPI */
 
-static void
-show_min_state (pal_min_state_area_t *minstate)
-{
-	u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
-	u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
-
-	printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
-	printk("pr\t\t%016lx\n", minstate->pmsa_pr);
-	printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
-	printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
-	printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
-	printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
-	printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
-	printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
-	printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
-	printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
-	printk("b1\t\t%016lx ", minstate->pmsa_br1);
-	print_symbol("%s\n", minstate->pmsa_br1);
-
-	printk("\nstatic registers r0-r15:\n");
-	printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
-	       0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
-	printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[3], minstate->pmsa_gr[4],
-	       minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
-	printk(" r8-11 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[7], minstate->pmsa_gr[8],
-	       minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
-	printk("r12-15 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[11], minstate->pmsa_gr[12],
-	       minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
-
-	printk("\nbank 0:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
-	       minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
-	       minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
-	       minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
-	       minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
-
-	printk("\nbank 1:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
-	       minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
-	       minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
-	       minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
-	       minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
-}
-
-static void
-fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
-{
-	u64 *dst_banked, *src_banked, bit, shift, nat_bits;
-	int i;
-
-	/*
-	 * First, update the pt-regs and switch-stack structures with the contents stored
-	 * in the min-state area:
-	 */
-	if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
-		pt->cr_ipsr = ms->pmsa_xpsr;
-		pt->cr_iip = ms->pmsa_xip;
-		pt->cr_ifs = ms->pmsa_xfs;
-	} else {
-		pt->cr_ipsr = ms->pmsa_ipsr;
-		pt->cr_iip = ms->pmsa_iip;
-		pt->cr_ifs = ms->pmsa_ifs;
-	}
-	pt->ar_rsc = ms->pmsa_rsc;
-	pt->pr = ms->pmsa_pr;
-	pt->r1 = ms->pmsa_gr[0];
-	pt->r2 = ms->pmsa_gr[1];
-	pt->r3 = ms->pmsa_gr[2];
-	sw->r4 = ms->pmsa_gr[3];
-	sw->r5 = ms->pmsa_gr[4];
-	sw->r6 = ms->pmsa_gr[5];
-	sw->r7 = ms->pmsa_gr[6];
-	pt->r8 = ms->pmsa_gr[7];
-	pt->r9 = ms->pmsa_gr[8];
-	pt->r10 = ms->pmsa_gr[9];
-	pt->r11 = ms->pmsa_gr[10];
-	pt->r12 = ms->pmsa_gr[11];
-	pt->r13 = ms->pmsa_gr[12];
-	pt->r14 = ms->pmsa_gr[13];
-	pt->r15 = ms->pmsa_gr[14];
-	dst_banked = &pt->r16;		/* r16-r31 are contiguous in struct pt_regs */
-	src_banked = ms->pmsa_bank1_gr;
-	for (i = 0; i < 16; ++i)
-		dst_banked[i] = src_banked[i];
-	pt->b0 = ms->pmsa_br0;
-	sw->b1 = ms->pmsa_br1;
-
-	/* construct the NaT bits for the pt-regs structure: */
-#	define PUT_NAT_BIT(dst, addr)					\
-	do {								\
-		bit = nat_bits & 1; nat_bits >>= 1;			\
-		shift = ((unsigned long) addr >> 3) & 0x3f;		\
-		dst = ((dst) & ~(1UL << shift)) | (bit << shift);	\
-	} while (0)
-
-	/* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */
-	shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
-	nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift));
-
-	PUT_NAT_BIT(sw->caller_unat, &pt->r1);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r2);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r3);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r4);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r5);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r6);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r7);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r8);	PUT_NAT_BIT(sw->caller_unat, &pt->r9);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r10);	PUT_NAT_BIT(sw->caller_unat, &pt->r11);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r12);	PUT_NAT_BIT(sw->caller_unat, &pt->r13);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r14);	PUT_NAT_BIT(sw->caller_unat, &pt->r15);
-	nat_bits >>= 16;	/* skip over bank0 NaT bits */
-	PUT_NAT_BIT(sw->caller_unat, &pt->r16);	PUT_NAT_BIT(sw->caller_unat, &pt->r17);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r18);	PUT_NAT_BIT(sw->caller_unat, &pt->r19);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r20);	PUT_NAT_BIT(sw->caller_unat, &pt->r21);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r22);	PUT_NAT_BIT(sw->caller_unat, &pt->r23);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r24);	PUT_NAT_BIT(sw->caller_unat, &pt->r25);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r26);	PUT_NAT_BIT(sw->caller_unat, &pt->r27);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r28);	PUT_NAT_BIT(sw->caller_unat, &pt->r29);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r30);	PUT_NAT_BIT(sw->caller_unat, &pt->r31);
-}
-
-static void
-init_handler_platform (pal_min_state_area_t *ms,
-		       struct pt_regs *pt, struct switch_stack *sw)
-{
-	struct unw_frame_info info;
-
-	/* if a kernel debugger is available call it here else just dump the registers */
-
-	/*
-	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
-	 * generated via the BMC's command-line interface, but since the console is on the
-	 * same serial line, the user will need some time to switch out of the BMC before
-	 * the dump begins.
-	 */
-	printk("Delaying for 5 seconds...\n");
-	udelay(5*1000000);
-	show_min_state(ms);
-
-	printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
-	fetch_min_state(ms, pt, sw);
-	unw_init_from_interruption(&info, current, pt, sw);
-	ia64_do_show_stack(&info, NULL);
-
-#ifdef CONFIG_SMP
-	/* read_trylock() would be handy... */
-	if (!tasklist_lock.write_lock)
-		read_lock(&tasklist_lock);
-#endif
-	{
-		struct task_struct *g, *t;
-		do_each_thread (g, t) {
-			if (t == current)
-				continue;
-
-			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
-			show_stack(t, NULL);
-		} while_each_thread (g, t);
-	}
-#ifdef CONFIG_SMP
-	if (!tasklist_lock.write_lock)
-		read_unlock(&tasklist_lock);
-#endif
-
-	printk("\nINIT dump complete.  Please reboot now.\n");
-	while (1);			/* hang city if no debugger */
-}
-
 #ifdef CONFIG_ACPI
 /*
  * ia64_mca_register_cpev
@@ -657,42 +478,6 @@ ia64_mca_cmc_vector_enable_keventd(void *unused)
 }
 
 /*
- * ia64_mca_wakeup_ipi_wait
- *
- *	Wait for the inter-cpu interrupt to be sent by the
- *	monarch processor once it is done with handling the
- *	MCA.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-static void
-ia64_mca_wakeup_ipi_wait(void)
-{
-	int	irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6);
-	int	irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f);
-	u64	irr = 0;
-
-	do {
-		switch(irr_num) {
-		      case 0:
-			irr = ia64_getreg(_IA64_REG_CR_IRR0);
-			break;
-		      case 1:
-			irr = ia64_getreg(_IA64_REG_CR_IRR1);
-			break;
-		      case 2:
-			irr = ia64_getreg(_IA64_REG_CR_IRR2);
-			break;
-		      case 3:
-			irr = ia64_getreg(_IA64_REG_CR_IRR3);
-			break;
-		}
-		cpu_relax();
-	} while (!(irr & (1UL << irr_bit))) ;
-}
-
-/*
  * ia64_mca_wakeup
  *
  *	Send an inter-cpu interrupt to wake-up a particular cpu
@@ -757,11 +542,9 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
 	 */
 	ia64_sal_mc_rendez();
 
-	/* Wait for the wakeup IPI from the monarch
-	 * This waiting is done by polling on the wakeup-interrupt
-	 * vector bit in the processor's IRRs
-	 */
-	ia64_mca_wakeup_ipi_wait();
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+	       cpu_relax();	/* spin until monarch leaves */
 
 	/* Enable all interrupts */
 	local_irq_restore(flags);
@@ -789,53 +572,13 @@ ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
 	return IRQ_HANDLED;
 }
 
-/*
- * ia64_return_to_sal_check
- *
- *	This is function called before going back from the OS_MCA handler
- *	to the OS_MCA dispatch code which finally takes the control back
- *	to the SAL.
- *	The main purpose of this routine is to setup the OS_MCA to SAL
- *	return state which can be used by the OS_MCA dispatch code
- *	just before going back to SAL.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-
-static void
-ia64_return_to_sal_check(int recover)
-{
-
-	/* Copy over some relevant stuff from the sal_to_os_mca_handoff
-	 * so that it can be used at the time of os_mca_to_sal_handoff
-	 */
-	ia64_os_to_sal_handoff_state.imots_sal_gp =
-		ia64_sal_to_os_handoff_state.imsto_sal_gp;
-
-	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
-		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
-
-	if (recover)
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
-	else
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
-
-	/* Default = tell SAL to return to same context */
-	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
-
-	ia64_os_to_sal_handoff_state.imots_new_min_state =
-		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
-
-}
-
 /* Function pointer for extra MCA recovery */
 int (*ia64_mca_ucmc_extension)
-	(void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+	(void*,struct ia64_sal_os_state*)
 	= NULL;
 
 int
-ia64_reg_MCA_extension(void *fn)
+ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
 {
 	if (ia64_mca_ucmc_extension)
 		return 1;
@@ -854,8 +597,321 @@ ia64_unreg_MCA_extension(void)
 EXPORT_SYMBOL(ia64_reg_MCA_extension);
 EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
+
+static inline void
+copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+{
+	u64 fslot, tslot, nat;
+	*tr = *fr;
+	fslot = ((unsigned long)fr >> 3) & 63;
+	tslot = ((unsigned long)tr >> 3) & 63;
+	*tnat &= ~(1UL << tslot);
+	nat = (fnat >> fslot) & 1;
+	*tnat |= (nat << tslot);
+}
+
+/* On entry to this routine, we are running on the per cpu stack, see
+ * mca_asm.h.  The original stack has not been touched by this event.  Some of
+ * the original stack's registers will be in the RBS on this stack.  This stack
+ * also contains a partial pt_regs and switch_stack, the rest of the data is in
+ * PAL minstate.
+ *
+ * The first thing to do is modify the original stack to look like a blocked
+ * task so we can run backtrace on the original task.  Also mark the per cpu
+ * stack as current to ensure that we use the correct task state, it also means
+ * that we can do backtrace on the MCA/INIT handler code itself.
+ */
+
+static task_t *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p, comm[sizeof(current->comm)];
+	ia64_va va;
+	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	task_t *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+			sizeof(struct switch_stack) + 16;
+	u64 *old_bspstore, *old_bsp;
+	u64 *new_bspstore, *new_bsp;
+	u64 old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const u64 *bank;
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below.  So ia64_old_stack() must restore the dirty
+	 * registers from the new stack.  The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp.  Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (u64 *)ar_bspstore;
+	old_bsp = (u64 *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		goto no_mod;
+	}
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent r12 and r13";
+		goto no_mod;
+	}
+	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bspstore and r13";
+		goto no_mod;
+	}
+	va.p = old_bspstore;
+	if (va.f.reg < 5) {
+		msg = "old_bspstore is in the wrong region";
+		goto no_mod;
+	}
+	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bsp and r13";
+		goto no_mod;
+	}
+	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+	if (ar_bspstore + size > r12) {
+		msg = "no room for blocked state";
+		goto no_mod;
+	}
+
+	/* Change the comm field on the MCA/INT task to include the pid that
+	 * was interrupted, it makes for easier debugging.  If that pid was 0
+	 * (swapper or nested MCA/INIT) then use the start of the previous comm
+	 * field suffixed with its cpu.
+	 */
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			previous_current->thread_info->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+
+	/* Make the original task look blocked.  First stack a struct pt_regs,
+	 * describing the state at the time of interrupt.  mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		old_regs->cr_iip = ms->pmsa_iip;
+		old_regs->cr_ipsr = ms->pmsa_ipsr;
+		old_regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		old_regs->cr_iip = ms->pmsa_xip;
+		old_regs->cr_ipsr = ms->pmsa_xpsr;
+		old_regs->cr_ifs = ms->pmsa_xfs;
+	}
+	old_regs->pr = ms->pmsa_pr;
+	old_regs->b0 = ms->pmsa_br0;
+	old_regs->loadrs = loadrs;
+	old_regs->ar_rsc = ms->pmsa_rsc;
+	old_unat = old_regs->ar_unat;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
+	if (ia64_psr(old_regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+
+	/* Next stack a struct switch_stack.  mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
+	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack.  The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT.  It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves.  If any cpu has
+ * not entered rendezvous yet then wait a bit.  The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so.  In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ */
+
+static void
+ia64_wait_for_slaves(int monarch)
+{
+	int c, wait = 0;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(1000);		/* short wait first */
+			wait = 1;
+			break;
+		}
+	}
+	if (!wait)
+		return;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
+			break;
+		}
+	}
+}
+
 /*
- * ia64_mca_ucmc_handler
+ * ia64_mca_handler
  *
  *	This is uncorrectable machine check handler called from OS_MCA
  *	dispatch code which is in turn called from SAL_CHECK().
@@ -866,16 +922,28 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
  *	further MCA logging is enabled by clearing logs.
  *	Monarch also has the duty of sending wakeup-IPIs to pull the
  *	slave processors out of rendezvous spinloop.
- *
- *  Inputs  :   None
- *  Outputs :   None
  */
 void
-ia64_mca_ucmc_handler(void)
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		 struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&ia64_sal_to_os_handoff_state.proc_state_param;
-	int recover; 
+		&sos->proc_state_param;
+	int recover, cpu = smp_processor_id();
+	task_t *previous_current;
+
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+	monarch_cpu = cpu;
+	ia64_wait_for_slaves(cpu);
+
+	/* Wakeup all the processors which are spinning in the rendezvous loop.
+	 * They will leave SAL, then spin in the OS with interrupts disabled
+	 * until this monarch cpu leaves the MCA handler.  That gets control
+	 * back to the OS so we can backtrace the other cpus, backtrace when
+	 * spinning in SAL does not work.
+	 */
+	ia64_mca_wakeup_all();
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -883,25 +951,20 @@ ia64_mca_ucmc_handler(void)
 	/* TLB error is only exist in this SAL error record */
 	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
 	/* other error recovery */
-	   || (ia64_mca_ucmc_extension 
+	   || (ia64_mca_ucmc_extension
 		&& ia64_mca_ucmc_extension(
 			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
-			&ia64_sal_to_os_handoff_state,
-			&ia64_os_to_sal_handoff_state)); 
+			sos));
 
 	if (recover) {
 		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
 		rh->severity = sal_log_severity_corrected;
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;
 	}
-	/*
-	 *  Wakeup all the processors which are spinning in the rendezvous
-	 *  loop.
-	 */
-	ia64_mca_wakeup_all();
 
-	/* Return to SAL */
-	ia64_return_to_sal_check(recover);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
 }
 
 static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
@@ -1125,34 +1188,114 @@ ia64_mca_cpe_poll (unsigned long dummy)
 /*
  * C portion of the OS INIT handler
  *
- * Called from ia64_monarch_init_handler
- *
- * Inputs: pointer to pt_regs where processor info was saved.
+ * Called from ia64_os_init_dispatch
  *
- * Returns:
- *   0 if SAL must warm boot the System
- *   1 if SAL must return to interrupted context using PAL_MC_RESUME
+ * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
+ * this event.  This code is used for both monarch and slave INIT events, see
+ * sos->monarch.
  *
+ * All INIT events switch to the INIT stack and change the previous process to
+ * blocked status.  If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command.  Use the monarch cpu to dump all
+ * the processes.  The slave INIT events all spin until the monarch cpu
+ * returns.  We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
  */
+
 void
-ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
 {
-	pal_min_state_area_t *ms;
+	static atomic_t slaves;
+	static atomic_t monarchs;
+	task_t *previous_current;
+	int cpu = smp_processor_id(), c;
+	struct task_struct *g, *t;
 
-	oops_in_progress = 1;	/* avoid deadlock in printk, but it makes recovery dodgy */
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
 	console_loglevel = 15;	/* make sure printks make it to console */
 
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
-		ia64_sal_to_os_handoff_state.proc_state_param);
+	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
-	/*
-	 * Address of minstate area provided by PAL is physical,
-	 * uncacheable (bit 63 set). Convert to Linux virtual
-	 * address in region 6.
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves.  The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
 	 */
-	ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		       __FUNCTION__, cpu);
+		atomic_dec(&slaves);
+		sos->monarch = 1;
+	}
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			       __FUNCTION__, cpu);
+		atomic_dec(&monarchs);
+		sos->monarch = 0;
+	}
 
-	init_handler_platform(ms, pt, sw);	/* call platform specific routines */
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+		while (monarch_cpu == -1)
+		       cpu_relax();	/* spin until monarch enters */
+		while (monarch_cpu != -1)
+		       cpu_relax();	/* spin until monarch leaves */
+		printk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu);
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
+	return;
 }
 
 static int __init
@@ -1202,6 +1345,34 @@ static struct irqaction mca_cpep_irqaction = {
 };
 #endif /* CONFIG_ACPI */
 
+/* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
+ * these stacks can never sleep, they cannot return from the kernel to user
+ * space, they do not appear in a normal ps listing.  So there is no need to
+ * format most of the fields.
+ */
+
+static void
+format_mca_init_stack(void *mca_data, unsigned long offset,
+		const char *type, int cpu)
+{
+	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
+	struct thread_info *ti;
+	memset(p, 0, KERNEL_STACK_SIZE);
+	ti = (struct thread_info *)((char *)p + IA64_TASK_SIZE);
+	ti->flags = _TIF_MCA_INIT;
+	ti->preempt_count = 1;
+	ti->task = p;
+	ti->cpu = cpu;
+	p->thread_info = ti;
+	p->state = TASK_UNINTERRUPTIBLE;
+	__set_bit(cpu, &p->cpus_allowed);
+	INIT_LIST_HEAD(&p->tasks);
+	p->parent = p->real_parent = p->group_leader = p;
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	strncpy(p->comm, type, sizeof(p->comm)-1);
+}
+
 /* Do per-CPU MCA-related initialization.  */
 
 void __devinit
@@ -1214,19 +1385,28 @@ ia64_mca_cpu_init(void *cpu_data)
 		int cpu;
 
 		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
-					 * NR_CPUS);
+					 * NR_CPUS + KERNEL_STACK_SIZE);
+		mca_data = (void *)(((unsigned long)mca_data +
+					KERNEL_STACK_SIZE - 1) &
+				(-KERNEL_STACK_SIZE));
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, mca_stack),
+					"MCA", cpu);
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, init_stack),
+					"INIT", cpu);
 			__per_cpu_mca[cpu] = __pa(mca_data);
 			mca_data += sizeof(struct ia64_mca_cpu);
 		}
 	}
 
-        /*
-         * The MCA info structure was allocated earlier and its
-         * physical address saved in __per_cpu_mca[cpu].  Copy that
-         * address * to ia64_mca_data so we can access it as a per-CPU
-         * variable.
-         */
+	/*
+	 * The MCA info structure was allocated earlier and its
+	 * physical address saved in __per_cpu_mca[cpu].  Copy that
+	 * address * to ia64_mca_data so we can access it as a per-CPU
+	 * variable.
+	 */
 	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
 
 	/*
@@ -1236,11 +1416,11 @@ ia64_mca_cpu_init(void *cpu_data)
 	__get_cpu_var(ia64_mca_per_cpu_pte) =
 		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
 
-        /*
-         * Also, stash away a copy of the PAL address and the PTE
-         * needed to map it.
-         */
-        pal_vaddr = efi_get_pal_addr();
+	/*
+	 * Also, stash away a copy of the PAL address and the PTE
+	 * needed to map it.
+	 */
+	pal_vaddr = efi_get_pal_addr();
 	if (!pal_vaddr)
 		return;
 	__get_cpu_var(ia64_mca_pal_base) =
@@ -1272,8 +1452,8 @@ ia64_mca_cpu_init(void *cpu_data)
 void __init
 ia64_mca_init(void)
 {
-	ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
-	ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
 	s64 rc;
@@ -1351,9 +1531,9 @@ ia64_mca_init(void)
 	 * XXX - disable SAL checksum by setting size to 0, should be
 	 * size of the actual init handler in mca_asm.S.
 	 */
-	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(mon_init_ptr->fp);
+	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(init_hldlr_ptr_monarch->fp);
 	ia64_mc_info.imi_monarch_init_handler_size	= 0;
-	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(slave_init_ptr->fp);
+	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
 	ia64_mc_info.imi_slave_init_handler_size	= 0;
 
 	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index ef3fd7265b6..499a065f4e6 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -16,6 +16,9 @@
 // 04/11/12 Russ Anderson <rja@sgi.com>
 //		   Added per cpu MCA/INIT stack save areas.
 //
+// 12/08/05 Keith Owens <kaos@sgi.com>
+//		   Use per cpu MCA/INIT stacks for all data.
+//
 #include <linux/config.h>
 #include <linux/threads.h>
 
@@ -25,96 +28,23 @@
 #include <asm/mca_asm.h>
 #include <asm/mca.h>
 
-/*
- * When we get a machine check, the kernel stack pointer is no longer
- * valid, so we need to set a new stack pointer.
- */
-#define	MINSTATE_PHYS	/* Make sure stack access is physical for MINSTATE */
-
-/*
- * Needed for return context to SAL
- */
-#define IA64_MCA_SAME_CONTEXT	0
-#define IA64_MCA_COLD_BOOT	-2
-
-#include "minstate.h"
-
-/*
- * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
- *		1. GR1 = OS GP
- *		2. GR8 = PAL_PROC physical address
- *		3. GR9 = SAL_PROC physical address
- *		4. GR10 = SAL GP (physical)
- *		5. GR11 = Rendez state
- *		6. GR12 = Return address to location within SAL_CHECK
- */
-#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp)		\
-	LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
-	st8	[_tmp]=r1,0x08;;			\
-	st8	[_tmp]=r8,0x08;;			\
-	st8	[_tmp]=r9,0x08;;			\
-	st8	[_tmp]=r10,0x08;;			\
-	st8	[_tmp]=r11,0x08;;			\
-	st8	[_tmp]=r12,0x08;;			\
-	st8	[_tmp]=r17,0x08;;			\
-	st8	[_tmp]=r18,0x08
-
-/*
- * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
- * (p6) is executed if we never entered virtual mode (TLB error)
- * (p7) is executed if we entered virtual mode as expected (normal case)
- *	1. GR8 = OS_MCA return status
- *	2. GR9 = SAL GP (physical)
- *	3. GR10 = 0/1 returning same/new context
- *	4. GR22 = New min state save area pointer
- *	returns ptr to SAL rtn save loc in _tmp
- */
-#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp)	\
-	movl	_tmp=ia64_os_to_sal_handoff_state;;	\
-	DATA_VA_TO_PA(_tmp);;				\
-	ld8	r8=[_tmp],0x08;;			\
-	ld8	r9=[_tmp],0x08;;			\
-	ld8	r10=[_tmp],0x08;;			\
-	ld8	r22=[_tmp],0x08;;
-	// now _tmp is pointing to SAL rtn save location
-
-/*
- * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state
- *	imots_os_status=IA64_MCA_COLD_BOOT
- *	imots_sal_gp=SAL GP
- *	imots_context=IA64_MCA_SAME_CONTEXT
- *	imots_new_min_state=Min state save area pointer
- *	imots_sal_check_ra=Return address to location within SAL_CHECK
- *
- */
-#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
-	movl	tmp=IA64_MCA_COLD_BOOT;					\
-	movl	sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state);	\
-	movl	os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);;	\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],48;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	movl	tmp=IA64_MCA_SAME_CONTEXT;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],-8;;				\
-	st8     [os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff];;				\
-	st8     [os_to_sal_handoff]=tmp;;
+#include "entry.h"
 
 #define GET_IA64_MCA_DATA(reg)						\
 	GET_THIS_PADDR(reg, ia64_mca_data)				\
 	;;								\
 	ld8 reg=[reg]
 
-	.global ia64_os_mca_dispatch
-	.global ia64_os_mca_dispatch_end
-	.global ia64_sal_to_os_handoff_state
-	.global	ia64_os_to_sal_handoff_state
 	.global ia64_do_tlb_purge
+	.global ia64_os_mca_dispatch
+	.global ia64_os_init_dispatch_monarch
+	.global ia64_os_init_dispatch_slave
 
 	.text
 	.align 16
 
+//StartMain////////////////////////////////////////////////////////////////////
+
 /*
  * Just the TLB purge part is moved to a separate function
  * so we can re-use the code for cpu hotplug code as well
@@ -207,34 +137,31 @@ ia64_do_tlb_purge:
 	br.sptk.many b1
 	;;
 
-ia64_os_mca_dispatch:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
 
+ia64_os_mca_dispatch:
 	// Serialize all MCA processing
 	mov	r3=1;;
 	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
 ia64_os_mca_spin:
-	xchg8	r4=[r2],r3;;
+	xchg4	r4=[r2],r3;;
 	cmp.ne	p6,p0=r4,r0
 (p6)	br ia64_os_mca_spin
 
-	// Save the SAL to OS MCA handoff state as defined
-	// by SAL SPEC 3.0
-	// NOTE : The order in which the state gets saved
-	//	  is dependent on the way the C-structure
-	//	  for ia64_mca_sal_to_os_state_t has been
-	//	  defined in include/asm/mca.h
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
-	;;
-
-	// LOG PROCESSOR STATE INFO FROM HERE ON..
-begin_os_mca_dump:
-	br	ia64_os_mca_proc_state_dump;;
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	mov r19=1				// All MCA events are treated as monarch (for now)
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
 
-ia64_os_mca_done_dump:
-
-	LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
+	GET_IA64_MCA_DATA(r2)
+	// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
+	;;
+	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2
 	;;
-	ld8 r18=[r16]		// Get processor state parameter on existing PALE_CHECK.
+	ld8 r18=[r3]				// Get processor state parameter on existing PALE_CHECK.
 	;;
 	tbit.nz p6,p7=r18,60
 (p7)	br.spnt done_tlb_purge_and_reload
@@ -323,624 +250,775 @@ ia64_reload_tr:
 	itr.d dtr[r20]=r16
 	;;
 	srlz.d
-	;;
-	br.sptk.many done_tlb_purge_and_reload
-err:
-	COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
-	br.sptk.many ia64_os_mca_done_restore
 
 done_tlb_purge_and_reload:
 
-	// Setup new stack frame for OS_MCA handling
-	GET_IA64_MCA_DATA(r2)
-	;;
-	add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
-	;;
-	rse_switch_context(r6,r3,r2);;	// RSC management in this new context
+	// switch to per cpu MCA stack
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
 
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2
-	;;
-	mov r12=r2		// establish new stack-pointer
+	mov r7=r2
 
         // Enter virtual mode from physical mode
 	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
-ia64_os_mca_virtual_begin:
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_mca_virtual_begin be a global function.
+ENTRY(ia64_os_mca_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
+	;;
 
 	// Call virtual mode handler
-	movl		r2=ia64_mca_ucmc_handler;;
-	mov		b6=r2;;
-	br.call.sptk.many    b0=b6;;
-.ret0:
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_mca_handler
+
 	// Revert back to physical mode before going back to SAL
 	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
 ia64_os_mca_virtual_end:
 
-	// restore the original stack frame here
+END(ia64_os_mca_virtual_begin)
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the MCA handler frame
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
+
+	// release lock
+	LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
+	st4.rel		[r3]=r0
+
+	br		b0
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+//
+// SAL to OS entry point for INIT on all processors.  This has been defined for
+// registration purposes with SAL as a part of ia64_mca_init.  Monarch and
+// slave INIT have identical processing, except for the value of the
+// sos->monarch flag in r19.
+//
+
+ia64_os_init_dispatch_monarch:
+	mov r19=1				// Bow, bow, ye lower middle classes!
+	br.sptk ia64_os_init_dispatch
+
+ia64_os_init_dispatch_slave:
+	mov r19=0				// <igor>yeth, mathter</igor>
+
+ia64_os_init_dispatch:
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
+
+	// switch to per cpu INIT stack
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
+
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	;;
-	movl    r4=IA64_PSR_MC
+	mov r7=r2
+
+        // Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4)
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_init_virtual_begin be a global function.
+ENTRY(ia64_os_init_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
 	;;
-	rse_return_context(r4,r3,r2)	// switch from interrupt context for RSE
 
-	// let us restore all the registers from our PSI structure
-	mov	r8=gp
+	// Call virtual mode handler
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
 	;;
-begin_os_mca_restore:
-	br	ia64_os_mca_proc_state_restore;;
+	add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_init_handler
 
-ia64_os_mca_done_restore:
-	OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
-	// branch back to SALE_CHECK
-	ld8		r3=[r2];;
-	mov		b0=r3;;		// SAL_CHECK return address
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4)
+ia64_os_init_virtual_end:
 
-	// release lock
-	movl		r3=ia64_mca_serialize;;
-	DATA_VA_TO_PA(r3);;
-	st8.rel		[r3]=r0
+END(ia64_os_init_virtual_begin)
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
 
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the INIT handler frame
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
 	br		b0
-	;;
-ia64_os_mca_dispatch_end:
+
 //EndMain//////////////////////////////////////////////////////////////////////
 
+// common defines for the stubs
+#define	ms		r4
+#define	regs		r5
+#define	temp1		r2	/* careful, it overlaps with input registers */
+#define	temp2		r3	/* careful, it overlaps with input registers */
+#define	temp3		r7
+#define	temp4		r14
+
 
 //++
 // Name:
-//      ia64_os_mca_proc_state_dump()
+//	ia64_state_save()
 //
 // Stub Description:
 //
-//       This stub dumps the processor state during MCHK to a data area
+//	Save the state that is not in minstate.  This is sensitive to the layout of
+//	struct ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	The OS to SAL section of struct ia64_sal_os_state is set to a default
+//	value of cold boot (MCA) or warm boot (INIT) and return to the same
+//	context.  ia64_sal_os_state is also used to hold some registers that
+//	need to be saved and restored across the stack switches.
+//
+//	Most input registers to this stub come from PAL/SAL
+//	r1  os gp, physical
+//	r8  pal_proc entry point
+//	r9  sal_proc entry point
+//	r10 sal gp
+//	r11 MCA - rendevzous state, INIT - reason code
+//	r12 sal return address
+//	r17 pal min_state
+//	r18 processor state parameter
+//	r19 monarch flag, set by the caller of this routine
+//
+//	In addition to the SAL to OS state, this routine saves all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those that are already in the PAL minstate area.  This
+//	results in a partial pt_regs and switch_stack, the C code copies the
+//	remaining registers from PAL minstate to pt_regs and switch_stack.  The
+//	resulting structures contain all the state of the original process when
+//	MCA/INIT occurred.
 //
 //--
 
-ia64_os_mca_proc_state_dump:
-// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
-//  to virtual addressing mode.
-	GET_IA64_MCA_DATA(r2)
+ia64_state_save:
+	add regs=MCA_SOS_OFFSET, r3
+	add ms=MCA_SOS_OFFSET+8, r3
+	mov b0=r2		// save return address
+	cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add temp1=temp2, regs	// struct ia64_sal_os_state on MCA or INIT stack
+	add temp2=temp2, ms	// struct ia64_sal_os_state+8 on MCA or INIT stack
+	;;
+	mov regs=temp1		// save the start of sos
+	st8 [temp1]=r1,16	// os_gp
+	st8 [temp2]=r8,16	// pal_proc
+	;;
+	st8 [temp1]=r9,16	// sal_proc
+	st8 [temp2]=r11,16	// rv_rc
+	mov r11=cr.iipa
+	;;
+	st8 [temp1]=r18,16	// proc_state_param
+	st8 [temp2]=r19,16	// monarch
+	mov r6=IA64_KR(CURRENT)
+	;;
+	st8 [temp1]=r12,16	// sal_ra
+	st8 [temp2]=r10,16	// sal_gp
+	mov r12=cr.isr
+	;;
+	st8 [temp1]=r17,16	// pal_min_state
+	st8 [temp2]=r6,16	// prev_IA64_KR_CURRENT
+	mov r6=cr.ifa
+	;;
+	st8 [temp1]=r0,16	// prev_task, starts off as NULL
+	st8 [temp2]=r12,16	// cr.isr
+	mov r12=cr.itir
+	;;
+	st8 [temp1]=r6,16	// cr.ifa
+	st8 [temp2]=r12,16	// cr.itir
+	mov r12=cr.iim
+	;;
+	st8 [temp1]=r11,16	// cr.iipa
+	st8 [temp2]=r12,16	// cr.iim
+	mov r6=cr.iha
+(p1)	mov r12=IA64_MCA_COLD_BOOT
+(p2)	mov r12=IA64_INIT_WARM_BOOT
+	;;
+	st8 [temp1]=r6,16	// cr.iha
+	st8 [temp2]=r12		// os_status, default is cold boot
+	mov r6=IA64_MCA_SAME_CONTEXT
+	;;
+	st8 [temp1]=r6		// context, default is same context
+
+	// Save the pt_regs data that is not in minstate.  The previous code
+	// left regs at sos.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	mov temp3=b6
+	mov temp4=b7
+	add temp2=PT(B7), regs
+	;;
+	st8 [temp1]=temp3,PT(AR_CSD)-PT(B6)		// save b6
+	st8 [temp2]=temp4,PT(AR_SSD)-PT(B7)		// save b7
+	mov temp3=ar.csd
+	mov temp4=ar.ssd
+	cover						// must be last in group
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-	;;
-// save ar.NaT
-	mov		r5=ar.unat                  // ar.unat
-
-// save banked GRs 16-31 along with NaT bits
-	bsw.1;;
-	st8.spill	[r2]=r16,8;;
-	st8.spill	[r2]=r17,8;;
-	st8.spill	[r2]=r18,8;;
-	st8.spill	[r2]=r19,8;;
-	st8.spill	[r2]=r20,8;;
-	st8.spill	[r2]=r21,8;;
-	st8.spill	[r2]=r22,8;;
-	st8.spill	[r2]=r23,8;;
-	st8.spill	[r2]=r24,8;;
-	st8.spill	[r2]=r25,8;;
-	st8.spill	[r2]=r26,8;;
-	st8.spill	[r2]=r27,8;;
-	st8.spill	[r2]=r28,8;;
-	st8.spill	[r2]=r29,8;;
-	st8.spill	[r2]=r30,8;;
-	st8.spill	[r2]=r31,8;;
-
-	mov		r4=ar.unat;;
-	st8		[r2]=r4,8                // save User NaT bits for r16-r31
-	mov		ar.unat=r5                  // restore original unat
-	bsw.0;;
-
-//save BRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=b0
-	mov		r5=b1
-	mov		r7=b2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b3
-	mov		r5=b4
-	mov		r7=b5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b6
-	mov		r5=b7;;
-	st8		[r2]=r3,2*8
-	st8		[r4]=r5,2*8;;
-
-cSaveCRs:
-// save CRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=cr.dcr
-	mov		r5=cr.itm
-	mov		r7=cr.iva;;
-
-	st8		[r2]=r3,8*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;            // 48 byte rements
-
-	mov		r3=cr.pta;;
-	st8		[r2]=r3,8*8;;            // 64 byte rements
-
-// if PSR.ic=0, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-begin_skip_intr_regs:
-(p6)	br		SkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=cr.ipsr
-	mov		r5=cr.isr
-	mov		r7=r0;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iip
-	mov		r5=cr.ifa
-	mov		r7=cr.itir;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iipa
-	mov		r5=cr.ifs
-	mov		r7=cr.iim;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr25;;                   // cr.iha
-	st8		[r2]=r3,160;;               // 160 byte rement
-
-SkipIntrRegs:
-	st8		[r2]=r0,152;;               // another 152 byte .
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2                   // duplicate r2 in r6
-
-	mov		r3=cr.lid
-//	mov		r5=cr.ivr                     // cr.ivr, don't read it
-	mov		r7=cr.tpr;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.eoi => cr67
-	mov		r5=r0                       // cr.irr0 => cr68
-	mov		r7=r0;;                     // cr.irr1 => cr69
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.irr2 => cr70
-	mov		r5=r0                       // cr.irr3 => cr71
-	mov		r7=cr.itv;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.pmv
-	mov		r5=cr.cmcv;;
-	st8		[r2]=r3,7*8
-	st8		[r4]=r5,7*8;;
-
-	mov		r3=r0                       // cr.lrr0 => cr80
-	mov		r5=r0;;                     // cr.lrr1 => cr81
-	st8		[r2]=r3,23*8
-	st8		[r4]=r5,23*8;;
-
-	adds		r2=25*8,r2;;
-
-cSaveARs:
-// save ARs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=ar.k0
-	mov		r5=ar.k1
-	mov		r7=ar.k2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k3
-	mov		r5=ar.k4
-	mov		r7=ar.k5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k6
-	mov		r5=ar.k7
-	mov		r7=r0;;                     // ar.kr8
-	st8		[r2]=r3,10*8
-	st8		[r4]=r5,10*8
-	st8		[r6]=r7,10*8;;           // rement by 72 bytes
-
-	mov		r3=ar.rsc
-	mov		ar.rsc=r0			    // put RSE in enforced lazy mode
-	mov		r5=ar.bsp
-	;;
-	mov		r7=ar.bspstore;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.rnat;;
-	st8		[r2]=r3,8*13             // increment by 13x8 bytes
-
-	mov		r3=ar.ccv;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.unat;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.fpsr;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.itc;;
-	st8		[r2]=r3,160                 // 160
-
-	mov		r3=ar.pfs;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.lc;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.ec;;
-	st8		[r2]=r3
-	add		r2=8*62,r2               //padding
-
-// save RRs
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-
-cStRR:
-	dep.z		r5=r4,61,3;;
-	mov		r3=rr[r5];;
-	st8		[r2]=r3,8
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRR
-	;;
-end_os_mca_dump:
-	br	ia64_os_mca_done_dump;;
+	st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD)	// save ar.csd
+	st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD)		// save ar.ssd
+	mov temp3=ar.unat
+	mov temp4=ar.pfs
+	;;
+	st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT)	// save ar.unat
+	st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS)	// save ar.pfs
+	mov temp3=ar.rnat
+	mov temp4=ar.bspstore
+	;;
+	st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT)	// save ar.rnat
+	st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE)	// save ar.bspstore
+	mov temp3=ar.bsp
+	;;
+	sub temp3=temp3, temp4	// ar.bsp - ar.bspstore
+	mov temp4=ar.fpsr
+	;;
+	shl temp3=temp3,16	// compute ar.rsc to be used for "loadrs"
+	;;
+	st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS)		// save loadrs
+	st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR)		// save ar.fpsr
+	mov temp3=ar.ccv
+	;;
+	st8 [temp1]=temp3,PT(F7)-PT(AR_CCV)		// save ar.ccv
+	stf.spill [temp2]=f6,PT(F8)-PT(F6)
+	;;
+	stf.spill [temp1]=f7,PT(F9)-PT(F7)
+	stf.spill [temp2]=f8,PT(F10)-PT(F8)
+	;;
+	stf.spill [temp1]=f9,PT(F11)-PT(F9)
+	stf.spill [temp2]=f10
+	;;
+	stf.spill [temp1]=f11
+
+	// Save the switch_stack data that is not in minstate nor pt_regs.  The
+	// previous code left regs at pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	stf.spill [temp1]=f2,32
+	stf.spill [temp2]=f3,32
+	;;
+	stf.spill [temp1]=f4,32
+	stf.spill [temp2]=f5,32
+	;;
+	stf.spill [temp1]=f12,32
+	stf.spill [temp2]=f13,32
+	;;
+	stf.spill [temp1]=f14,32
+	stf.spill [temp2]=f15,32
+	;;
+	stf.spill [temp1]=f16,32
+	stf.spill [temp2]=f17,32
+	;;
+	stf.spill [temp1]=f18,32
+	stf.spill [temp2]=f19,32
+	;;
+	stf.spill [temp1]=f20,32
+	stf.spill [temp2]=f21,32
+	;;
+	stf.spill [temp1]=f22,32
+	stf.spill [temp2]=f23,32
+	;;
+	stf.spill [temp1]=f24,32
+	stf.spill [temp2]=f25,32
+	;;
+	stf.spill [temp1]=f26,32
+	stf.spill [temp2]=f27,32
+	;;
+	stf.spill [temp1]=f28,32
+	stf.spill [temp2]=f29,32
+	;;
+	stf.spill [temp1]=f30,SW(B2)-SW(F30)
+	stf.spill [temp2]=f31,SW(B3)-SW(F31)
+	mov temp3=b2
+	mov temp4=b3
+	;;
+	st8 [temp1]=temp3,16	// save b2
+	st8 [temp2]=temp4,16	// save b3
+	mov temp3=b4
+	mov temp4=b5
+	;;
+	st8 [temp1]=temp3,SW(AR_LC)-SW(B4)	// save b4
+	st8 [temp2]=temp4	// save b5
+	mov temp3=ar.lc
+	;;
+	st8 [temp1]=temp3	// save ar.lc
+
+	// FIXME: Some proms are incorrectly accessing the minstate area as
+	// cached data.  The C code uses region 6, uncached virtual.  Ensure
+	// that there is no cache data lying around for the first 1K of the
+	// minstate area.
+	// Remove this code in September 2006, that gives platforms a year to
+	// fix their proms and get their customers updated.
+
+	add r1=32*1,r17
+	add r2=32*2,r17
+	add r3=32*3,r17
+	add r4=32*4,r17
+	add r5=32*5,r17
+	add r6=32*6,r17
+	add r7=32*7,r17
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
 //++
 // Name:
-//       ia64_os_mca_proc_state_restore()
+//	ia64_state_restore()
 //
 // Stub Description:
 //
-//       This is a stub to restore the saved processor state during MCHK
+//	Restore the SAL/OS state.  This is sensitive to the layout of struct
+//	ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	In addition to the SAL to OS state, this routine restores all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those in the PAL minstate area.
 //
 //--
 
-ia64_os_mca_proc_state_restore:
+ia64_state_restore:
+	// Restore the switch_stack data that is not in minstate nor pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET, r3
+	mov b0=r2		// save return address
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add regs=temp2, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	ldf.fill f2=[temp1],32
+	ldf.fill f3=[temp2],32
+	;;
+	ldf.fill f4=[temp1],32
+	ldf.fill f5=[temp2],32
+	;;
+	ldf.fill f12=[temp1],32
+	ldf.fill f13=[temp2],32
+	;;
+	ldf.fill f14=[temp1],32
+	ldf.fill f15=[temp2],32
+	;;
+	ldf.fill f16=[temp1],32
+	ldf.fill f17=[temp2],32
+	;;
+	ldf.fill f18=[temp1],32
+	ldf.fill f19=[temp2],32
+	;;
+	ldf.fill f20=[temp1],32
+	ldf.fill f21=[temp2],32
+	;;
+	ldf.fill f22=[temp1],32
+	ldf.fill f23=[temp2],32
+	;;
+	ldf.fill f24=[temp1],32
+	ldf.fill f25=[temp2],32
+	;;
+	ldf.fill f26=[temp1],32
+	ldf.fill f27=[temp2],32
+	;;
+	ldf.fill f28=[temp1],32
+	ldf.fill f29=[temp2],32
+	;;
+	ldf.fill f30=[temp1],SW(B2)-SW(F30)
+	ldf.fill f31=[temp2],SW(B3)-SW(F31)
+	;;
+	ld8 temp3=[temp1],16	// restore b2
+	ld8 temp4=[temp2],16	// restore b3
+	;;
+	mov b2=temp3
+	mov b3=temp4
+	ld8 temp3=[temp1],SW(AR_LC)-SW(B4)	// restore b4
+	ld8 temp4=[temp2]	// restore b5
+	;;
+	mov b4=temp3
+	mov b5=temp4
+	ld8 temp3=[temp1]	// restore ar.lc
+	;;
+	mov ar.lc=temp3
 
-// Restore bank1 GR16-31
-	GET_IA64_MCA_DATA(r2)
+	// Restore the pt_regs data that is not in minstate.  The previous code
+	// left regs at switch_stack.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	add temp2=PT(B7), regs
+	;;
+	ld8 temp3=[temp1],PT(AR_CSD)-PT(B6)		// restore b6
+	ld8 temp4=[temp2],PT(AR_SSD)-PT(B7)		// restore b7
+	;;
+	mov b6=temp3
+	mov b7=temp4
+	ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD)	// restore ar.csd
+	ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD)		// restore ar.ssd
+	;;
+	mov ar.csd=temp3
+	mov ar.ssd=temp4
+	ld8 temp3=[temp1]				// restore ar.unat
+	add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1
+	ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS)	// restore ar.pfs
+	;;
+	mov ar.unat=temp3
+	mov ar.pfs=temp4
+	// ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack.
+	ld8 temp3=[temp1],PT(F6)-PT(AR_CCV)		// restore ar.ccv
+	ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR)		// restore ar.fpsr
+	;;
+	mov ar.ccv=temp3
+	mov ar.fpsr=temp4
+	ldf.fill f6=[temp1],PT(F8)-PT(F6)
+	ldf.fill f7=[temp2],PT(F9)-PT(F7)
+	;;
+	ldf.fill f8=[temp1],PT(F10)-PT(F8)
+	ldf.fill f9=[temp2],PT(F11)-PT(F9)
+	;;
+	ldf.fill f10=[temp1]
+	ldf.fill f11=[temp2]
+
+	// Restore the SAL to OS state. The previous code left regs at pt_regs.
+	add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-
-restore_GRs:                                    // restore bank-1 GRs 16-31
-	bsw.1;;
-	add		r3=16*8,r2;;                // to get to NaT of GR 16-31
-	ld8		r3=[r3];;
-	mov		ar.unat=r3;;                // first restore NaT
-
-	ld8.fill	r16=[r2],8;;
-	ld8.fill	r17=[r2],8;;
-	ld8.fill	r18=[r2],8;;
-	ld8.fill	r19=[r2],8;;
-	ld8.fill	r20=[r2],8;;
-	ld8.fill	r21=[r2],8;;
-	ld8.fill	r22=[r2],8;;
-	ld8.fill	r23=[r2],8;;
-	ld8.fill	r24=[r2],8;;
-	ld8.fill	r25=[r2],8;;
-	ld8.fill	r26=[r2],8;;
-	ld8.fill	r27=[r2],8;;
-	ld8.fill	r28=[r2],8;;
-	ld8.fill	r29=[r2],8;;
-	ld8.fill	r30=[r2],8;;
-	ld8.fill	r31=[r2],8;;
-
-	ld8		r3=[r2],8;;              // increment to skip NaT
-	bsw.0;;
-
-restore_BRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b0=r3
-	mov		b1=r5
-	mov		b2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b3=r3
-	mov		b4=r5
-	mov		b5=r7;;
-
-	ld8		r3=[r2],2*8
-	ld8		r5=[r4],2*8;;
-	mov		b6=r3
-	mov		b7=r5;;
-
-restore_CRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],8*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;            // 48 byte increments
-	mov		cr.dcr=r3
-	mov		cr.itm=r5
-	mov		cr.iva=r7;;
-
-	ld8		r3=[r2],8*8;;            // 64 byte increments
-//      mov		cr.pta=r3
-
-
-// if PSR.ic=1, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-
-begin_rskip_intr_regs:
-(p6)	br		rSkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.ipsr=r3
-//	mov		cr.isr=r5                   // cr.isr is read only
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iip=r3
-	mov		cr.ifa=r5
-	mov		cr.itir=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iipa=r3
-	mov		cr.ifs=r5
-	mov		cr.iim=r7
-
-	ld8		r3=[r2],160;;               // 160 byte increment
-	mov		cr.iha=r3
-
-rSkipIntrRegs:
-	ld8		r3=[r2],152;;               // another 152 byte inc.
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2;;                 // duplicate r2 in r6
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-	mov		cr.lid=r3
-//	mov		cr.ivr=r5                   // cr.ivr is read only
-	mov		cr.tpr=r7;;
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.eoi=r3
-//	mov		cr.irr0=r5                  // cr.irr0 is read only
-//	mov		cr.irr1=r7;;                // cr.irr1 is read only
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.irr2=r3                  // cr.irr2 is read only
-//	mov		cr.irr3=r5                  // cr.irr3 is read only
-	mov		cr.itv=r7;;
-
-	ld8		r3=[r2],8*7
-	ld8		r5=[r4],8*7;;
-	mov		cr.pmv=r3
-	mov		cr.cmcv=r5;;
-
-	ld8		r3=[r2],8*23
-	ld8		r5=[r4],8*23;;
-	adds		r2=8*23,r2
-	adds		r4=8*23,r4;;
-//	mov		cr.lrr0=r3
-//	mov		cr.lrr1=r5
-
-	adds		r2=8*2,r2;;
-
-restore_ARs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k0=r3
-	mov		ar.k1=r5
-	mov		ar.k2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k3=r3
-	mov		ar.k4=r5
-	mov		ar.k5=r7;;
-
-	ld8		r3=[r2],10*8
-	ld8		r5=[r4],10*8
-	ld8		r7=[r6],10*8;;
-	mov		ar.k6=r3
-	mov		ar.k7=r5
-	;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-//	mov		ar.rsc=r3
-//	mov		ar.bsp=r5                   // ar.bsp is read only
-	mov		ar.rsc=r0			    // make sure that RSE is in enforced lazy mode
-	;;
-	mov		ar.bspstore=r7;;
-
-	ld8		r9=[r2],8*13;;
-	mov		ar.rnat=r9
-
-	mov		ar.rsc=r3
-	ld8		r3=[r2],8*4;;
-	mov		ar.ccv=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.unat=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.fpsr=r3
-
-	ld8		r3=[r2],160;;               // 160
-//      mov		ar.itc=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.pfs=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.lc=r3
-
-	ld8		r3=[r2];;
-	mov		ar.ec=r3
-	add		r2=8*62,r2;;             // padding
-
-restore_RRs:
-	mov		r5=ar.lc
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-cStRRr:
-	dep.z		r7=r4,61,3
-	ld8		r3=[r2],8;;
-	mov		rr[r7]=r3                   // what are its access previledges?
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRRr
-	;;
-	mov		ar.lc=r5
-	;;
-end_os_mca_restore:
-	br	ia64_os_mca_done_restore;;
+	add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs
+	add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs
+	;;
+	ld8 r12=[temp1],16	// sal_ra
+	ld8 r9=[temp2],16	// sal_gp
+	;;
+	ld8 r22=[temp1],24	// pal_min_state, virtual.  skip prev_task
+	ld8 r21=[temp2],16	// prev_IA64_KR_CURRENT
+	;;
+	ld8 temp3=[temp1],16	// cr.isr
+	ld8 temp4=[temp2],16	// cr.ifa
+	;;
+	mov cr.isr=temp3
+	mov cr.ifa=temp4
+	ld8 temp3=[temp1],16	// cr.itir
+	ld8 temp4=[temp2],16	// cr.iipa
+	;;
+	mov cr.itir=temp3
+	mov cr.iipa=temp4
+	ld8 temp3=[temp1],16	// cr.iim
+	ld8 temp4=[temp2],16	// cr.iha
+	;;
+	mov cr.iim=temp3
+	mov cr.iha=temp4
+	dep r22=0,r22,62,2	// pal_min_state, physical, uncached
+	mov IA64_KR(CURRENT)=r21
+	ld8 r8=[temp1]		// os_status
+	ld8 r10=[temp2]		// context
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
-// ok, the issue here is that we need to save state information so
-// it can be useable by the kernel debugger and show regs routines.
-// In order to do this, our best bet is save the current state (plus
-// the state information obtain from the MIN_STATE_AREA) into a pt_regs
-// format.  This way we can pass it on in a useable format.
+//++
+// Name:
+//	ia64_new_stack()
 //
-
+// Stub Description:
 //
-// SAL to OS entry point for INIT on the monarch processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+//	Switch to the MCA/INIT stack.
 //
-// When we get here, the following registers have been
-// set by the SAL for our use
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
-//		1. GR1 = OS INIT GP
-//		2. GR8 = PAL_PROC physical address
-//		3. GR9 = SAL_PROC physical address
-//		4. GR10 = SAL GP (physical)
-//		5. GR11 = Init Reason
-//			0 = Received INIT for event other than crash dump switch
-//			1 = Received wakeup at the end of an OS_MCA corrected machine check
-//			2 = Received INIT dude to CrashDump switch assertion
+//	On entry RBS is still on the original stack, this routine switches RBS
+//	to use the MCA/INIT stack.
 //
-//		6. GR12 = Return address to location within SAL_INIT procedure
-
+//	On entry, sos->pal_min_state is physical, on exit it is virtual.
+//
+//--
 
-GLOBAL_ENTRY(ia64_monarch_init_handler)
-	.prologue
-	// stash the information the SAL passed to os
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ia64_new_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp1)
+	invala
 	;;
-	SAVE_MIN_WITH_COVER
+	add temp2=temp2, temp1		// struct ia64_sal_os_state.pal_min_state on MCA or INIT stack
+	add regs=regs, temp1		// struct pt_regs on MCA or INIT stack
 	;;
-	mov r8=cr.ifa
-	mov r9=cr.isr
-	adds r3=8,r2				// set up second base pointer
+	// Address of minstate area provided by PAL is physical, uncacheable.
+	// Convert to Linux virtual address in region 6 for C code.
+	ld8 ms=[temp2]			// pal_min_state, physical
 	;;
-	SAVE_REST
-
-// ok, enough should be saved at this point to be dangerous, and supply
-// information for a dump
-// We need to switch to Virtual mode before hitting the C functions.
+	dep temp1=-1,ms,62,2		// set region 6
+	mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET
+	;;
+	st8 [temp2]=temp1		// pal_min_state, virtual
 
-	movl	r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
-	mov	r3=psr	// get the current psr, minimum enabled at this point
+	add temp4=temp3, regs		// start of bspstore on new stack
 	;;
-	or	r2=r2,r3
+	mov ar.bspstore=temp4		// switch RBS to MCA/INIT stack
 	;;
-	movl	r3=IVirtual_Switch
+	flushrs				// must be first in group
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_old_stack()
+//
+// Stub Description:
+//
+//	Switch to the old stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	On entry, pal_min_state is virtual, on exit it is physical.
+//
+//	On entry RBS is on the MCA/INIT stack, this routine switches RBS
+//	back to the previous stack.
+//
+//	The psr is set to all zeroes.  SAL return requires either all zeroes or
+//	just psr.mc set.  Leaving psr.mc off allows INIT to be issued if this
+//	code does not perform correctly.
+//
+//	The dirty registers at the time of the event were flushed to the
+//	MCA/INIT stack in ia64_pt_regs_save().  Restore the dirty registers
+//	before reverting to the previous bspstore.
+//--
+
+ia64_old_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp2)
+	LOAD_PHYSICAL(p0,temp1,1f)
 	;;
-	mov	cr.iip=r3	// short return to set the appropriate bits
-	mov	cr.ipsr=r2	// need to do an rfi to set appropriate bits
+	mov cr.ipsr=r0
+	mov cr.ifs=r0
+	mov cr.iip=temp1
 	;;
+	invala
 	rfi
+1:
+
+	add regs=regs, temp2		// struct pt_regs on MCA or INIT stack
 	;;
-IVirtual_Switch:
-	//
-	// We should now be running virtual
-	//
-	// Let's call the C handler to get the rest of the state info
-	//
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	add temp1=PT(LOADRS), regs
 	;;
-	adds out0=16,sp				// out0 = pointer to pt_regs
+	ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS)	// restore loadrs
 	;;
-	DO_SAVE_SWITCH_STACK
-	.body
-	adds out1=16,sp				// out0 = pointer to switch_stack
+	ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE)	// restore ar.bspstore
+	mov ar.rsc=temp2
+	;;
+	loadrs
+	ld8 temp4=[temp1]		// restore ar.rnat
+	;;
+	mov ar.bspstore=temp3		// back to old stack
+	;;
+	mov ar.rnat=temp4
+	;;
+
+	br.sptk b0
 
-	br.call.sptk.many rp=ia64_init_handler
-.ret1:
+//EndStub//////////////////////////////////////////////////////////////////////
 
-return_from_init:
-	br.sptk return_from_init
-END(ia64_monarch_init_handler)
 
+//++
+// Name:
+//	ia64_set_kernel_registers()
 //
-// SAL to OS entry point for INIT on the slave processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+// Stub Description:
+//
+//	Set the registers that are required by the C code in order to run on an
+//	MCA/INIT stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
+//--
+
+ia64_set_kernel_registers:
+	add temp3=MCA_SP_OFFSET, r3
+	add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3
+	mov b0=r2		// save return address
+	GET_IA64_MCA_DATA(temp1)
+	;;
+	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
+	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
+	add r13=temp1, r3	// set current to start of MCA/INIT stack
+	;;
+	ld8 r1=[temp4]		// OS GP from SAL OS state
+	;;
+	DATA_PA_TO_VA(r1,temp1)
+	DATA_PA_TO_VA(r12,temp2)
+	DATA_PA_TO_VA(r13,temp3)
+	;;
+	mov IA64_KR(CURRENT)=r13
+
+	// FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK?
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+#undef	ms
+#undef	regs
+#undef	temp1
+#undef	temp2
+#undef	temp3
+#undef	temp4
+
 
-GLOBAL_ENTRY(ia64_slave_init_handler)
-1:	br.sptk 1b
-END(ia64_slave_init_handler)
+// Support function for mca.c, it is here to avoid using inline asm.  Given the
+// address of an rnat slot, if that address is below the current ar.bspstore
+// then return the contents of that slot, otherwise return the contents of
+// ar.rnat.
+GLOBAL_ENTRY(ia64_get_rnat)
+	alloc r14=ar.pfs,1,0,0,0
+	mov ar.rsc=0
+	;;
+	mov r14=ar.bspstore
+	;;
+	cmp.lt p6,p7=in0,r14
+	;;
+(p6)	ld8 r8=[in0]
+(p7)	mov r8=ar.rnat
+	mov ar.rsc=3
+	br.ret.sptk.many rp
+END(ia64_get_rnat)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index abc0113a821..6e683745af4 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -4,6 +4,8 @@
  *
  * Copyright (C) 2004 FUJITSU LIMITED
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -38,10 +40,6 @@
 /* max size of SAL error record (default) */
 static int sal_rec_max = 10000;
 
-/* from mca.c */
-static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
-static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
-
 /* from mca_drv_asm.S */
 extern void *mca_handler_bhhook(void);
 
@@ -316,7 +314,8 @@ init_record_index_pools(void)
  */
 
 static mca_type_t
-is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
 
@@ -327,7 +326,7 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
 	 * Therefore it is local MCA when rendezvous has not been requested.
 	 * Failed to rendezvous, the system must be down.
 	 */
-	switch (sal_to_os_handoff_state->imsto_rendez_state) {
+	switch (sos->rv_rc) {
 		case -1: /* SAL rendezvous unsuccessful */
 			return MCA_IS_GLOBAL;
 		case  0: /* SAL rendezvous not required */
@@ -388,7 +387,8 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
  */
 
 static int
-recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
 {
 	sal_log_mod_error_info_t *smei;
 	pal_min_state_area_t *pmsa;
@@ -426,7 +426,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
 			 *  setup for resume to bottom half of MCA,
 			 * "mca_handler_bhhook"
 			 */
-			pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
+			pmsa = sos->pal_min_state;
 			/* pass to bhhook as 1st argument (gr8) */
 			pmsa->pmsa_gr[8-1] = smei->target_identifier;
 			/* set interrupted return address (but no use) */
@@ -459,7 +459,8 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
  */
 
 static int
-recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			    struct ia64_sal_os_state *sos)
 {
 	int status = 0;
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
@@ -469,7 +470,7 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
 		case 1: /* partial read */
 		case 3: /* full line(cpu) read */
 		case 9: /* I/O space read */
-			status = recover_from_read_error(slidx, peidx, pbci);
+			status = recover_from_read_error(slidx, peidx, pbci, sos);
 			break;
 		case 0: /* unknown */
 		case 2: /* partial write */
@@ -508,7 +509,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
  */
 
 static int
-recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			     struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
 
@@ -545,7 +547,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 	 * This means "there are some platform errors".
 	 */
 	if (platform) 
-		return recover_from_platform_error(slidx, peidx, pbci);
+		return recover_from_platform_error(slidx, peidx, pbci, sos);
 	/* 
 	 * On account of strange SAL error record, we cannot recover. 
 	 */
@@ -562,8 +564,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 
 static int
 mca_try_to_recover(void *rec, 
-	ia64_mca_sal_to_os_state_t *sal_to_os_state,
-	ia64_mca_os_to_sal_state_t *os_to_sal_state)
+	struct ia64_sal_os_state *sos)
 {
 	int platform_err;
 	int n_proc_err;
@@ -571,10 +572,6 @@ mca_try_to_recover(void *rec,
 	peidx_table_t peidx;
 	pal_bus_check_info_t pbci;
 
-	/* handoff state from/to mca.c */
-	sal_to_os_handoff_state = sal_to_os_state;
-	os_to_sal_handoff_state = os_to_sal_state;
-
 	/* Make index of SAL error record */
 	platform_err = mca_make_slidx(rec, &slidx);
 
@@ -597,11 +594,11 @@ mca_try_to_recover(void *rec,
 	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
 
 	/* Check whether MCA is global or not */
-	if (is_mca_global(&peidx, &pbci))
+	if (is_mca_global(&peidx, &pbci, sos))
 		return 0;
 	
 	/* Try to recover a processor error */
-	return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
+	return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci, sos);
 }
 
 /*
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index f6d8a010d99..85ed54179af 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -5,73 +5,6 @@
 #include "entry.h"
 
 /*
- * For ivt.s we want to access the stack virtually so we don't have to disable translation
- * on interrupts.
- *
- *  On entry:
- *	r1:	pointer to current task (ar.k6)
- */
-#define MINSTATE_START_SAVE_MIN_VIRT								\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-	;;											\
-(pUStk)	mov.m r24=ar.rnat;									\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
-(pKStk) mov r1=sp;					/* get sp  */				\
-	;;											\
-(pUStk) lfetch.fault.excl.nt1 [r22];								\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */
-
-#define MINSTATE_END_SAVE_MIN_VIRT								\
-	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
-	;;
-
-/*
- * For mca_asm.S we want to access the stack physically since the state is saved before we
- * go virtual and don't want to destroy the iip or ipsr.
- */
-#define MINSTATE_START_SAVE_MIN_PHYS								\
-(pKStk) mov r3=IA64_KR(PER_CPU_DATA);;								\
-(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
-(pKStk) ld8 r3 = [r3];;										\
-(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
-(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
-	;;											\
-(pUStk)	mov r24=ar.rnat;									\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-(pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-
-#define MINSTATE_END_SAVE_MIN_PHYS								\
-	dep r12=-1,r12,61,3;		/* make sp a kernel virtual address */			\
-	;;
-
-#ifdef MINSTATE_VIRT
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
-#endif
-
-#ifdef MINSTATE_PHYS
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
-#endif
-
-/*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
  * on.
@@ -97,7 +30,7 @@
  * we can pass interruption state as arguments to a handler.
  */
 #define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
-	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
 	mov r27=ar.rsc;			/* M */							\
 	mov r20=r1;			/* A */							\
 	mov r25=ar.unat;		/* M */							\
@@ -118,7 +51,21 @@
 	SAVE_IFS;										\
 	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
 	;;											\
-	MINSTATE_START_SAVE_MIN									\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
 	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
 	adds r16=PT(CR_IPSR),r1;								\
 	;;											\
@@ -181,7 +128,8 @@
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
-	MINSTATE_END_SAVE_MIN
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
 
 /*
  * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f1201ac8a11..1650353e3f7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -38,6 +38,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/bitops.h>
+#include <linux/rcupdate.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -496,7 +497,7 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
-static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pfm_alt_install_check);
 static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
 
 static struct proc_dir_entry 	*perfmon_dir;
@@ -2217,15 +2218,17 @@ static void
 pfm_free_fd(int fd, struct file *file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt = files_fdtable(files);
 
 	/* 
 	 * there ie no fd_uninstall(), so we do it here
 	 */
 	spin_lock(&files->file_lock);
-        files->fd[fd] = NULL;
+	rcu_assign_pointer(fdt->fd[fd], NULL);
 	spin_unlock(&files->file_lock);
 
-	if (file) put_filp(file);
+	if (file)
+		put_filp(file);
 	put_unused_fd(fd);
 }
 
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 6f0cc7a6634..ca68e6e44a7 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -22,6 +22,11 @@
  *
  * Dec  5 2004	kaos@sgi.com
  *   Standardize which records are cleared automatically.
+ *
+ * Aug 18 2005	kaos@sgi.com
+ *   mca.c may not pass a buffer, a NULL buffer just indicates that a new
+ *   record is available in SAL.
+ *   Replace some NR_CPUS by cpus_online, for hotplug cpu.
  */
 
 #include <linux/types.h>
@@ -193,7 +198,7 @@ shift1_data_saved (struct salinfo_data *data, int shift)
  * The buffer passed from mca.c points to the output from ia64_log_get. This is
  * a persistent buffer but its contents can change between the interrupt and
  * when user space processes the record.  Save the record id to identify
- * changes.
+ * changes.  If the buffer is NULL then just update the bitmap.
  */
 void
 salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
@@ -206,27 +211,29 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 
 	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
 
-	if (irqsafe)
-		spin_lock_irqsave(&data_saved_lock, flags);
-	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
-		if (!data_saved->buffer)
-			break;
-	}
-	if (i == saved_size) {
-		if (!data->saved_num) {
-			shift1_data_saved(data, 0);
-			data_saved = data->data_saved + saved_size - 1;
-		} else
-			data_saved = NULL;
-	}
-	if (data_saved) {
-		data_saved->cpu = smp_processor_id();
-		data_saved->id = ((sal_log_record_header_t *)buffer)->id;
-		data_saved->size = size;
-		data_saved->buffer = buffer;
+	if (buffer) {
+		if (irqsafe)
+			spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+			if (!data_saved->buffer)
+				break;
+		}
+		if (i == saved_size) {
+			if (!data->saved_num) {
+				shift1_data_saved(data, 0);
+				data_saved = data->data_saved + saved_size - 1;
+			} else
+				data_saved = NULL;
+		}
+		if (data_saved) {
+			data_saved->cpu = smp_processor_id();
+			data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+			data_saved->size = size;
+			data_saved->buffer = buffer;
+		}
+		if (irqsafe)
+			spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
-	if (irqsafe)
-		spin_unlock_irqrestore(&data_saved_lock, flags);
 
 	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
 		if (irqsafe)
@@ -244,7 +251,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 	int i;
 	if (!data->open)
 		return;
-	for (i = 0; i < NR_CPUS; ++i) {
+	for_each_online_cpu(i) {
 		if (test_bit(i, &data->cpu_event)) {
 			/* double up() is not a problem, user space will see no
 			 * records for the additional "events".
@@ -291,7 +298,7 @@ retry:
 
 	n = data->cpu_check;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (test_bit(n, &data->cpu_event)) {
+		if (test_bit(n, &data->cpu_event) && cpu_online(n)) {
 			cpu = n;
 			break;
 		}
@@ -585,11 +592,10 @@ salinfo_init(void)
 
 		/* we missed any events before now */
 		online = 0;
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j)) {
-				set_bit(j, &data->cpu_event);
-				++online;
-			}
+		for_each_online_cpu(j) {
+			set_bit(j, &data->cpu_event);
+			++online;
+		}
 		sema_init(&data->sem, online);
 
 		*sdir++ = dir;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 84f89da7c64..1f5c26dbe70 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -384,7 +384,7 @@ setup_arch (char **cmdline_p)
 	if (early_console_setup(*cmdline_p) == 0)
 		mark_bsp_online();
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
 # ifdef CONFIG_ACPI_NUMA
@@ -420,7 +420,7 @@ setup_arch (char **cmdline_p)
 
 	cpu_init();	/* initialize the bootstrap CPU */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	acpi_boot_init();
 #endif
 
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 770fab37928..f2dbcd1db0d 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -35,7 +35,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 		return -ENOMEM;
 
 #ifdef CONFIG_HUGETLB_PAGE
-	if (REGION_NUMBER(addr) == REGION_HPAGE)
+	if (REGION_NUMBER(addr) == RGN_HPAGE)
 		addr = 0;
 #endif
 	if (!addr)
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 92ff46ad21e..706b7734e19 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
 	parent = &sysfs_nodes[cpu_to_node(num)];
 #endif /* CONFIG_NUMA */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 	/*
 	 * If CPEI cannot be re-targetted, and this is
 	 * CPEI target, then dont create the control file
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 4440c8343fa..f970359e7ed 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -15,6 +15,7 @@
 #include <linux/vt_kern.h>		/* For unblank_screen() */
 #include <linux/module.h>       /* for EXPORT_SYMBOL */
 #include <linux/hardirq.h>
+#include <linux/kprobes.h>
 
 #include <asm/fpswa.h>
 #include <asm/ia32.h>
@@ -122,7 +123,7 @@ die_if_kernel (char *str, struct pt_regs *regs, long err)
 }
 
 void
-ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 {
 	siginfo_t siginfo;
 	int sig, code;
@@ -444,7 +445,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
 	return rv;
 }
 
-void
+void __kprobes
 ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 	    unsigned long iim, unsigned long itir, long arg5, long arg6,
 	    long arg7, struct pt_regs regs)
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 490dfc9ab47..4e9d06c48a8 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -184,7 +184,7 @@ uncached_free_page(unsigned long maddr)
 {
 	int node;
 
-	node = nasid_to_cnodeid(NASID_GET(maddr));
+	node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET);
 
 	dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
 
@@ -217,7 +217,7 @@ uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
 
 	memset((char *)vstart, 0, length);
 
-	node = nasid_to_cnodeid(NASID_GET(start));
+	node = paddr_to_nid(start);
 
 	for (; vstart < vend ; vstart += PAGE_SIZE) {
 		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 3288be47bc7..93d5a3b41f6 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -2020,28 +2020,6 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 }
 
 void
-unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
-			    struct pt_regs *pt, struct switch_stack *sw)
-{
-	unsigned long sof;
-
-	init_frame_info(info, t, sw, pt->r12);
-	info->cfm_loc = &pt->cr_ifs;
-	info->unat_loc = &pt->ar_unat;
-	info->pfs_loc = &pt->ar_pfs;
-	sof = *info->cfm_loc & 0x7f;
-	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof);
-	info->ip = pt->cr_iip + ia64_psr(pt)->ri;
-	info->pt = (unsigned long) pt;
-	UNW_DPRINT(3, "unwind.%s:\n"
-		   "  bsp    0x%lx\n"
-		   "  sof    0x%lx\n"
-		   "  ip     0x%lx\n",
-		   __FUNCTION__, info->bsp, sof, info->ip);
-	find_save_locs(info);
-}
-
-void
 unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
 {
 	unsigned long sol;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index a676e79e068..30d8564e960 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -48,6 +48,7 @@ SECTIONS
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
+	KPROBES_TEXT
 	*(.gnu.linkonce.t*)
     }
   .text2 : AT(ADDR(.text2) - LOAD_OFFSET)