21 files changed, 1278 insertions, 89 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c1a02bbc252..b2e2f6509eb 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_SMP)		+= smp.o smpboot.o domain.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 
 # The gate DSO image is built using a special linker script.
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
index fe532c97043..d65e87b6394 100644
--- a/arch/ia64/kernel/domain.c
+++ b/arch/ia64/kernel/domain.c
@@ -14,7 +14,7 @@
 #include <linux/topology.h>
 #include <linux/nodemask.h>
 
-#define SD_NODES_PER_DOMAIN 6
+#define SD_NODES_PER_DOMAIN 16
 
 #ifdef CONFIG_NUMA
 /**
@@ -27,7 +27,7 @@
  *
  * Should use nodemask_t.
  */
-static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
+static int find_next_best_node(int node, unsigned long *used_nodes)
 {
 	int i, n, val, min_val, best_node = 0;
 
@@ -66,7 +66,7 @@ static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
  * should be one that prevents unnecessary balancing, but also spreads tasks
  * out optimally.
  */
-static cpumask_t __devinit sched_domain_node_span(int node)
+static cpumask_t sched_domain_node_span(int node)
 {
 	int i;
 	cpumask_t span, nodemask;
@@ -96,7 +96,7 @@ static cpumask_t __devinit sched_domain_node_span(int node)
 #ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
-static int __devinit cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu)
 {
 	return cpu;
 }
@@ -104,7 +104,7 @@ static int __devinit cpu_to_cpu_group(int cpu)
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
-static int __devinit cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu)
 {
 #ifdef CONFIG_SCHED_SMT
 	return first_cpu(cpu_sibling_map[cpu]);
@@ -125,44 +125,36 @@ static struct sched_group *sched_group_nodes[MAX_NUMNODES];
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
 static struct sched_group sched_group_allnodes[MAX_NUMNODES];
 
-static int __devinit cpu_to_allnodes_group(int cpu)
+static int cpu_to_allnodes_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
 #endif
 
 /*
- * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ * Build sched domains for a given set of cpus and attach the sched domains
+ * to the individual cpus
  */
-void __devinit arch_init_sched_domains(void)
+void build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
-	cpumask_t cpu_default_map;
 
 	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
+	 * Set up domains for cpus specified by the cpu_map.
 	 */
-	cpus_complement(cpu_default_map, cpu_isolated_map);
-	cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
-
-	/*
-	 * Set up domains. Isolated domains just stay on the dummy domain.
-	 */
-	for_each_cpu_mask(i, cpu_default_map) {
+	for_each_cpu_mask(i, *cpu_map) {
 		int group;
 		struct sched_domain *sd = NULL, *p;
 		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
 
-		cpus_and(nodemask, nodemask, cpu_default_map);
+		cpus_and(nodemask, nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
 		if (num_online_cpus()
 				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			*sd = SD_ALLNODES_INIT;
-			sd->span = cpu_default_map;
+			sd->span = *cpu_map;
 			group = cpu_to_allnodes_group(i);
 			sd->groups = &sched_group_allnodes[group];
 			p = sd;
@@ -173,7 +165,7 @@ void __devinit arch_init_sched_domains(void)
 		*sd = SD_NODE_INIT;
 		sd->span = sched_domain_node_span(cpu_to_node(i));
 		sd->parent = p;
-		cpus_and(sd->span, sd->span, cpu_default_map);
+		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
 		p = sd;
@@ -190,7 +182,7 @@ void __devinit arch_init_sched_domains(void)
 		group = cpu_to_cpu_group(i);
 		*sd = SD_SIBLING_INIT;
 		sd->span = cpu_sibling_map[i];
-		cpus_and(sd->span, sd->span, cpu_default_map);
+		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		sd->groups = &sched_group_cpus[group];
 #endif
@@ -198,9 +190,9 @@ void __devinit arch_init_sched_domains(void)
 
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, cpu_default_map) {
+	for_each_cpu_mask(i, *cpu_map) {
 		cpumask_t this_sibling_map = cpu_sibling_map[i];
-		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
+		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
 		if (i != first_cpu(this_sibling_map))
 			continue;
 
@@ -213,7 +205,7 @@ void __devinit arch_init_sched_domains(void)
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
 
-		cpus_and(nodemask, nodemask, cpu_default_map);
+		cpus_and(nodemask, nodemask, *cpu_map);
 		if (cpus_empty(nodemask))
 			continue;
 
@@ -222,7 +214,7 @@ void __devinit arch_init_sched_domains(void)
 	}
 
 #ifdef CONFIG_NUMA
-	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
+	init_sched_build_groups(sched_group_allnodes, *cpu_map,
 				&cpu_to_allnodes_group);
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
@@ -233,12 +225,12 @@ void __devinit arch_init_sched_domains(void)
 		cpumask_t covered = CPU_MASK_NONE;
 		int j;
 
-		cpus_and(nodemask, nodemask, cpu_default_map);
+		cpus_and(nodemask, nodemask, *cpu_map);
 		if (cpus_empty(nodemask))
 			continue;
 
 		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, cpu_default_map);
+		cpus_and(domainspan, domainspan, *cpu_map);
 
 		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
 		sched_group_nodes[i] = sg;
@@ -266,7 +258,7 @@ void __devinit arch_init_sched_domains(void)
 			int n = (i + j) % MAX_NUMNODES;
 
 			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, cpu_default_map);
+			cpus_and(tmp, notcovered, *cpu_map);
 			cpus_and(tmp, tmp, domainspan);
 			if (cpus_empty(tmp))
 				break;
@@ -293,7 +285,7 @@ void __devinit arch_init_sched_domains(void)
 #endif
 
 	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu_mask(i, cpu_default_map) {
+	for_each_cpu_mask(i, *cpu_map) {
 		int power;
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
@@ -359,13 +351,35 @@ next_sg:
 		cpu_attach_domain(sd, i);
 	}
 }
+/*
+ * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ */
+void arch_init_sched_domains(const cpumask_t *cpu_map)
+{
+	cpumask_t cpu_default_map;
+
+	/*
+	 * Setup mask for cpus without special case scheduling requirements.
+	 * For now this just excludes isolated cpus, but could be used to
+	 * exclude other special cases in the future.
+	 */
+	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
+
+	build_sched_domains(&cpu_default_map);
+}
 
-void __devinit arch_destroy_sched_domains(void)
+void arch_destroy_sched_domains(const cpumask_t *cpu_map)
 {
 #ifdef CONFIG_NUMA
 	int i;
 	for (i = 0; i < MAX_NUMNODES; i++) {
+		cpumask_t nodemask = node_to_cpumask(i);
 		struct sched_group *oldsg, *sg = sched_group_nodes[i];
+
+		cpus_and(nodemask, nodemask, *cpu_map);
+		if (cpus_empty(nodemask))
+			continue;
+
 		if (sg == NULL)
 			continue;
 		sg = sg->next;
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 4a3b1aac43e..179f230816e 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 }
 
 /*
+ * Walk the EFI memory map to pull out leftover pages in the lower
+ * memory regions which do not end up in the regular memory map and
+ * stick them into the uncached allocator
+ *
+ * The regular walk function is significantly more complex than the
+ * uncached walk which means it really doesn't make sense to try and
+ * marge the two.
+ */
+void __init
+efi_memmap_walk_uc (efi_freemem_callback_t callback)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size, start, end;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->attribute == EFI_MEMORY_UC) {
+			start = PAGE_ALIGN(md->phys_addr);
+			end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
+			if ((*callback)(start, end, NULL) < 0)
+				return;
+		}
+	}
+}
+
+
+/*
  * Look for the PAL_CODE region reported by EFI and maps it using an
  * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
  * Abstraction Layer chapter 11 in ADAG
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 81c45d44739..b1d5d3d5276 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1182,7 +1182,7 @@ ENTRY(notify_resume_user)
 	;;
 (pNonSys) mov out2=0				// out2==0 => not a syscall
 	.fframe 16
-	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
+	.spillsp ar.unat, 16
 	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
 	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
 	.body
@@ -1208,7 +1208,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend)
 	adds out2=8,sp				// out2=&sigscratch->ar_pfs
 	;;
 	.fframe 16
-	.spillpsp ar.unat, 16			// (note that offset is relative to psp+0x10!)
+	.spillsp ar.unat, 16
 	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
 	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
 	.body
@@ -1579,7 +1579,7 @@ sys_call_table:
 	data8 sys_keyctl
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall			// 1275
-	data8 sys_ni_syscall
+	data8 sys_set_zone_reclaim
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
index 6d4ecec989b..78eeb079341 100644
--- a/arch/ia64/kernel/entry.h
+++ b/arch/ia64/kernel/entry.h
@@ -60,7 +60,7 @@
 	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
 	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
 	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
-	.spillsp pr,SW(PR)+16+(off))
+	.spillsp pr,SW(PR)+16+(off)
 
 #define DO_SAVE_SWITCH_STACK			\
 	movl r28=1f;				\
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 4f3cdef7579..962b6c4e32b 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
 	;;
 
 	st8 [r2]=r14				// update current->blocked with new mask
-	cmpxchg4.acq r14=[r9],r18,ar.ccv	// current->thread_info->flags <- r18
+	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
 	;;
-	cmp.ne p6,p0=r17,r14			// update failed?
+	cmp.ne p6,p0=r17,r8			// update failed?
 (p6)	br.cond.spnt.few 1b			// yes -> retry
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index d9c05d53435..2bc085a73e3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -405,17 +405,22 @@ ENTRY(nested_dtlb_miss)
 	 *		r30:	continuation address
 	 *		r31:	saved pr
 	 *
-	 * Clobbered:	b0, r18, r19, r21, psr.dt (cleared)
+	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
 	 */
 	rsm psr.dt				// switch to using physical data addressing
 	mov r19=IA64_KR(PT_BASE)		// get the page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
+	mov r18=cr.itir
 	;;
 	shr.u r17=r16,61			// get the region number into r17
+	extr.u r18=r18,2,6			// get the faulting page size
 	;;
 	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
-	shr.u r18=r16,PGDIR_SHIFT		// get bits 33-63 of faulting address
+	add r22=-PAGE_SHIFT,r18			// adjustment for hugetlb address
+	add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
 	;;
+	shr.u r22=r16,r22
+	shr.u r18=r16,r18
 (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
 
 	srlz.d
@@ -428,7 +433,7 @@ ENTRY(nested_dtlb_miss)
 (p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
 (p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-	shr.u r18=r16,PMD_SHIFT			// shift L2 index into position
+	shr.u r18=r22,PMD_SHIFT			// shift L2 index into position
 	;;
 	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
 	;;
@@ -436,7 +441,7 @@ ENTRY(nested_dtlb_miss)
 	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
 	;;
 (p7)	ld8 r17=[r17]				// fetch the L2 entry (may be 0)
-	shr.u r19=r16,PAGE_SHIFT		// shift L3 index into position
+	shr.u r19=r22,PAGE_SHIFT		// shift L3 index into position
 	;;
 (p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was L2 entry NULL?
 	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
new file mode 100644
index 00000000000..b7fa3ccd2b0
--- /dev/null
+++ b/arch/ia64/kernel/jprobes.S
@@ -0,0 +1,61 @@
+/*
+ * Jprobe specific operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-May     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> initial implementation
+ *
+ * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a
+ * probe to be inserted into the beginning of a function call.  The fundamental
+ * difference between a jprobe and a kprobe is the jprobe handler is executed
+ * in the same context as the target function, while the kprobe handlers
+ * are executed in interrupt context.
+ *
+ * For jprobes we initially gain control by placing a break point in the
+ * first instruction of the targeted function.  When we catch that specific
+ * break, we:
+ *        * set the return address to our jprobe_inst_return() function
+ *        * jump to the jprobe handler function
+ *
+ * Since we fixed up the return address, the jprobe handler will return to our
+ * jprobe_inst_return() function, giving us control again.  At this point we
+ * are back in the parents frame marker, so we do yet another call to our
+ * jprobe_break() function to fix up the frame marker as it would normally
+ * exist in the target function.
+ *
+ * Our jprobe_return function then transfers control back to kprobes.c by
+ * executing a break instruction using one of our reserved numbers.  When we
+ * catch that break in kprobes.c, we continue like we do for a normal kprobe
+ * by single stepping the emulated instruction, and then returning execution
+ * to the correct location.
+ */
+#include <asm/asmmacro.h>
+
+	/*
+	 * void jprobe_break(void)
+	 */
+ENTRY(jprobe_break)
+	break.m 0x80300
+END(jprobe_break)
+
+	/*
+	 * void jprobe_inst_return(void)
+	 */
+GLOBAL_ENTRY(jprobe_inst_return)
+	br.call.sptk.many b0=jprobe_break
+END(jprobe_inst_return)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
new file mode 100644
index 00000000000..5978823d5c6
--- /dev/null
+++ b/arch/ia64/kernel/kprobes.c
@@ -0,0 +1,601 @@
+/*
+ *  Kernel Probes (KProbes)
+ *  arch/ia64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adapted from i386
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/moduleloader.h>
+
+#include <asm/pgtable.h>
+#include <asm/kdebug.h>
+
+extern void jprobe_inst_return(void);
+
+/* kprobe_status settings */
+#define KPROBE_HIT_ACTIVE	0x00000001
+#define KPROBE_HIT_SS		0x00000002
+
+static struct kprobe *current_kprobe, *kprobe_prev;
+static unsigned long kprobe_status, kprobe_status_prev;
+static struct pt_regs jprobe_saved_regs;
+
+enum instruction_type {A, I, M, F, B, L, X, u};
+static enum instruction_type bundle_encoding[32][3] = {
+  { M, I, I },				/* 00 */
+  { M, I, I },				/* 01 */
+  { M, I, I },				/* 02 */
+  { M, I, I },				/* 03 */
+  { M, L, X },				/* 04 */
+  { M, L, X },				/* 05 */
+  { u, u, u },  			/* 06 */
+  { u, u, u },  			/* 07 */
+  { M, M, I },				/* 08 */
+  { M, M, I },				/* 09 */
+  { M, M, I },				/* 0A */
+  { M, M, I },				/* 0B */
+  { M, F, I },				/* 0C */
+  { M, F, I },				/* 0D */
+  { M, M, F },				/* 0E */
+  { M, M, F },				/* 0F */
+  { M, I, B },				/* 10 */
+  { M, I, B },				/* 11 */
+  { M, B, B },				/* 12 */
+  { M, B, B },				/* 13 */
+  { u, u, u },  			/* 14 */
+  { u, u, u },  			/* 15 */
+  { B, B, B },				/* 16 */
+  { B, B, B },				/* 17 */
+  { M, M, B },				/* 18 */
+  { M, M, B },				/* 19 */
+  { u, u, u },  			/* 1A */
+  { u, u, u },  			/* 1B */
+  { M, F, B },				/* 1C */
+  { M, F, B },				/* 1D */
+  { u, u, u },  			/* 1E */
+  { u, u, u },  			/* 1F */
+};
+
+/*
+ * In this function we check to see if the instruction
+ * is IP relative instruction and update the kprobe
+ * inst flag accordingly
+ */
+static void update_kprobe_inst_flag(uint template, uint  slot, uint major_opcode,
+	unsigned long kprobe_inst, struct kprobe *p)
+{
+	p->ainsn.inst_flag = 0;
+	p->ainsn.target_br_reg = 0;
+
+	if (bundle_encoding[template][slot] == B) {
+		switch (major_opcode) {
+		  case INDIRECT_CALL_OPCODE:
+	 		p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ 			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ 			break;
+		  case IP_RELATIVE_PREDICT_OPCODE:
+		  case IP_RELATIVE_BRANCH_OPCODE:
+			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+ 			break;
+		  case IP_RELATIVE_CALL_OPCODE:
+ 			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+ 			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ 			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ 			break;
+		}
+ 	} else if (bundle_encoding[template][slot] == X) {
+		switch (major_opcode) {
+		  case LONG_CALL_OPCODE:
+			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+		  break;
+		}
+	}
+	return;
+}
+
+/*
+ * In this function we check to see if the instruction
+ * on which we are inserting kprobe is supported.
+ * Returns 0 if supported
+ * Returns -EINVAL if unsupported
+ */
+static int unsupported_inst(uint template, uint  slot, uint major_opcode,
+	unsigned long kprobe_inst, struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr;
+
+	if (bundle_encoding[template][slot] == I) {
+		switch (major_opcode) {
+			case 0x0: //I_UNIT_MISC_OPCODE:
+			/*
+			 * Check for Integer speculation instruction
+			 * - Bit 33-35 to be equal to 0x1
+			 */
+			if (((kprobe_inst >> 33) & 0x7) == 1) {
+				printk(KERN_WARNING
+					"Kprobes on speculation inst at <0x%lx> not supported\n",
+					addr);
+				return -EINVAL;
+			}
+
+			/*
+			 * IP relative mov instruction
+			 *  - Bit 27-35 to be equal to 0x30
+			 */
+			if (((kprobe_inst >> 27) & 0x1FF) == 0x30) {
+				printk(KERN_WARNING
+					"Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n",
+					addr);
+				return -EINVAL;
+
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * In this function we check to see if the instruction
+ * (qp) cmpx.crel.ctype p1,p2=r2,r3
+ * on which we are inserting kprobe is cmp instruction
+ * with ctype as unc.
+ */
+static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode,
+unsigned long kprobe_inst)
+{
+	cmp_inst_t cmp_inst;
+	uint ctype_unc = 0;
+
+	if (!((bundle_encoding[template][slot] == I) ||
+		(bundle_encoding[template][slot] == M)))
+		goto out;
+
+	if (!((major_opcode == 0xC) || (major_opcode == 0xD) ||
+		(major_opcode == 0xE)))
+		goto out;
+
+	cmp_inst.l = kprobe_inst;
+	if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
+		/* Integere compare - Register Register (A6 type)*/
+		if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
+				&&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	} else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
+		/* Integere compare - Immediate Register (A8 type)*/
+		if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	}
+out:
+	return ctype_unc;
+}
+
+/*
+ * In this function we override the bundle with
+ * the break instruction at the given slot.
+ */
+static void prepare_break_inst(uint template, uint  slot, uint major_opcode,
+	unsigned long kprobe_inst, struct kprobe *p)
+{
+	unsigned long break_inst = BREAK_INST;
+	bundle_t *bundle = &p->ainsn.insn.bundle;
+
+	/*
+	 * Copy the original kprobe_inst qualifying predicate(qp)
+	 * to the break instruction iff !is_cmp_ctype_unc_inst
+	 * because for cmp instruction with ctype equal to unc,
+	 * which is a special instruction always needs to be
+	 * executed regradless of qp
+	 */
+	if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst))
+		break_inst |= (0x3f & kprobe_inst);
+
+	switch (slot) {
+	  case 0:
+		bundle->quad0.slot0 = break_inst;
+		break;
+	  case 1:
+		bundle->quad0.slot1_p0 = break_inst;
+		bundle->quad1.slot1_p1 = break_inst >> (64-46);
+		break;
+	  case 2:
+		bundle->quad1.slot2 = break_inst;
+		break;
+	}
+
+	/*
+	 * Update the instruction flag, so that we can
+	 * emulate the instruction properly after we
+	 * single step on original instruction
+	 */
+	update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p);
+}
+
+static inline void get_kprobe_inst(bundle_t *bundle, uint slot,
+	       	unsigned long *kprobe_inst, uint *major_opcode)
+{
+	unsigned long kprobe_inst_p0, kprobe_inst_p1;
+	unsigned int template;
+
+	template = bundle->quad0.template;
+
+	switch (slot) {
+	  case 0:
+ 		*major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+ 		*kprobe_inst = bundle->quad0.slot0;
+		break;
+	  case 1:
+ 		*major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+  		kprobe_inst_p0 = bundle->quad0.slot1_p0;
+  		kprobe_inst_p1 = bundle->quad1.slot1_p1;
+  		*kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+		break;
+	  case 2:
+ 		*major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+ 		*kprobe_inst = bundle->quad1.slot2;
+		break;
+	}
+}
+
+static int valid_kprobe_addr(int template, int slot, unsigned long addr)
+{
+	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
+		printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n",
+				addr);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline void save_previous_kprobe(void)
+{
+	kprobe_prev = current_kprobe;
+	kprobe_status_prev = kprobe_status;
+}
+
+static inline void restore_previous_kprobe(void)
+{
+	current_kprobe = kprobe_prev;
+	kprobe_status = kprobe_status_prev;
+}
+
+static inline void set_current_kprobe(struct kprobe *p)
+{
+	current_kprobe = p;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long) p->addr;
+	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
+	unsigned long kprobe_inst=0;
+	unsigned int slot = addr & 0xf, template, major_opcode = 0;
+	bundle_t *bundle = &p->ainsn.insn.bundle;
+
+	memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
+	memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+
+ 	template = bundle->quad0.template;
+
+	if(valid_kprobe_addr(template, slot, addr))
+		return -EINVAL;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ 	if (slot == 1 && bundle_encoding[template][1] == L)
+  		slot++;
+
+	/* Get kprobe_inst and major_opcode from the bundle */
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+	if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p))
+			return -EINVAL;
+
+	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
+
+	return 0;
+}
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr;
+	unsigned long arm_addr = addr & ~0xFULL;
+
+	memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
+	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+}
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr;
+	unsigned long arm_addr = addr & ~0xFULL;
+
+	/* p->opcode contains the original unaltered bundle */
+	memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t));
+	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+}
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+/*
+ * We are resuming execution after a single step fault, so the pt_regs
+ * structure reflects the register state after we executed the instruction
+ * located in the kprobe (p->ainsn.insn.bundle).  We still need to adjust
+ * the ip to point back to the original stack address. To set the IP address
+ * to original stack address, handle the case where we need to fixup the
+ * relative IP address and/or fixup branch register.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+  	unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
+  	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+ 	unsigned long template;
+ 	int slot = ((unsigned long)p->addr & 0xf);
+
+	template = p->opcode.bundle.quad0.template;
+
+ 	if (slot == 1 && bundle_encoding[template][1] == L)
+ 		slot = 2;
+
+	if (p->ainsn.inst_flag) {
+
+		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
+			/* Fix relative IP address */
+ 			regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr;
+		}
+
+		if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
+		/*
+		 * Fix target branch register, software convention is
+		 * to use either b0 or b6 or b7, so just checking
+		 * only those registers
+		 */
+			switch (p->ainsn.target_br_reg) {
+			case 0:
+				if ((regs->b0 == bundle_addr) ||
+					(regs->b0 == bundle_addr + 0x10)) {
+					regs->b0 = (regs->b0 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			case 6:
+				if ((regs->b6 == bundle_addr) ||
+					(regs->b6 == bundle_addr + 0x10)) {
+					regs->b6 = (regs->b6 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			case 7:
+				if ((regs->b7 == bundle_addr) ||
+					(regs->b7 == bundle_addr + 0x10)) {
+					regs->b7 = (regs->b7 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			} /* end switch */
+		}
+		goto turn_ss_off;
+	}
+
+	if (slot == 2) {
+ 		if (regs->cr_iip == bundle_addr + 0x10) {
+ 			regs->cr_iip = resume_addr + 0x10;
+ 		}
+ 	} else {
+ 		if (regs->cr_iip == bundle_addr) {
+ 			regs->cr_iip = resume_addr;
+ 		}
+	}
+
+turn_ss_off:
+  	/* Turn off Single Step bit */
+  	ia64_psr(regs)->ss = 0;
+}
+
+static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
+	unsigned long slot = (unsigned long)p->addr & 0xf;
+
+	/* Update instruction pointer (IIP) and slot number (IPSR.ri) */
+	regs->cr_iip = bundle_addr & ~0xFULL;
+
+	if (slot > 2)
+		slot = 0;
+
+	ia64_psr(regs)->ri = slot;
+
+	/* turn on single stepping */
+	ia64_psr(regs)->ss = 1;
+}
+
+static int pre_kprobes_handler(struct die_args *args)
+{
+	struct kprobe *p;
+	int ret = 0;
+	struct pt_regs *regs = args->regs;
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+
+	preempt_disable();
+
+	/* Handle recursion cases */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kprobe_status == KPROBE_HIT_SS) {
+				unlock_kprobes();
+				goto no_kprobe;
+			}
+			/* We have reentered the pre_kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe();
+			set_current_kprobe(p);
+			p->nmissed++;
+			prepare_ss(p, regs);
+			kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else if (args->err == __IA64_BREAK_JPROBE) {
+			/*
+			 * jprobe instrumented function just completed
+			 */
+			p = current_kprobe;
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		} else {
+			/* Not our break */
+			goto no_kprobe;
+		}
+	}
+
+	lock_kprobes();
+	p = get_kprobe(addr);
+	if (!p) {
+		unlock_kprobes();
+		goto no_kprobe;
+	}
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p);
+
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/*
+		 * Our pre-handler is specifically requesting that we just
+		 * do a return.  This is handling the case where the
+		 * pre-handler is really our special jprobe pre-handler.
+		 */
+		return 1;
+
+ss_probe:
+	prepare_ss(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+static int post_kprobes_handler(struct pt_regs *regs)
+{
+	if (!kprobe_running())
+		return 0;
+
+	if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
+		kprobe_status = KPROBE_HIT_SSDONE;
+		current_kprobe->post_handler(current_kprobe, regs, 0);
+	}
+
+	resume_execution(current_kprobe, regs);
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe();
+		goto out;
+	}
+
+	unlock_kprobes();
+
+out:
+	preempt_enable_no_resched();
+	return 1;
+}
+
+static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	if (!kprobe_running())
+		return 0;
+
+	if (current_kprobe->fault_handler &&
+	    current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+		return 1;
+
+	if (kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(current_kprobe, regs);
+		unlock_kprobes();
+		preempt_enable_no_resched();
+	}
+
+	return 0;
+}
+
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	switch(val) {
+	case DIE_BREAK:
+		if (pre_kprobes_handler(args))
+			return NOTIFY_STOP;
+		break;
+	case DIE_SS:
+		if (post_kprobes_handler(args->regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_PAGE_FAULT:
+		if (kprobes_fault_handler(args->regs, args->trapnr))
+			return NOTIFY_STOP;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+
+	/* save architectural state */
+	jprobe_saved_regs = *regs;
+
+	/* after rfi, execute the jprobe instrumented function */
+	regs->cr_iip = addr & ~0xFULL;
+	ia64_psr(regs)->ri = addr & 0xf;
+	regs->r1 = ((struct fnptr *)(jp->entry))->gp;
+
+	/*
+	 * fix the return address to our jprobe_inst_return() function
+	 * in the jprobes.S file
+	 */
+ 	regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+
+	return 1;
+}
+
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	*regs = jprobe_saved_regs;
+	return 1;
+}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 4d6c7b8f667..736e328b5e6 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
 	return IRQ_HANDLED;
 }
 
-#endif /* CONFIG_ACPI */
-
 /*
  *  ia64_mca_cpe_poll
  *
@@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy)
 	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
 }
 
+#endif /* CONFIG_ACPI */
+
 /*
  * C portion of the OS INIT handler
  *
@@ -1390,8 +1390,7 @@ ia64_mca_init(void)
 	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
 
 #ifdef CONFIG_ACPI
-	/* Setup the CPEI/P vector and handler */
-	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+	/* Setup the CPEI/P handler */
 	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
 #endif
 
@@ -1436,6 +1435,7 @@ ia64_mca_late_init(void)
 
 #ifdef CONFIG_ACPI
 	/* Setup the CPEI/P vector and handler */
+	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
 	init_timer(&cpe_poll_timer);
 	cpe_poll_timer.function = ia64_mca_cpe_poll;
 
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 1dbc7b2497c..f6d8a010d99 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -41,7 +41,7 @@
 (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
 (pKStk) ld8 r3 = [r3];;										\
 (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
-(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
+(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
 (pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
 (pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
 	;;											\
@@ -50,7 +50,6 @@
 (pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
 (pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
 	;;											\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;		/* if in kernel mode, use sp (r12) */		\
 (pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
 	;;											\
 (pUStk)	mov r18=ar.bsp;										\
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index febc091c2f0..f1aca7cffd1 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
 		 * XXX Should have an arch-hook for running this after final section
 		 *     addresses have been selected...
 		 */
-		/* See if gp can cover the entire core module:  */
-		uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
-		if (mod->core_size >= MAX_LTOFF)
+		uint64_t gp;
+		if (mod->core_size > MAX_LTOFF)
 			/*
 			 * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
 			 * at the end of the module.
 			 */
-			gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
+			gp = mod->core_size - MAX_LTOFF / 2;
+		else
+			gp = mod->core_size / 2;
+		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
 		mod->arch.gp = gp;
 		DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
 	}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 71c101601e3..6407bff6bfd 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,7 +11,7 @@
  * Version Perfmon-2.x is a rewrite of perfmon-1.x
  * by Stephane Eranian, Hewlett Packard Co.
  *
- * Copyright (C) 1999-2003, 2005  Hewlett Packard Co
+ * Copyright (C) 1999-2005  Hewlett Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  *               David Mosberger-Tang <davidm@hpl.hp.com>
  *
@@ -497,6 +497,9 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
+static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED;
+static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
+
 static struct proc_dir_entry 	*perfmon_dir;
 static pfm_uuid_t		pfm_null_uuid = {0,};
 
@@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 DEFINE_PER_CPU(struct task_struct *, pmu_owner);
 DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
 DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
 
 
 /* forward declaration */
@@ -1325,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
   		pfm_sessions.pfs_sys_session[cpu]->pid,
-		smp_processor_id()));
+		cpu));
 abort:
 	UNLOCK_PFS(flags);
 
@@ -5555,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 	int ret;
 
 	this_cpu = get_cpu();
-	min      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
-	max      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+	if (likely(!pfm_alt_intr_handler)) {
+		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
 
-	start_cycles = ia64_get_itc();
+		start_cycles = ia64_get_itc();
 
-	ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(irq, arg, regs);
 
-	total_cycles = ia64_get_itc();
+		total_cycles = ia64_get_itc();
 
-	/*
-	 * don't measure spurious interrupts
-	 */
-	if (likely(ret == 0)) {
-		total_cycles -= start_cycles;
+		/*
+		 * don't measure spurious interrupts
+		 */
+		if (likely(ret == 0)) {
+			total_cycles -= start_cycles;
 
-		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
-		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
 
-		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+		}
+	}
+	else {
+		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
 	}
+
 	put_cpu_no_resched();
 	return IRQ_HANDLED;
 }
@@ -6425,6 +6435,141 @@ static struct irqaction perfmon_irqaction = {
 	.name    = "perfmon"
 };
 
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = ia64_task_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * should not be necessary but
+	 * let's take not risk
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * This call is required
+	 * May cause a spurious interrupt on some processors
+	 */
+	pfm_freeze_pmu();
+
+	ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = ia64_task_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * put PMU back in state expected
+	 * by perfmon
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * perfmon runs with PMU unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int ret, i;
+	int reserve_cpu;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+	/* do the easy test first */
+	if (pfm_alt_intr_handler) return -EBUSY;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	/* reserve our session */
+	for_each_online_cpu(reserve_cpu) {
+		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+		if (ret) goto cleanup_reserve;
+	}
+
+	/* save the current system wide pmu states */
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+		goto cleanup_reserve;
+	}
+
+	/* officially change to the alternate interrupt handler */
+	pfm_alt_intr_handler = hdl;
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+
+cleanup_reserve:
+	for_each_online_cpu(i) {
+		/* don't unreserve more than we reserved */
+		if (i >= reserve_cpu) break;
+
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int i;
+	int ret;
+
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	pfm_alt_intr_handler = NULL;
+
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+	}
+
+	for_each_online_cpu(i) {
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
 /*
  * perfmon initialization routine, called from the initcall() table
  */
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 907464ee727..6d57aebad48 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task)
 {
 	struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
 
+	/*
+	 * Prevent migrating this task while
+	 * we're fiddling with the FPU state
+	 */
+	preempt_disable();
 	if (ia64_is_local_fpu_owner(task) && psr->mfh) {
 		psr->mfh = 0;
 		task->thread.flags |= IA64_THREAD_FPH_VALID;
 		ia64_save_fpu(&task->thread.fph[0]);
 	}
+	preempt_enable();
 }
 
 /*
@@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs  *pt,
 			unsigned long cfm)
 {
 	struct unw_frame_info info, prev_info;
-	unsigned long ip, pr;
+	unsigned long ip, sp, pr;
 
 	unw_init_from_blocked_task(&info, child);
 	while (1) {
 		prev_info = info;
 		if (unw_unwind(&info) < 0)
 			return;
-		if (unw_get_rp(&info, &ip) < 0)
+
+		unw_get_sp(&info, &sp);
+		if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			dprintk("ptrace.%s: ran off the top of the kernel "
+				"stack\n", __FUNCTION__);
 			return;
-		if (ip < FIXADDR_USER_END)
+		}
+		if (unw_get_pr (&prev_info, &pr) < 0) {
+			unw_get_rp(&prev_info, &ip);
+			dprintk("ptrace.%s: failed to read "
+				"predicate register (ip=0x%lx)\n",
+				__FUNCTION__, ip);
+			return;
+		}
+		if (unw_is_intr_frame(&info)
+		    && (pr & (1UL << PRED_USER_STACK)))
 			break;
 	}
 
@@ -925,6 +945,13 @@ access_uarea (struct task_struct *child, unsigned long addr,
 				*data = (pt->cr_ipsr & IPSR_MASK);
 			return 0;
 
+		      case PT_AR_RSC:
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2); /* force PL3 */
+			else
+				*data = pt->ar_rsc;
+			return 0;
+
 		      case PT_AR_RNAT:
 			urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
 			rnat_addr = (long) ia64_rse_rnat_addr((long *)
@@ -976,9 +1003,6 @@ access_uarea (struct task_struct *child, unsigned long addr,
 		      case PT_AR_BSPSTORE:
 			ptr = pt_reg_addr(pt, ar_bspstore);
 			break;
-		      case PT_AR_RSC:
-			ptr = pt_reg_addr(pt, ar_rsc);
-			break;
 		      case PT_AR_UNAT:
 			ptr = pt_reg_addr(pt, ar_unat);
 			break;
@@ -1214,7 +1238,7 @@ ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 static long
 ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 {
-	unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
+	unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
 	struct unw_frame_info info;
 	struct switch_stack *sw;
 	struct ia64_fpreg fpval;
@@ -1247,7 +1271,7 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 	/* app regs */
 
 	retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
-	retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
+	retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]);
 	retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
 	retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
 	retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
@@ -1345,6 +1369,7 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 	retval |= __get_user(nat_bits, &ppr->nat);
 
 	retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
+	retval |= access_uarea(child, PT_AR_RSC, &rsc, 1);
 	retval |= access_uarea(child, PT_AR_EC, &ec, 1);
 	retval |= access_uarea(child, PT_AR_LC, &lc, 1);
 	retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index b7e6b4cb374..d14692e0920 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -720,7 +720,8 @@ cpu_init (void)
 	ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
 
 	/*
-	 * Initialize default control register to defer all speculative faults.  The
+	 * Initialize default control register to defer speculative faults except
+	 * for those arising from TLB misses, which are not deferred.  The
 	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
 	 * the kernel must have recovery code for all speculative accesses).  Turn on
 	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 499b7e5317c..edd9f07860b 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -94,7 +94,7 @@ sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
 static long
 restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 {
-	unsigned long ip, flags, nat, um, cfm;
+	unsigned long ip, flags, nat, um, cfm, rsc;
 	long err;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -106,7 +106,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 	err |= __get_user(ip, &sc->sc_ip);			/* instruction pointer */
 	err |= __get_user(cfm, &sc->sc_cfm);
 	err |= __get_user(um, &sc->sc_um);			/* user mask */
-	err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
+	err |= __get_user(rsc, &sc->sc_ar_rsc);
 	err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
 	err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
 	err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
@@ -119,6 +119,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 	err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);	/* r15 */
 
 	scr->pt.cr_ifs = cfm | (1UL << 63);
+	scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
 
 	/* establish new instruction pointer: */
 	scr->pt.cr_iip = ip & ~0x3UL;
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 953095e2ce1..b49d4ddaab9 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -269,7 +269,7 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
 	int me = get_cpu(); /* prevent preemption and reschedule on another processor */
 
 	if (cpuid == me) {
-		printk("%s: trying to call self\n", __FUNCTION__);
+		printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
 		put_cpu();
 		return -EBUSY;
 	}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0d5ee57c986..623b0a54670 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -346,6 +346,7 @@ smp_callin (void)
 	lock_ipi_calllock();
 	cpu_set(cpuid, cpu_online_map);
 	unlock_ipi_calllock();
+	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 
 	smp_setup_percpu_timer();
 
@@ -611,6 +612,7 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callin_map);
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
 /*
@@ -624,7 +626,7 @@ static struct {
 	__u16   thread_id;
 	__u16   proc_fixed_addr;
 	__u8    valid;
-}mt_info[NR_CPUS] __devinit;
+} mt_info[NR_CPUS] __devinitdata;
 
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void
@@ -688,6 +690,7 @@ int __cpu_disable(void)
 		return -EBUSY;
 
 	remove_siblinginfo(cpu);
+	cpu_clear(cpu, cpu_online_map);
 	fixup_irqs();
 	local_flush_tlb_all();
 	cpu_clear(cpu, cpu_callin_map);
@@ -774,6 +777,7 @@ __cpu_up (unsigned int cpu)
 	if (cpu_isset(cpu, cpu_callin_map))
 		return -EINVAL;
 
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Processor goes to start_secondary(), sets online flag */
 	ret = do_boot_cpu(sapicid, cpu);
 	if (ret < 0)
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index a8cf6d8a509..770fab37928 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
 		}
 	}
 
-	/*
-	 * A zero mmap always succeeds in Linux, independent of whether or not the
-	 * remaining arguments are valid.
-	 */
-	if (len == 0)
-		goto out;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
 	if (!len || len > TASK_SIZE) {
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e82ad78081b..e7e520d90f0 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -21,12 +21,26 @@
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
+#include <asm/kdebug.h>
 
 extern spinlock_t timerlist_lock;
 
 fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
+struct notifier_block *ia64die_chain;
+static DEFINE_SPINLOCK(die_notifier_lock);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	int err = 0;
+	unsigned long flags;
+	spin_lock_irqsave(&die_notifier_lock, flags);
+	err = notifier_chain_register(&ia64die_chain, nb);
+	spin_unlock_irqrestore(&die_notifier_lock, flags);
+	return err;
+}
+
 void __init
 trap_init (void)
 {
@@ -111,6 +125,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 	siginfo_t siginfo;
 	int sig, code;
 
+	/* break.b always sets cr.iim to 0, which causes problems for
+	 * debuggers.  Get the real break number from the original instruction,
+	 * but only for kernel code.  User space break.b is left alone, to
+	 * preserve the existing behaviour.  All break codings have the same
+	 * format, so there is no need to check the slot type.
+	 */
+	if (break_num == 0 && !user_mode(regs)) {
+		struct ia64_psr *ipsr = ia64_psr(regs);
+		unsigned long *bundle = (unsigned long *)regs->cr_iip;
+		unsigned long slot;
+		switch (ipsr->ri) {
+		      case 0:  slot = (bundle[0] >>  5); break;
+		      case 1:  slot = (bundle[0] >> 46) | (bundle[1] << 18); break;
+		      default: slot = (bundle[1] >> 23); break;
+		}
+		break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff);
+	}
+
 	/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
 	siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
 	siginfo.si_imm = break_num;
@@ -119,6 +151,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 
 	switch (break_num) {
 	      case 0: /* unknown error (used by GCC for __builtin_abort()) */
+		if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
+			       	== NOTIFY_STOP) {
+			return;
+		}
 		die_if_kernel("bugcheck!", regs, break_num);
 		sig = SIGILL; code = ILL_ILLOPC;
 		break;
@@ -171,6 +207,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 		sig = SIGILL; code = __ILL_BNDMOD;
 		break;
 
+	      case 0x80200:
+	      case 0x80300:
+		if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP)
+			       	== NOTIFY_STOP) {
+			return;
+		}
+		sig = SIGTRAP; code = TRAP_BRKPT;
+		break;
+
 	      default:
 		if (break_num < 0x40000 || break_num > 0x100000)
 			die_if_kernel("Bad break", regs, break_num);
@@ -202,13 +247,21 @@ disabled_fph_fault (struct pt_regs *regs)
 
 	/* first, grant user-level access to fph partition: */
 	psr->dfh = 0;
+
+	/*
+	 * Make sure that no other task gets in on this processor
+	 * while we're claiming the FPU
+	 */
+	preempt_disable();
 #ifndef CONFIG_SMP
 	{
 		struct task_struct *fpu_owner
 			= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
 
-		if (ia64_is_local_fpu_owner(current))
+		if (ia64_is_local_fpu_owner(current)) {
+			preempt_enable_no_resched();
 			return;
+		}
 
 		if (fpu_owner)
 			ia64_flush_fph(fpu_owner);
@@ -226,6 +279,7 @@ disabled_fph_fault (struct pt_regs *regs)
 		 */
 		psr->mfh = 1;
 	}
+	preempt_enable_no_resched();
 }
 
 static inline int
@@ -521,7 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 #endif
 			break;
 		      case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
-		      case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+		      case 36:
+			      if (notify_die(DIE_SS, "ss", &regs, vector,
+					     vector, SIGTRAP) == NOTIFY_STOP)
+				      return;
+			      siginfo.si_code = TRAP_TRACE; ifa = 0; break;
 		}
 		siginfo.si_signo = SIGTRAP;
 		siginfo.si_errno = 0;
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
new file mode 100644
index 00000000000..490dfc9ab47
--- /dev/null
+++ b/arch/ia64/kernel/uncached.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * A simple uncached page allocator using the generic allocator. This
+ * allocator first utilizes the spare (spill) pages found in the EFI
+ * memmap and will then start converting cached pages to uncached ones
+ * at a granule at a time. Node awareness is implemented by having a
+ * pool of pages per node.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/efi.h>
+#include <linux/genalloc.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/sn/arch.h>
+
+#define DEBUG	0
+
+#if DEBUG
+#define dprintk			printk
+#else
+#define dprintk(x...)		do { } while (0)
+#endif
+
+void __init efi_memmap_walk_uc (efi_freemem_callback_t callback);
+
+#define MAX_UNCACHED_GRANULES	5
+static int allocated_granules;
+
+struct gen_pool *uncached_pool[MAX_NUMNODES];
+
+
+static void uncached_ipi_visibility(void *data)
+{
+	int status;
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if ((status != PAL_VISIBILITY_OK) &&
+	    (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
+		printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on "
+		       "CPU %i\n", status, get_cpu());
+}
+
+
+static void uncached_ipi_mc_drain(void *data)
+{
+	int status;
+	status = ia64_pal_mc_drain();
+	if (status)
+		printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on "
+		       "CPU %i\n", status, get_cpu());
+}
+
+
+static unsigned long
+uncached_get_new_chunk(struct gen_pool *poolp)
+{
+	struct page *page;
+	void *tmp;
+	int status, i;
+	unsigned long addr, node;
+
+	if (allocated_granules >= MAX_UNCACHED_GRANULES)
+		return 0;
+
+	node = poolp->private;
+	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
+				IA64_GRANULE_SHIFT-PAGE_SHIFT);
+
+	dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n",
+		page, (unsigned long)(page-vmem_map) << PAGE_SHIFT);
+
+	/*
+	 * Do magic if no mem on local node! XXX
+	 */
+	if (!page)
+		return 0;
+	tmp = page_address(page);
+
+	/*
+	 * There's a small race here where it's possible for someone to
+	 * access the page through /dev/mem halfway through the conversion
+	 * to uncached - not sure it's really worth bothering about
+	 */
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		SetPageUncached(&page[i]);
+
+	flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE);
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+
+	dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
+		status, get_cpu());
+
+	if (!status) {
+		status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1);
+		if (status)
+			printk(KERN_WARNING "smp_call_function failed for "
+			       "uncached_ipi_visibility! (%i)\n", status);
+	}
+
+	if (ia64_platform_is("sn2"))
+		sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE);
+	else
+		flush_icache_range((unsigned long)tmp,
+				   (unsigned long)tmp+IA64_GRANULE_SIZE);
+
+	ia64_pal_mc_drain();
+	status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1);
+	if (status)
+		printk(KERN_WARNING "smp_call_function failed for "
+		       "uncached_ipi_mc_drain! (%i)\n", status);
+
+	addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+
+	allocated_granules++;
+	return addr;
+}
+
+
+/*
+ * uncached_alloc_page
+ *
+ * Allocate 1 uncached page. Allocates on the requested node. If no
+ * uncached pages are available on the requested node, roundrobin starting
+ * with higher nodes.
+ */
+unsigned long
+uncached_alloc_page(int nid)
+{
+	unsigned long maddr;
+
+	maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE);
+
+	dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n",
+		maddr, nid);
+
+	/*
+	 * If no memory is availble on our local node, try the
+	 * remaining nodes in the system.
+	 */
+	if (!maddr) {
+		int i;
+
+		for (i = MAX_NUMNODES - 1; i >= 0; i--) {
+			if (i == nid || !node_online(i))
+				continue;
+			maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE);
+			dprintk(KERN_DEBUG "uncached_alloc_page alternate search "
+				"returns %lx on node %i\n", maddr, i);
+			if (maddr) {
+				break;
+			}
+		}
+	}
+
+	return maddr;
+}
+EXPORT_SYMBOL(uncached_alloc_page);
+
+
+/*
+ * uncached_free_page
+ *
+ * Free a single uncached page.
+ */
+void
+uncached_free_page(unsigned long maddr)
+{
+	int node;
+
+	node = nasid_to_cnodeid(NASID_GET(maddr));
+
+	dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
+
+	if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+		panic("uncached_free_page invalid address %lx\n", maddr);
+
+	gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE);
+}
+EXPORT_SYMBOL(uncached_free_page);
+
+
+/*
+ * uncached_build_memmap,
+ *
+ * Called at boot time to build a map of pages that can be used for
+ * memory special operations.
+ */
+static int __init
+uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
+{
+	long length;
+	unsigned long vstart, vend;
+	int node;
+
+	length = end - start;
+	vstart = start + __IA64_UNCACHED_OFFSET;
+	vend = end + __IA64_UNCACHED_OFFSET;
+
+	dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
+
+	memset((char *)vstart, 0, length);
+
+	node = nasid_to_cnodeid(NASID_GET(start));
+
+	for (; vstart < vend ; vstart += PAGE_SIZE) {
+		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
+		gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+
+static int __init uncached_init(void) {
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (!node_online(i))
+			continue;
+		uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT,
+						   &uncached_get_new_chunk, i);
+	}
+
+	efi_memmap_walk_uc(uncached_build_memmap);
+
+	return 0;
+}
+
+__initcall(uncached_init);