40 files changed, 930 insertions, 725 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b940424f8cc..5260fb55ab7 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -38,6 +38,7 @@ config SUPERH32
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_SYSCALLS
+	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select ARCH_HIBERNATION_POSSIBLE if MMU
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index c08d33fe210..ce01d6a953b 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -18,6 +18,8 @@
 #include <linux/input.h>
 #include <linux/i2c.h>
 #include <linux/usb/r8a66597.h>
+#include <media/soc_camera.h>
+#include <media/sh_mobile_ceu.h>
 #include <video/sh_mobile_lcdc.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
@@ -212,11 +214,131 @@ static struct platform_device kfr2r09_usb0_gadget_device = {
 	.resource	= kfr2r09_usb0_gadget_resources,
 };
 
+static struct sh_mobile_ceu_info sh_mobile_ceu_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource kfr2r09_ceu_resources[] = {
+	[0] = {	/* CEU memory region, 0xa0 bytes */
+		.name	= "CEU",
+		.start	= 0xfe910000,
+		.end	= 0xfe91009f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {	/* single interrupt, IRQ 52 */
+		.start  = 52,
+		.end  = 52,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* placeholder for contiguous memory, presumably set at board setup */
+	},
+};
+
+static struct platform_device kfr2r09_ceu_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 0, /* "ceu0" clock */
+	.num_resources	= ARRAY_SIZE(kfr2r09_ceu_resources),
+	.resource	= kfr2r09_ceu_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu_info,
+	},
+	.archdata = {
+		.hwblk_id = HWBLK_CEU0,
+	},
+};
+
+static struct i2c_board_info kfr2r09_i2c_camera = {
+	I2C_BOARD_INFO("rj54n1cb0c", 0x50),
+};
+
+static struct clk *camera_clk;
+
+#define DRVCRB 0xA405018C
+/* Power the camera up (@mode != 0) or down (@mode == 0); 0 or -errno. */
+static int camera_power(struct device *dev, int mode)
+{
+	int ret;
+
+	if (mode) {
+		long rate;
+
+		camera_clk = clk_get(NULL, "video_clk");
+		if (IS_ERR(camera_clk))
+			return PTR_ERR(camera_clk);
+
+		/* set VIO_CKO clock to 25MHz; round_rate may return -errno */
+		rate = clk_round_rate(camera_clk, 25000000);
+		if (rate < 0) {
+			ret = rate;
+			goto eclkrate;
+		}
+		ret = clk_set_rate(camera_clk, rate);
+		if (ret < 0)
+			goto eclkrate;
+
+		/* set DRVCRB: use 1.8 V for VccQ_VIO, 2.85V for VccQ_SR */
+		ctrl_outw((ctrl_inw(DRVCRB) & ~0x0003) | 0x0001, DRVCRB);
+
+		/* reset clear */
+		ret = gpio_request(GPIO_PTB4, NULL);
+		if (ret < 0)
+			goto eptb4;
+		ret = gpio_request(GPIO_PTB7, NULL);
+		if (ret < 0)
+			goto eptb7;
+
+		ret = gpio_direction_output(GPIO_PTB4, 1);
+		if (!ret)
+			ret = gpio_direction_output(GPIO_PTB7, 1);
+		if (ret < 0)
+			goto egpioout;
+		msleep(1);
+
+		ret = clk_enable(camera_clk);	/* start VIO_CKO */
+		if (ret < 0)
+			goto eclkon;
+
+		return 0;
+	}
+
+	ret = 0;	/* power-off: reuse the unwind ladder below */
+	clk_disable(camera_clk);
+eclkon:
+	gpio_set_value(GPIO_PTB7, 0);
+egpioout:
+	gpio_set_value(GPIO_PTB4, 0);
+	gpio_free(GPIO_PTB7);
+eptb7:
+	gpio_free(GPIO_PTB4);
+eptb4:
+eclkrate:
+	clk_put(camera_clk);
+	return ret;
+}
+
+static struct soc_camera_link rj54n1_link = {
+	.power		= camera_power,
+	.board_info	= &kfr2r09_i2c_camera,
+	.i2c_adapter_id	= 1,
+	.module_name	= "rj54n1cb0c",
+};
+
+static struct platform_device kfr2r09_camera = {
+	.name	= "soc-camera-pdrv",
+	.id	= 0,
+	.dev	= {
+		.platform_data = &rj54n1_link,
+	},
+};
+
 static struct platform_device *kfr2r09_devices[] __initdata = {
 	&kfr2r09_nor_flash_device,
 	&kfr2r09_nand_flash_device,
 	&kfr2r09_sh_keysc_device,
 	&kfr2r09_sh_lcdc_device,
+	&kfr2r09_ceu_device,
+	&kfr2r09_camera,
 };
 
 #define BSC_CS0BCR 0xfec10004
@@ -361,6 +483,23 @@ static int __init kfr2r09_devices_setup(void)
 	if (kfr2r09_usb0_gadget_setup() == 0)
 		platform_device_register(&kfr2r09_usb0_gadget_device);
 
+	/* CEU */
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO0_CLK, NULL);
+	gpio_request(GPIO_FN_VIO0_VD, NULL);
+	gpio_request(GPIO_FN_VIO0_HD, NULL);
+	gpio_request(GPIO_FN_VIO0_FLD, NULL);
+	gpio_request(GPIO_FN_VIO0_D7, NULL);
+	gpio_request(GPIO_FN_VIO0_D6, NULL);
+	gpio_request(GPIO_FN_VIO0_D5, NULL);
+	gpio_request(GPIO_FN_VIO0_D4, NULL);
+	gpio_request(GPIO_FN_VIO0_D3, NULL);
+	gpio_request(GPIO_FN_VIO0_D2, NULL);
+	gpio_request(GPIO_FN_VIO0_D1, NULL);
+	gpio_request(GPIO_FN_VIO0_D0, NULL);
+
+	platform_resource_setup_memory(&kfr2r09_ceu_device, "ceu", 4 << 20);
+
 	return platform_add_devices(kfr2r09_devices,
 				    ARRAY_SIZE(kfr2r09_devices));
 }
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index fd56a71ca9d..b51b1fc4baa 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -131,7 +131,7 @@ void decompress_kernel(void)
 #ifdef CONFIG_SUPERH64
 	output_addr = (CONFIG_MEMORY_START + 0x2000);
 #else
-	output_addr = PHYSADDR((unsigned long)&_text+PAGE_SIZE);
+	output_addr = __pa((unsigned long)&_text+PAGE_SIZE);
 #ifdef CONFIG_29BIT
 	output_addr |= P2SEG;
 #endif
diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h
index 80d40813e05..99d6b3ecbe2 100644
--- a/arch/sh/include/asm/addrspace.h
+++ b/arch/sh/include/asm/addrspace.h
@@ -28,9 +28,6 @@
 /* Returns the privileged segment base of a given address  */
 #define PXSEG(a)	(((unsigned long)(a)) & 0xe0000000)
 
-/* Returns the physical address of a PnSEG (n=1,2) address   */
-#define PHYSADDR(a)	(((unsigned long)(a)) & 0x1fffffff)
-
 #if defined(CONFIG_29BIT) || defined(CONFIG_PMB_FIXED)
 /*
  * Map an address to a certain privileged segment
@@ -60,5 +57,11 @@
 #define P3_ADDR_MAX		P4SEG
 #endif
 
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PMB
+extern int __in_29bit_mode(void);
+#endif /* CONFIG_PMB */
+#endif /* __ASSEMBLY__ */
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_ADDRSPACE_H */
diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h
index ced6795891a..bdccbbfdc0b 100644
--- a/arch/sh/include/asm/dwarf.h
+++ b/arch/sh/include/asm/dwarf.h
@@ -194,6 +194,12 @@
 #define DWARF_ARCH_RA_REG	17
 
 #ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
 /*
  * Read either the frame pointer (r14) or the stack pointer (r15).
  * NOTE: this MUST be inlined.
@@ -241,6 +247,12 @@ struct dwarf_cie {
 
 	unsigned long flags;
 #define DWARF_CIE_Z_AUGMENTATION	(1 << 0)
+
+	/*
+	 * 'mod' will be non-NULL if this CIE came from a module's
+	 * .eh_frame section.
+	 */
+	struct module *mod;
 };
 
 /**
@@ -255,6 +267,12 @@ struct dwarf_fde {
 	unsigned char *instructions;
 	unsigned char *end;
 	struct list_head link;
+
+	/*
+	 * 'mod' will be non-NULL if this FDE came from a module's
+	 * .eh_frame section.
+	 */
+	struct module *mod;
 };
 
 /**
@@ -364,6 +382,12 @@ static inline unsigned int DW_CFA_operand(unsigned long insn)
 
 extern struct dwarf_frame *dwarf_unwind_stack(unsigned long,
 					      struct dwarf_frame *);
+extern void dwarf_free_frame(struct dwarf_frame *);
+
+extern int module_dwarf_finalize(const Elf_Ehdr *, const Elf_Shdr *,
+				 struct module *);
+extern void module_dwarf_cleanup(struct module *);
+
 #endif /* !__ASSEMBLY__ */
 
 #define CFI_STARTPROC	.cfi_startproc
@@ -391,6 +415,10 @@ extern struct dwarf_frame *dwarf_unwind_stack(unsigned long,
 static inline void dwarf_unwinder_init(void)
 {
 }
+
+#define module_dwarf_finalize(hdr, sechdrs, me)	(0)
+#define module_dwarf_cleanup(mod)		do { } while (0)
+
 #endif
 
 #endif /* CONFIG_DWARF_UNWINDER */
diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h
index 721fcc4d5e9..76c5a3099cb 100644
--- a/arch/sh/include/asm/fixmap.h
+++ b/arch/sh/include/asm/fixmap.h
@@ -14,9 +14,9 @@
 #define _ASM_FIXMAP_H
 
 #include <linux/kernel.h>
+#include <linux/threads.h>
 #include <asm/page.h>
 #ifdef CONFIG_HIGHMEM
-#include <linux/threads.h>
 #include <asm/kmap_types.h>
 #endif
 
@@ -46,9 +46,9 @@
  * fix-mapped?
  */
 enum fixed_addresses {
-#define FIX_N_COLOURS 16
+#define FIX_N_COLOURS 8
 	FIX_CMAP_BEGIN,
-	FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS,
+	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS),
 	FIX_UNCACHED,
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 12f3a31f20a..13e9966464c 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -35,4 +35,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifndef __ASSEMBLY__
+
+/* arch/sh/kernel/return_address.c */
+extern void *return_address(unsigned int);
+
+#define HAVE_ARCH_CALLER_ADDR
+
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#define CALLER_ADDR1 ((unsigned long)return_address(1))
+#define CALLER_ADDR2 ((unsigned long)return_address(2))
+#define CALLER_ADDR3 ((unsigned long)return_address(3))
+#define CALLER_ADDR4 ((unsigned long)return_address(4))
+#define CALLER_ADDR5 ((unsigned long)return_address(5))
+#define CALLER_ADDR6 ((unsigned long)return_address(6))
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* __ASM_SH_FTRACE_H */
diff --git a/arch/sh/include/asm/hardirq.h b/arch/sh/include/asm/hardirq.h
index a5be4afa790..48b191313a9 100644
--- a/arch/sh/include/asm/hardirq.h
+++ b/arch/sh/include/asm/hardirq.h
@@ -1,9 +1,16 @@
 #ifndef __ASM_SH_HARDIRQ_H
 #define __ASM_SH_HARDIRQ_H
 
-extern void ack_bad_irq(unsigned int irq);
-#define ack_bad_irq ack_bad_irq
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;		/* arch dependent */
+} ____cacheline_aligned irq_cpustat_t;
 
-#include <asm-generic/hardirq.h>
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+extern void ack_bad_irq(unsigned int irq);
 
 #endif /* __ASM_SH_HARDIRQ_H */
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 5be45ea4dfe..0cf2a5708e2 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -246,7 +246,7 @@ void __iounmap(void __iomem *addr);
 static inline void __iomem *
 __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 {
-#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED)
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB)
 	unsigned long last_addr = offset + size - 1;
 #endif
 	void __iomem *ret;
@@ -255,7 +255,7 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 	if (ret)
 		return ret;
 
-#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED)
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB)
 	/*
 	 * For P1 and P2 space this is trivial, as everything is already
 	 * mapped. Uncached access for P1 addresses are done through P2.
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index f5963037c9d..c7426ad9926 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -7,12 +7,16 @@
 #define PMB_PASCR		0xff000070
 #define PMB_IRMCR		0xff000078
 
+#define PASCR_SE		0x80000000
+
 #define PMB_ADDR		0xf6100000
 #define PMB_DATA		0xf7100000
 #define PMB_ENTRY_MAX		16
 #define PMB_E_MASK		0x0000000f
 #define PMB_E_SHIFT		8
 
+#define PMB_PFN_MASK		0xff000000
+
 #define PMB_SZ_16M		0x00000000
 #define PMB_SZ_64M		0x00000010
 #define PMB_SZ_128M		0x00000080
@@ -62,17 +66,10 @@ struct pmb_entry {
 };
 
 /* arch/sh/mm/pmb.c */
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
-		    unsigned long flags, int *entry);
-int set_pmb_entry(struct pmb_entry *pmbe);
-void clear_pmb_entry(struct pmb_entry *pmbe);
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
-			    unsigned long flags);
-void pmb_free(struct pmb_entry *pmbe);
 long pmb_remap(unsigned long virt, unsigned long phys,
 	       unsigned long size, unsigned long flags);
 void pmb_unmap(unsigned long addr);
+int pmb_init(void);
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MMU_H */
-
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 4f3efa7d5a6..ba3046e4f06 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -75,13 +75,31 @@ static inline unsigned long long neff_sign_extend(unsigned long val)
 #define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS	0
 
-#ifdef CONFIG_32BIT
-#define PHYS_ADDR_MASK		0xffffffff
+#define PHYS_ADDR_MASK29		0x1fffffff
+#define PHYS_ADDR_MASK32		0xffffffff
+
+#ifdef CONFIG_PMB
+static inline unsigned long phys_addr_mask(void)
+{
+	/*
+	 * With PMB the mask depends on whether the MMU is currently in
+	 * 29bit mode, so it is chosen by a call rather than a constant.
+	 */
+	return __in_29bit_mode() ? PHYS_ADDR_MASK29 : PHYS_ADDR_MASK32;
+}
+#elif defined(CONFIG_32BIT)
+static inline unsigned long phys_addr_mask(void)
+{
+	return PHYS_ADDR_MASK32;
+}
 #else
-#define PHYS_ADDR_MASK		0x1fffffff
+static inline unsigned long phys_addr_mask(void)
+{
+	return PHYS_ADDR_MASK29;
+}
 #endif
 
-#define PTE_PHYS_MASK		(PHYS_ADDR_MASK & PAGE_MASK)
+#define PTE_PHYS_MASK		(phys_addr_mask() & PAGE_MASK)
 #define PTE_FLAGS_MASK		(~(PTE_PHYS_MASK) << PAGE_SHIFT)
 
 #ifdef CONFIG_SUPERH32
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index c0d359ce337..b3543551620 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -108,7 +108,7 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
 #define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE)
 #endif
 
-#define _PAGE_FLAGS_HARDWARE_MASK	(PHYS_ADDR_MASK & ~(_PAGE_CLEAR_FLAGS))
+#define _PAGE_FLAGS_HARDWARE_MASK	(phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS))
 
 /* Hardware flags, page size encoding */
 #if !defined(CONFIG_MMU)
diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h
index 327cc2e4c97..e38d1d4c7f6 100644
--- a/arch/sh/include/asm/scatterlist.h
+++ b/arch/sh/include/asm/scatterlist.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_SH_SCATTERLIST_H
 #define __ASM_SH_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD	PHYS_ADDR_MASK
+#define ISA_DMA_THRESHOLD	phys_addr_mask()
 
 #include <asm-generic/scatterlist.h>
 
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index bdeb9d46d17..23eeed89467 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -19,6 +19,7 @@ struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
+	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;
 	int			preempt_count; /* 0 => preemptable, <0 => BUG */
 	mm_segment_t		addr_limit;	/* thread address space */
@@ -111,7 +112,6 @@ extern void free_thread_info(struct thread_info *ti);
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_RESTORE_SIGMASK	3	/* restore signal mask in do_signal() */
 #define TIF_SINGLESTEP		4	/* singlestepping active */
 #define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
 #define TIF_SECCOMP		6	/* secure computing */
@@ -125,7 +125,6 @@ extern void free_thread_info(struct thread_info *ti);
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
@@ -149,13 +148,32 @@ extern void free_thread_info(struct thread_info *ti);
 /* work to do on any return to u-space */
 #define _TIF_ALLWORK_MASK	(_TIF_SYSCALL_TRACE | _TIF_SIGPENDING      | \
 				 _TIF_NEED_RESCHED  | _TIF_SYSCALL_AUDIT   | \
-				 _TIF_SINGLESTEP    | _TIF_RESTORE_SIGMASK | \
-				 _TIF_NOTIFY_RESUME | _TIF_SYSCALL_TRACEPOINT)
+				 _TIF_SINGLESTEP    | _TIF_NOTIFY_RESUME   | \
+				 _TIF_SYSCALL_TRACEPOINT)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		(_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
 				 _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_RESTORE_SIGMASK	0x0001	/* restore signal mask in do_signal() */
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	/* Entry code only tests TIF_SIGPENDING, so raise it alongside. */
+	current_thread_info()->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, (unsigned long *)&current_thread_info()->flags);
+}
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASM_SH_THREAD_INFO_H */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index a2d0a40f384..f8791203cfe 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -9,8 +9,11 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
+CFLAGS_REMOVE_return_address.o = -pg
+
 obj-y	:= debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o	\
 	   machvec.o nmi_debug.o process_$(BITS).o ptrace_$(BITS).o	\
+	   return_address.o						\
 	   setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o		\
 	   syscalls_$(BITS).o time.o topology.o traps.o			\
 	   traps_$(BITS).o unwinder.o
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index e848443deeb..485330cf854 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -268,7 +268,11 @@ enum {
 	UNUSED = 0,
 
 	/* interrupt sources */
-	IRL, IRQ0, IRQ1, IRQ2, IRQ3,
+	IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	IRL_HHLL, IRL_HHLH, IRL_HHHL,
+	IRQ0, IRQ1, IRQ2, IRQ3,
 	HUDII,
 	TMU0, TMU1, TMU2, TMU3, TMU4, TMU5,
 	PCII0, PCII1, PCII2, PCII3, PCII4,
@@ -291,7 +295,7 @@ enum {
 	INTICI4, INTICI5, INTICI6, INTICI7,
 
 	/* interrupt groups */
-	PCII56789, SCIF0, SCIF1, SCIF2, SCIF3,
+	IRL, PCII56789, SCIF0, SCIF1, SCIF2, SCIF3,
 	DMAC0, DMAC1,
 };
 
@@ -344,6 +348,10 @@ static struct intc_vect vectors[] __initdata = {
 };
 
 static struct intc_group groups[] __initdata = {
+	INTC_GROUP(IRL, IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+		   IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+		   IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+		   IRL_HHLL, IRL_HHLH, IRL_HHHL),
 	INTC_GROUP(PCII56789, PCII5, PCII6, PCII7, PCII8, PCII9),
 	INTC_GROUP(SCIF0, SCIF0_ERI, SCIF0_RXI, SCIF0_BRI, SCIF0_TXI),
 	INTC_GROUP(SCIF1, SCIF1_ERI, SCIF1_RXI, SCIF1_BRI, SCIF1_TXI),
@@ -419,14 +427,14 @@ static DECLARE_INTC_DESC(intc_desc_irq, "shx3-irq", vectors_irq, groups,
 
 /* External interrupt pins in IRL mode */
 static struct intc_vect vectors_irl[] __initdata = {
-	INTC_VECT(IRL, 0x200), INTC_VECT(IRL, 0x220),
-	INTC_VECT(IRL, 0x240), INTC_VECT(IRL, 0x260),
-	INTC_VECT(IRL, 0x280), INTC_VECT(IRL, 0x2a0),
-	INTC_VECT(IRL, 0x2c0), INTC_VECT(IRL, 0x2e0),
-	INTC_VECT(IRL, 0x300), INTC_VECT(IRL, 0x320),
-	INTC_VECT(IRL, 0x340), INTC_VECT(IRL, 0x360),
-	INTC_VECT(IRL, 0x380), INTC_VECT(IRL, 0x3a0),
-	INTC_VECT(IRL, 0x3c0),
+	INTC_VECT(IRL_LLLL, 0x200), INTC_VECT(IRL_LLLH, 0x220),
+	INTC_VECT(IRL_LLHL, 0x240), INTC_VECT(IRL_LLHH, 0x260),
+	INTC_VECT(IRL_LHLL, 0x280), INTC_VECT(IRL_LHLH, 0x2a0),
+	INTC_VECT(IRL_LHHL, 0x2c0), INTC_VECT(IRL_LHHH, 0x2e0),
+	INTC_VECT(IRL_HLLL, 0x300), INTC_VECT(IRL_HLLH, 0x320),
+	INTC_VECT(IRL_HLHL, 0x340), INTC_VECT(IRL_HLHH, 0x360),
+	INTC_VECT(IRL_HHLL, 0x380), INTC_VECT(IRL_HHLH, 0x3a0),
+	INTC_VECT(IRL_HHHL, 0x3c0),
 };
 
 static DECLARE_INTC_DESC(intc_desc_irl, "shx3-irl", vectors_irl, groups,
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index b0aacf67525..8f13f73cb2c 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -933,7 +933,7 @@ ret_with_reschedule:
 
 	pta	restore_all, tr1
 
-	movi	(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r8
+	movi	_TIF_SIGPENDING, r8
 	and	r8, r7, r8
 	pta	work_notifysig, tr0
 	bne	r8, ZERO, tr0
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 03b3616c80a..718286be664 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/mempool.h>
 #include <linux/mm.h>
+#include <linux/elf.h>
 #include <asm/dwarf.h>
 #include <asm/unwinder.h>
 #include <asm/sections.h>
@@ -529,7 +530,18 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start,
 }
 
 /**
- *	dwarf_unwind_stack - recursively unwind the stack
+ *	dwarf_free_frame - free the memory allocated for @frame
+ *	@frame: the frame to free
+ */
+void dwarf_free_frame(struct dwarf_frame *frame)
+{
+	dwarf_frame_free_regs(frame);
+	mempool_free(frame, dwarf_frame_pool);
+}
+
+/**
+ *	dwarf_unwind_stack - unwind the stack
+ *
  *	@pc: address of the function to unwind
  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
  *
@@ -547,9 +559,9 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	unsigned long addr;
 
 	/*
-	 * If this is the first invocation of this recursive function we
-	 * need get the contents of a physical register to get the CFA
-	 * in order to begin the virtual unwinding of the stack.
+	 * If we're starting at the top of the stack we need to get the
+	 * contents of a physical register to get the CFA in order to
+	 * begin the virtual unwinding of the stack.
 	 *
 	 * NOTE: the return address is guaranteed to be setup by the
 	 * time this function makes its first function call.
@@ -571,9 +583,8 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	fde = dwarf_lookup_fde(pc);
 	if (!fde) {
 		/*
-		 * This is our normal exit path - the one that stops the
-		 * recursion. There's two reasons why we might exit
-		 * here,
+		 * This is our normal exit path. There are two reasons
+		 * why we might exit here,
 		 *
 		 *	a) pc has no asscociated DWARF frame info and so
 		 *	we don't know how to unwind this frame. This is
@@ -615,10 +626,10 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 
 		} else {
 			/*
-			 * Again, this is the first invocation of this
-			 * recurisve function. We need to physically
-			 * read the contents of a register in order to
-			 * get the Canonical Frame Address for this
+			 * Again, we're starting from the top of the
+			 * stack. We need to physically read
+			 * the contents of a register in order to get
+			 * the Canonical Frame Address for this
 			 * function.
 			 */
 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
@@ -648,13 +659,12 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	return frame;
 
 bail:
-	dwarf_frame_free_regs(frame);
-	mempool_free(frame, dwarf_frame_pool);
+	dwarf_free_frame(frame);
 	return NULL;
 }
 
 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
-			   unsigned char *end)
+			   unsigned char *end, struct module *mod)
 {
 	struct dwarf_cie *cie;
 	unsigned long flags;
@@ -750,6 +760,8 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
 	cie->initial_instructions = p;
 	cie->instructions_end = end;
 
+	cie->mod = mod;
+
 	/* Add to list */
 	spin_lock_irqsave(&dwarf_cie_lock, flags);
 	list_add_tail(&cie->link, &dwarf_cie_list);
@@ -760,7 +772,7 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
 
 static int dwarf_parse_fde(void *entry, u32 entry_type,
 			   void *start, unsigned long len,
-			   unsigned char *end)
+			   unsigned char *end, struct module *mod)
 {
 	struct dwarf_fde *fde;
 	struct dwarf_cie *cie;
@@ -809,6 +821,8 @@ static int dwarf_parse_fde(void *entry, u32 entry_type,
 	fde->instructions = p;
 	fde->end = end;
 
+	fde->mod = mod;
+
 	/* Add to list. */
 	spin_lock_irqsave(&dwarf_fde_lock, flags);
 	list_add_tail(&fde->link, &dwarf_fde_list);
@@ -832,10 +846,8 @@ static void dwarf_unwinder_dump(struct task_struct *task,
 	while (1) {
 		frame = dwarf_unwind_stack(return_addr, _frame);
 
-		if (_frame) {
-			dwarf_frame_free_regs(_frame);
-			mempool_free(_frame, dwarf_frame_pool);
-		}
+		if (_frame)
+			dwarf_free_frame(_frame);
 
 		_frame = frame;
 
@@ -845,6 +857,9 @@ static void dwarf_unwinder_dump(struct task_struct *task,
 		return_addr = frame->return_addr;
 		ops->address(data, return_addr, 1);
 	}
+
+	if (frame)
+		dwarf_free_frame(frame);
 }
 
 static struct unwinder dwarf_unwinder = {
@@ -874,15 +889,15 @@ static void dwarf_unwinder_cleanup(void)
 }
 
 /**
- *	dwarf_unwinder_init - initialise the dwarf unwinder
+ *	dwarf_parse_section - parse DWARF section
+ *	@eh_frame_start: start address of the .eh_frame section
+ *	@eh_frame_end: end address of the .eh_frame section
+ *	@mod: the kernel module containing the .eh_frame section
  *
- *	Build the data structures describing the .dwarf_frame section to
- *	make it easier to lookup CIE and FDE entries. Because the
- *	.eh_frame section is packed as tightly as possible it is not
- *	easy to lookup the FDE for a given PC, so we build a list of FDE
- *	and CIE entries that make it easier.
+ *	Parse the information in a .eh_frame section.
  */
-static int __init dwarf_unwinder_init(void)
+static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
+			       struct module *mod)
 {
 	u32 entry_type;
 	void *p, *entry;
@@ -890,32 +905,12 @@ static int __init dwarf_unwinder_init(void)
 	unsigned long len;
 	unsigned int c_entries, f_entries;
 	unsigned char *end;
-	INIT_LIST_HEAD(&dwarf_cie_list);
-	INIT_LIST_HEAD(&dwarf_fde_list);
 
 	c_entries = 0;
 	f_entries = 0;
-	entry = &__start_eh_frame;
-
-	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
-			sizeof(struct dwarf_frame), 0,
-			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
-
-	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
-			sizeof(struct dwarf_reg), 0,
-			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+	entry = eh_frame_start;
 
-	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
-					  mempool_alloc_slab,
-					  mempool_free_slab,
-					  dwarf_frame_cachep);
-
-	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
-					 mempool_alloc_slab,
-					 mempool_free_slab,
-					 dwarf_reg_cachep);
-
-	while ((char *)entry < __stop_eh_frame) {
+	while ((char *)entry < eh_frame_end) {
 		p = entry;
 
 		count = dwarf_entry_len(p, &len);
@@ -927,6 +922,7 @@ static int __init dwarf_unwinder_init(void)
 			 * entry and move to the next one because 'len'
 			 * tells us where our next entry is.
 			 */
+			err = -EINVAL;
 			goto out;
 		} else
 			p += count;
@@ -938,13 +934,14 @@ static int __init dwarf_unwinder_init(void)
 		p += 4;
 
 		if (entry_type == DW_EH_FRAME_CIE) {
-			err = dwarf_parse_cie(entry, p, len, end);
+			err = dwarf_parse_cie(entry, p, len, end, mod);
 			if (err < 0)
 				goto out;
 			else
 				c_entries++;
 		} else {
-			err = dwarf_parse_fde(entry, entry_type, p, len, end);
+			err = dwarf_parse_fde(entry, entry_type, p, len,
+					      end, mod);
 			if (err < 0)
 				goto out;
 			else
@@ -957,6 +954,129 @@ static int __init dwarf_unwinder_init(void)
 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
 	       c_entries, f_entries);
 
+	return 0;
+
+out:
+	return err;
+}
+
+#ifdef CONFIG_MODULES
+/* Parse @me's allocated .eh_frame section, if it has one; 0 or -errno. */
+int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+			  struct module *me)
+{
+	unsigned long start = 0, end = 0;
+	unsigned int i;
+	int err;	/* signed: dwarf_parse_section() reports -errno */
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (i = 1; i < hdr->e_shnum; i++) {
+		/* Alloc bit cleared means "ignore it." */
+		if ((sechdrs[i].sh_flags & SHF_ALLOC)
+		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
+			start = sechdrs[i].sh_addr;
+			end = start + sechdrs[i].sh_size;
+			break;
+		}
+	}
+
+	/* Did we find the .eh_frame section? */
+	if (i != hdr->e_shnum) {
+		err = dwarf_parse_section((char *)start, (char *)end, me);
+		if (err) {
+			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
+			       me->name);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
+ *	@mod: the module that is being unloaded
+ *
+ *	Remove any FDEs and CIEs from the global lists that came from
+ *	@mod's .eh_frame section because @mod is being unloaded.
+ */
+void module_dwarf_cleanup(struct module *mod)
+{
+	struct dwarf_fde *fde, *next_fde;
+	struct dwarf_cie *cie, *next_cie;
+	unsigned long flags;
+
+	/*
+	 * The _safe iterators remember the next node before the loop
+	 * body runs, so matching entries can be unlinked and freed in
+	 * a single pass per list instead of restarting the walk after
+	 * every removal.
+	 */
+	spin_lock_irqsave(&dwarf_cie_lock, flags);
+
+	/* Drop every CIE that was registered on behalf of @mod. */
+	list_for_each_entry_safe(cie, next_cie, &dwarf_cie_list, link) {
+		if (cie->mod != mod)
+			continue;
+
+		list_del(&cie->link);
+		kfree(cie);
+	}
+
+	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
+
+	spin_lock_irqsave(&dwarf_fde_lock, flags);
+
+	/* Likewise for the FDEs, under their own lock. */
+	list_for_each_entry_safe(fde, next_fde, &dwarf_fde_list, link) {
+		if (fde->mod != mod)
+			continue;
+
+		list_del(&fde->link);
+		kfree(fde);
+	}
+
+	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
+}
+#endif /* CONFIG_MODULES */
+
+/**
+ *	dwarf_unwinder_init - initialise the dwarf unwinder
+ *
+ *	Build the data structures describing the .eh_frame section to
+ *	make it easier to lookup CIE and FDE entries. Because the
+ *	.eh_frame section is packed as tightly as possible it is not
+ *	easy to lookup the FDE for a given PC, so we build a list of FDE
+ *	and CIE entries that make it easier.
+ */
+static int __init dwarf_unwinder_init(void)
+{
+	int err;
+	INIT_LIST_HEAD(&dwarf_cie_list);
+	INIT_LIST_HEAD(&dwarf_fde_list);
+
+	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
+			sizeof(struct dwarf_frame), 0,
+			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+
+	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
+			sizeof(struct dwarf_reg), 0,
+			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+
+	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
+					  mempool_alloc_slab,
+					  mempool_free_slab,
+					  dwarf_frame_cachep);
+
+	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
+					 mempool_alloc_slab,
+					 mempool_free_slab,
+					 dwarf_reg_cachep);
+
+	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
+	if (err)
+		goto out;
+
 	err = unwinder_register(&dwarf_unwinder);
 	if (err)
 		goto out;
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 3eb84931d2a..f0abd58c3a6 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -133,7 +133,7 @@ work_pending:
 	! r8: current_thread_info
 	! t:  result of "tst	#_TIF_NEED_RESCHED, r0"
 	bf/s	work_resched
-	 tst	#(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
+	 tst	#_TIF_SIGPENDING, r0
 work_notifysig:
 	bt/s	__restore_all
 	 mov	r15, r4
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 2c48e267256..b6f41c109be 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -62,6 +62,150 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return ftrace_replaced_code;
 }
 
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUs from executing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ *    and the new code into the "code" buffer.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
+static int mod_code_status;		/* holds return value of text write */
+static void *mod_code_ip;		/* holds the IP to write to */
+static void *mod_code_newcode;		/* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+	int r;
+
+	r = snprintf(buf, size, "%u %u",
+		     nmi_wait_count,
+		     atomic_read(&nmi_update_count));
+	return r;
+}
+
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
+static void ftrace_mod_code(void)
+{
+	/*
+	 * Yes, more than one CPU process can be writing to mod_code_status.
+	 *    (and the code itself)
+	 * But if one were to fail, then they all should, and if one were
+	 * to succeed, then they all should.
+	 */
+	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+					     MCOUNT_INSN_SIZE);
+
+	/* if we fail, then kill any new writers */
+	if (mod_code_status)
+		clear_mod_flag();
+}
+
+void ftrace_nmi_enter(void)
+{
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
+		ftrace_mod_code();
+		atomic_inc(&nmi_update_count);
+	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
+}
+
+void ftrace_nmi_exit(void)
+{
+	/* Finish all executions before clearing nmi_running */
+	smp_mb();
+	atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
+static void wait_for_nmi(void)
+{
+	if (!atomic_read(&nmi_running))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_read(&nmi_running));
+
+	nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+	mod_code_ip = (void *)ip;
+	mod_code_newcode = new_code;
+
+	/* The buffers need to be visible before we let NMIs write them */
+	smp_mb();
+
+	wait_for_nmi_and_set_mod_flag();
+
+	/* Make sure all running NMIs have finished before we write the code */
+	smp_mb();
+
+	ftrace_mod_code();
+
+	/* Make sure the write happens before clearing the bit */
+	smp_mb();
+
+	clear_mod_flag();
+	wait_for_nmi();
+
+	return mod_code_status;
+}
+
 static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		       unsigned char *new_code)
 {
@@ -86,7 +230,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		return -EINVAL;
 
 	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+	if (do_ftrace_mod_code(ip, new_code))
 		return -EPERM;
 
 	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index a78be74b8d3..1151ecdffa7 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -33,7 +33,7 @@ ENTRY(empty_zero_page)
 	.long	1		/* LOADER_TYPE */
 	.long	0x00000000	/* INITRD_START */
 	.long	0x00000000	/* INITRD_SIZE */
-#ifdef CONFIG_32BIT
+#if defined(CONFIG_32BIT) && defined(CONFIG_PMB_FIXED)
 	.long	0x53453f00 + 32	/* "SE?" = 32 bit */
 #else
 	.long	0x53453f00 + 29	/* "SE?" = 29 bit */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 7cb933ba495..11c289ecc09 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -36,7 +36,15 @@ void ack_bad_irq(unsigned int irq)
  */
 static int show_other_interrupts(struct seq_file *p, int prec)
 {
+	int j;
+
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stat[j].__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+
 	return 0;
 }
 
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 7ea2704ea03..de7cf5477d3 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -49,7 +49,7 @@ int machine_kexec_prepare(struct kimage *image)
 	/* older versions of kexec-tools are passing
 	 * the zImage entry point as a virtual address.
 	 */
-	if (image->start != PHYSADDR(image->start))
+	if (image->start != __pa(image->start))
 		return -EINVAL; /* upgrade your kexec-tools */
 
 	return 0;
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index c2efdcde266..43adddfe4c0 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/unaligned.h>
+#include <asm/dwarf.h>
 
 void *module_alloc(unsigned long size)
 {
@@ -145,10 +146,16 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	return module_bug_finalize(hdr, sechdrs, me);
+	int ret;
+
+	/* First failure wins; OR-ing two -errno values yields a bogus code. */
+	ret = module_dwarf_finalize(hdr, sechdrs, me);
+
+	return ret ? ret : module_bug_finalize(hdr, sechdrs, me);
 }
 
 void module_arch_cleanup(struct module *mod)
 {
 	module_bug_cleanup(mod);
+	module_dwarf_cleanup(mod);
 }
diff --git a/arch/sh/kernel/return_address.c b/arch/sh/kernel/return_address.c
new file mode 100644
index 00000000000..df3ab581107
--- /dev/null
+++ b/arch/sh/kernel/return_address.c
@@ -0,0 +1,54 @@
+/*
+ * arch/sh/kernel/return_address.c
+ *
+ * Copyright (C) 2009  Matt Fleming
+ * Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <asm/dwarf.h>
+
+#ifdef CONFIG_DWARF_UNWINDER
+
+void *return_address(unsigned int depth)
+{
+	struct dwarf_frame *frame;
+	unsigned long ra;
+	int i;
+
+	for (i = 0, frame = NULL, ra = 0; i <= depth; i++) {
+		struct dwarf_frame *tmp;
+
+		tmp = dwarf_unwind_stack(ra, frame);
+
+		if (frame)
+			dwarf_free_frame(frame);
+
+		frame = tmp;
+
+		if (!frame || !frame->return_addr)
+			break;
+
+		ra = frame->return_addr;
+	}
+
+	/* Failed to unwind the stack to the specified depth. */
+	WARN_ON(i != depth + 1);
+
+	if (frame)
+		dwarf_free_frame(frame);
+
+	return (void *)ra;
+}
+
+#else
+
+void *return_address(unsigned int depth)
+{
+	return NULL;
+}
+
+#endif
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 99b4fb553bf..5a947a2567e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -453,6 +453,10 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 
+#ifdef CONFIG_PMB_ENABLE
+	pmb_init();
+#endif
+
 #ifdef CONFIG_SMP
 	plat_smp_setup();
 #endif
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 3db37425210..12815ce01ec 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -67,7 +67,8 @@ sys_sigsuspend(old_sigset_t mask,
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
+	set_restore_sigmask();
+
 	return -ERESTARTNOHAND;
 }
 
@@ -590,7 +591,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else
 		oldset = &current->blocked;
@@ -602,12 +603,13 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &ka, &info, oldset,
 				  regs, save_r0) == 0) {
-			/* a signal was successfully delivered; the saved
+			/*
+			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
+			 * clear the TS_RESTORE_SIGMASK flag
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 
 			tracehook_signal_handler(signr, &info, &ka, regs,
 					test_thread_flag(TIF_SINGLESTEP));
@@ -631,10 +633,12 @@ no_signal:
 		}
 	}
 
-	/* if there's no signal to deliver, we just put the saved sigmask
-	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask
+	 * back.
+	 */
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 }
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 74793c80a57..feb3dddd319 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -101,7 +101,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset)
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else if (!oldset)
 		oldset = &current->blocked;
@@ -115,11 +115,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset)
 			/*
 			 * If a signal was successfully delivered, the
 			 * saved sigmask is in its frame, and we can
-			 * clear the TIF_RESTORE_SIGMASK flag.
+			 * clear the TS_RESTORE_SIGMASK flag.
 			 */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
-
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 			tracehook_signal_handler(signr, &info, &ka, regs, 0);
 			return 1;
 		}
@@ -146,8 +144,8 @@ no_signal:
 	}
 
 	/* No signal to deliver -- put the saved sigmask back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 
@@ -176,6 +174,7 @@ sys_sigsuspend(old_sigset_t mask,
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
+		set_restore_sigmask();
 		regs->pc += 4;    /* because sys_sigreturn decrements the pc */
 		if (do_signal(regs, &saveset)) {
 			/* pc now points at signal handler. Need to decrement
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 160db1003cf..983e0792d5f 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -122,7 +122,9 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	stack_start.bss_start = 0; /* don't clear bss for secondary cpus */
 	stack_start.start_kernel_fn = start_secondary;
 
-	flush_cache_all();
+	flush_icache_range((unsigned long)&stack_start,
+			   (unsigned long)&stack_start + sizeof(stack_start));
+	wmb();
 
 	plat_start_cpu(cpu, (unsigned long)_stext);
 
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index a8396f36bd1..d52695df270 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -95,9 +95,11 @@ BUILD_TRAP_HANDLER(bug)
 
 BUILD_TRAP_HANDLER(nmi)
 {
+	unsigned int cpu = smp_processor_id();
 	TRAP_HANDLER_DECL;
 
 	nmi_enter();
+	nmi_count(cpu)++;
 
 	switch (notify_die(DIE_NMI, "NMI", regs, 0, vec & 0xff, SIGINT)) {
 	case NOTIFY_OK:
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 64dc1ad5980..b8a9032c74b 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -83,7 +83,6 @@ config 32BIT
 config PMB_ENABLE
 	bool "Support 32-bit physical addressing through PMB"
 	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
-	select 32BIT
 	default y
 	help
 	  If you say Y here, physical addressing will be extended to
@@ -98,7 +97,6 @@ choice
 config PMB
 	bool "PMB"
 	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
-	select 32BIT
 	help
 	  If you say Y here, physical addressing will be extended to
 	  32-bits through the SH-4A PMB. If this is not set, legacy
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 3759bf85329..8a70535fa7c 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -33,8 +33,7 @@ obj-y				+= $(tlb-y)
 endif
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_PMB)		+= pmb.o
-obj-$(CONFIG_PMB_FIXED)		+= pmb-fixed.o
+obj-$(CONFIG_PMB_ENABLE)	+= pmb.o
 obj-$(CONFIG_NUMA)		+= numa.o
 
 # Special flags for fault_64.o.  This puts restrictions on the number of
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 519e2d16cd0..4a2fbf2864d 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2001 - 2007  Paul Mundt
+ * Copyright (C) 2001 - 2009  Paul Mundt
  * Copyright (C) 2003  Richard Curnow
  * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
  *
@@ -15,6 +15,8 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/fs.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -23,21 +25,12 @@
  * flushing. Anything exceeding this will simply flush the dcache in its
  * entirety.
  */
-#define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
 #define MAX_ICACHE_PAGES	32
 
 static void __flush_cache_one(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset);
 
 /*
- * This is initialised here to ensure that it is not placed in the BSS.  If
- * that were to happen, note that cache_init gets called before the BSS is
- * cleared, so this would get nulled out which would be hopeless.
- */
-static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
-	(void (*)(unsigned long, unsigned long))0xdeadbeef;
-
-/*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
@@ -94,15 +87,16 @@ static inline void flush_cache_one(unsigned long start, unsigned long phys)
 	unsigned long flags, exec_offset = 0;
 
 	/*
-	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
-	 * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
+	 * All types of SH-4 require PC to be uncached to operate on the I-cache.
+	 * Some types of SH-4 require PC to be uncached to operate on the D-cache.
 	 */
 	if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
 	    (start < CACHE_OC_ADDRESS_ARRAY))
-		exec_offset = 0x20000000;
+		exec_offset = cached_to_uncached;
 
 	local_irq_save(flags);
-	__flush_cache_one(start | SH_CACHE_ASSOC, P1SEGADDR(phys), exec_offset);
+	__flush_cache_one(start | SH_CACHE_ASSOC,
+			  virt_to_phys(phys), exec_offset);
 	local_irq_restore(flags);
 }
 
@@ -121,13 +115,13 @@ static void sh4_flush_dcache_page(void *arg)
 	else
 #endif
 	{
-		unsigned long phys = PHYSADDR(page_address(page));
+		unsigned long phys = page_to_phys(page);
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
 		int i, n;
 
 		/* Loop all the D-cache */
 		n = boot_cpu_data.dcache.n_aliases;
-		for (i = 0; i < n; i++, addr += PAGE_SIZE)
+		for (i = 0; i <= n; i++, addr += PAGE_SIZE)
 			flush_cache_one(addr, phys);
 	}
 
@@ -156,10 +150,27 @@ static void __uses_jump_to_uncached flush_icache_all(void)
 	local_irq_restore(flags);
 }
 
-static inline void flush_dcache_all(void)
+static void flush_dcache_all(void)
 {
-	(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
-	wmb();
+	unsigned long addr, end_addr, entry_offset;
+
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(current_cpu_data.dcache.sets <<
+		 current_cpu_data.dcache.entry_shift) *
+			current_cpu_data.dcache.ways;
+
+	entry_offset = 1 << current_cpu_data.dcache.entry_shift;
+
+	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+	}
 }
 
 static void sh4_flush_cache_all(void *unused)
@@ -168,89 +179,13 @@ static void sh4_flush_cache_all(void *unused)
 	flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-			     unsigned long end)
-{
-	unsigned long d = 0, p = start & PAGE_MASK;
-	unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-	unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-	unsigned long select_bit;
-	unsigned long all_aliases_mask;
-	unsigned long addr_offset;
-	pgd_t *dir;
-	pmd_t *pmd;
-	pud_t *pud;
-	pte_t *pte;
-	int i;
-
-	dir = pgd_offset(mm, p);
-	pud = pud_offset(dir, p);
-	pmd = pmd_offset(pud, p);
-	end = PAGE_ALIGN(end);
-
-	all_aliases_mask = (1 << n_aliases) - 1;
-
-	do {
-		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-			p &= PMD_MASK;
-			p += PMD_SIZE;
-			pmd++;
-
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, p);
-
-		do {
-			unsigned long phys;
-			pte_t entry = *pte;
-
-			if (!(pte_val(entry) & _PAGE_PRESENT)) {
-				pte++;
-				p += PAGE_SIZE;
-				continue;
-			}
-
-			phys = pte_val(entry) & PTE_PHYS_MASK;
-
-			if ((p ^ phys) & alias_mask) {
-				d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-				d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-				if (d == all_aliases_mask)
-					goto loop_exit;
-			}
-
-			pte++;
-			p += PAGE_SIZE;
-		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-		pmd++;
-	} while (p < end);
-
-loop_exit:
-	addr_offset = 0;
-	select_bit = 1;
-
-	for (i = 0; i < n_aliases; i++) {
-		if (d & select_bit) {
-			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-			wmb();
-		}
-
-		select_bit <<= 1;
-		addr_offset += PAGE_SIZE;
-	}
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag).  It's no different here.  So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that.  We
- * should try that.
+ * address (i.e. tag).  It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -261,33 +196,7 @@ static void sh4_flush_cache_mm(void *arg)
 	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
 		return;
 
-	/*
-	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
-	 * the cache is physically tagged, the data can just be left in there.
-	 */
-	if (boot_cpu_data.dcache.n_aliases == 0)
-		return;
-
-	/*
-	 * Don't bother groveling around the dcache for the VMA ranges
-	 * if there are too many PTEs to make it worthwhile.
-	 */
-	if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else {
-		struct vm_area_struct *vma;
-
-		/*
-		 * In this case there are reasonably sized ranges to flush,
-		 * iterate through the VMA list and take care of any aliases.
-		 */
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-	}
-
-	/* Only touch the icache if one of the VMAs has VM_EXEC set. */
-	if (mm->exec_vm)
-		flush_icache_all();
+	flush_dcache_all();
 }
 
 /*
@@ -300,44 +209,63 @@ static void sh4_flush_cache_page(void *args)
 {
 	struct flusher_data *data = args;
 	struct vm_area_struct *vma;
+	struct page *page;
 	unsigned long address, pfn, phys;
-	unsigned int alias_mask;
+	int map_coherent = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	void *vaddr;
 
 	vma = data->vma;
-	address = data->addr1;
+	address = data->addr1 & PAGE_MASK;
 	pfn = data->addr2;
 	phys = pfn << PAGE_SHIFT;
+	page = pfn_to_page(pfn);
 
 	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
 		return;
 
-	alias_mask = boot_cpu_data.dcache.alias_mask;
-
-	/* We only need to flush D-cache when we have alias */
-	if ((address^phys) & alias_mask) {
-		/* Loop 4K of the D-cache */
-		flush_cache_one(
-			CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
-		/* Loop another 4K of the D-cache */
-		flush_cache_one(
-			CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
-			phys);
-	}
+	pgd = pgd_offset(vma->vm_mm, address);
+	pud = pud_offset(pgd, address);
+	pmd = pmd_offset(pud, address);
+	pte = pte_offset_kernel(pmd, address);
+
+	/* If the page isn't present, there is nothing to do here. */
+	if (!(pte_val(*pte) & _PAGE_PRESENT))
+		return;
 
-	alias_mask = boot_cpu_data.icache.alias_mask;
-	if (vma->vm_flags & VM_EXEC) {
+	if ((vma->vm_mm == current->active_mm))
+		vaddr = NULL;
+	else {
 		/*
-		 * Evict entries from the portion of the cache from which code
-		 * may have been executed at this address (virtual).  There's
-		 * no need to evict from the portion corresponding to the
-		 * physical address as for the D-cache, because we know the
-		 * kernel has never executed the code through its identity
-		 * translation.
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
 		 */
-		flush_cache_one(
-			CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
+		map_coherent = (current_cpu_data.dcache.n_aliases &&
+			!test_bit(PG_dcache_dirty, &page->flags) &&
+			page_mapped(page));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, address);
+		else
+			vaddr = kmap_atomic(page, KM_USER0);
+
+		address = (unsigned long)vaddr;
+	}
+
+	if (pages_do_alias(address, phys))
+		flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+			(address & shm_align_mask), phys);
+
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_all();
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent(vaddr);
+		else
+			kunmap_atomic(vaddr, KM_USER0);
 	}
 }
 
@@ -370,24 +298,10 @@ static void sh4_flush_cache_range(void *args)
 	if (boot_cpu_data.dcache.n_aliases == 0)
 		return;
 
-	/*
-	 * Don't bother with the lookup and alias check if we have a
-	 * wide range to cover, just blow away the dcache in its
-	 * entirety instead. -- PFM.
-	 */
-	if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else
-		__flush_cache_mm(vma->vm_mm, start, end);
+	flush_dcache_all();
 
-	if (vma->vm_flags & VM_EXEC) {
-		/*
-		 * TODO: Is this required???  Need to look at how I-cache
-		 * coherency is assured when new programs are loaded to see if
-		 * this matters.
-		 */
+	if (vma->vm_flags & VM_EXEC)
 		flush_icache_all();
-	}
 }
 
 /**
@@ -461,245 +375,6 @@ static void __flush_cache_one(unsigned long addr, unsigned long phys,
 	} while (--way_count != 0);
 }
 
-/*
- * Break the 1, 2 and 4 way variants of this out into separate functions to
- * avoid nearly all the overhead of having the conditional stuff in the function
- * bodies (+ the 1 and 2 way cases avoid saving any registers too).
- *
- * We want to eliminate unnecessary bus transactions, so this code uses
- * a non-obvious technique.
- *
- * Loop over a cache way sized block of, one cache line at a time. For each
- * line, use movca.a to cause the current cache line contents to be written
- * back, but without reading anything from main memory. However this has the
- * side effect that the cache is now caching that memory location. So follow
- * this with a cache invalidate to mark the cache line invalid. And do all
- * this with interrupts disabled, to avoid the cache line being accidently
- * evicted while it is holding garbage.
- *
- * This also breaks in a number of circumstances:
- * - if there are modifications to the region of memory just above
- *   empty_zero_page (for example because a breakpoint has been placed
- *   there), then these can be lost.
- *
- *   This is because the the memory address which the cache temporarily
- *   caches in the above description is empty_zero_page. So the
- *   movca.l hits the cache (it is assumed that it misses, or at least
- *   isn't dirty), modifies the line and then invalidates it, losing the
- *   required change.
- *
- * - If caches are disabled or configured in write-through mode, then
- *   the movca.l writes garbage directly into memory.
- */
-static void __flush_dcache_segment_writethrough(unsigned long start,
-					        unsigned long extent_per_way)
-{
-	unsigned long addr;
-	int i;
-
-	addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);
-
-	while (extent_per_way) {
-		for (i = 0; i < cpu_data->dcache.ways; i++)
-			__raw_writel(0, addr + cpu_data->dcache.way_incr * i);
-
-		addr += cpu_data->dcache.linesz;
-		extent_per_way -= cpu_data->dcache.linesz;
-	}
-}
-
-static void __flush_dcache_segment_1way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/*
-	 * The previous code aligned base_addr to 16k, i.e. the way_size of all
-	 * existing SH-4 D-caches.  Whilst I don't see a need to have this
-	 * aligned to any better than the cache line size (which it will be
-	 * anyway by construction), let's align it to at least the way_size of
-	 * any existing or conceivable SH-4 D-cache.  -- RPC
-	 */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_2way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_4way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a2, a3, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a2 = a1 + way_incr;
-	a3 = a2 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-	} while (a0 < a0e);
-}
-
 extern void __weak sh4__flush_region_init(void);
 
 /*
@@ -707,32 +382,11 @@ extern void __weak sh4__flush_region_init(void);
  */
 void __init sh4_cache_init(void)
 {
-	unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
-
 	printk("PVR=%08x CVR=%08x PRR=%08x\n",
 		ctrl_inl(CCN_PVR),
 		ctrl_inl(CCN_CVR),
 		ctrl_inl(CCN_PRR));
 
-	if (wt_enabled)
-		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
-	else {
-		switch (boot_cpu_data.dcache.ways) {
-		case 1:
-			__flush_dcache_segment_fn = __flush_dcache_segment_1way;
-			break;
-		case 2:
-			__flush_dcache_segment_fn = __flush_dcache_segment_2way;
-			break;
-		case 4:
-			__flush_dcache_segment_fn = __flush_dcache_segment_4way;
-			break;
-		default:
-			panic("unknown number of cache ways\n");
-			break;
-		}
-	}
-
 	local_flush_icache_range	= sh4_flush_icache_range;
 	local_flush_dcache_page		= sh4_flush_dcache_page;
 	local_flush_cache_all		= sh4_flush_cache_all;
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 2601935eb58..f527fb70fce 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -141,7 +141,7 @@ static void sh7705_flush_dcache_page(void *arg)
 	if (mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
-		__flush_dcache_page(PHYSADDR(page_address(page)));
+		__flush_dcache_page(__pa(page_address(page)));
 }
 
 static void __uses_jump_to_uncached sh7705_flush_cache_all(void *args)
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index a2dc7f9ecc5..fc372a1d313 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -164,11 +164,17 @@ void flush_cache_all(void)
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
 
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index e098ec158dd..9a8403d9344 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction direction)
 {
-#ifdef CONFIG_CPU_SH5
+#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB)
 	void *p1addr = vaddr;
 #else
 	void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 8173e38afd3..c8af6c5fa58 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -323,4 +323,12 @@ int memory_add_physaddr_to_nid(u64 addr)
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_PMB
+int __in_29bit_mode(void)
+{
+	return !(ctrl_inl(PMB_PASCR) & PASCR_SE);
+}
+#endif /* CONFIG_PMB */
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 16e01b5fed0..15d74ea4209 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 	pagefault_disable();
 
 	idx = FIX_CMAP_END -
-		((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
+		(((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
+		 (FIX_N_COLOURS * smp_processor_id()));
+
 	vaddr = __fix_to_virt(idx);
 
 	BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
diff --git a/arch/sh/mm/pmb-fixed.c b/arch/sh/mm/pmb-fixed.c
deleted file mode 100644
index 43c8eac4d8a..00000000000
--- a/arch/sh/mm/pmb-fixed.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * arch/sh/mm/fixed_pmb.c
- *
- * Copyright (C) 2009  Renesas Solutions Corp.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-
-static int __uses_jump_to_uncached fixed_pmb_init(void)
-{
-	int i;
-	unsigned long addr, data;
-
-	jump_to_uncached();
-
-	for (i = 0; i < PMB_ENTRY_MAX; i++) {
-		addr = PMB_DATA + (i << PMB_E_SHIFT);
-		data = ctrl_inl(addr);
-		if (!(data & PMB_V))
-			continue;
-
-		if (data & PMB_C) {
-#if defined(CONFIG_CACHE_WRITETHROUGH)
-			data |= PMB_WT;
-#elif defined(CONFIG_CACHE_WRITEBACK)
-			data &= ~PMB_WT;
-#else
-			data &= ~(PMB_C | PMB_WT);
-#endif
-		}
-		ctrl_outl(data, addr);
-	}
-
-	back_to_cached();
-
-	return 0;
-}
-arch_initcall(fixed_pmb_init);
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index aade3110211..280f6a16603 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -35,29 +35,9 @@
 
 static void __pmb_unmap(struct pmb_entry *);
 
-static struct kmem_cache *pmb_cache;
+static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES];
 static unsigned long pmb_map;
 
-static struct pmb_entry pmb_init_map[] = {
-	/* vpn         ppn         flags (ub/sz/c/wt) */
-
-	/* P1 Section Mappings */
-	{ 0x80000000, 0x00000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x84000000, 0x04000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, },
-	{ 0x90000000, 0x10000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x94000000, 0x14000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x98000000, 0x18000000, PMB_SZ_64M  | PMB_C, },
-
-	/* P2 Section Mappings */
-	{ 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, },
-	{ 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-};
-
 static inline unsigned long mk_pmb_entry(unsigned int entry)
 {
 	return (entry & PMB_E_MASK) << PMB_E_SHIFT;
@@ -73,81 +53,68 @@ static inline unsigned long mk_pmb_data(unsigned int entry)
 	return mk_pmb_entry(entry) | PMB_DATA;
 }
 
-static DEFINE_SPINLOCK(pmb_list_lock);
-static struct pmb_entry *pmb_list;
-
-static inline void pmb_list_add(struct pmb_entry *pmbe)
+static int pmb_alloc_entry(void)
 {
-	struct pmb_entry **p, *tmp;
+	unsigned int pos;
 
-	p = &pmb_list;
-	while ((tmp = *p) != NULL)
-		p = &tmp->next;
+repeat:
+	pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
 
-	pmbe->next = tmp;
-	*p = pmbe;
-}
+	if (unlikely(pos >= NR_PMB_ENTRIES))	/* no zero bit: table full */
+		return -ENOSPC;
 
-static inline void pmb_list_del(struct pmb_entry *pmbe)
-{
-	struct pmb_entry **p, *tmp;
+	if (test_and_set_bit(pos, &pmb_map))
+		goto repeat;
 
-	for (p = &pmb_list; (tmp = *p); p = &tmp->next)
-		if (tmp == pmbe) {
-			*p = tmp->next;
-			return;
-		}
+	return pos;
 }
 
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
-			    unsigned long flags)
+static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
+				   unsigned long flags, int entry)
 {
 	struct pmb_entry *pmbe;
+	int pos = entry;
+
+	if (entry == PMB_NO_ENTRY) {
+		pos = pmb_alloc_entry();
+		if (pos < 0)
+			return ERR_PTR(pos);
+	} else {
+		/* Claim the requested slot so it is never handed out twice */
+		if (test_and_set_bit(entry, &pmb_map))
+			return ERR_PTR(-ENOSPC);
+	}
 
-	pmbe = kmem_cache_alloc(pmb_cache, GFP_KERNEL);
+	pmbe = &pmb_entry_list[pos];
 	if (!pmbe)
 		return ERR_PTR(-ENOMEM);
 
 	pmbe->vpn	= vpn;
 	pmbe->ppn	= ppn;
 	pmbe->flags	= flags;
-
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_add(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	pmbe->entry	= pos;
 
 	return pmbe;
 }
 
-void pmb_free(struct pmb_entry *pmbe)
+static void pmb_free(struct pmb_entry *pmbe)
 {
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_del(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	int pos = pmbe->entry;
 
-	kmem_cache_free(pmb_cache, pmbe);
+	pmbe->vpn	= 0;
+	pmbe->ppn	= 0;
+	pmbe->flags	= 0;
+	pmbe->entry	= 0;
+
+	clear_bit(pos, &pmb_map);
 }
 
 /*
  * Must be in P2 for __set_pmb_entry()
  */
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
-		    unsigned long flags, int *entry)
+static void __set_pmb_entry(unsigned long vpn, unsigned long ppn,
+			    unsigned long flags, int pos)
 {
-	unsigned int pos = *entry;
-
-	if (unlikely(pos == PMB_NO_ENTRY))
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
-
-repeat:
-	if (unlikely(pos > NR_PMB_ENTRIES))
-		return -ENOSPC;
-
-	if (test_and_set_bit(pos, &pmb_map)) {
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
-		goto repeat;
-	}
-
 	ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos));
 
 #ifdef CONFIG_CACHE_WRITETHROUGH
@@ -161,35 +128,21 @@ repeat:
 #endif
 
 	ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos));
-
-	*entry = pos;
-
-	return 0;
 }
 
-int __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
+static void __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
 {
-	int ret;
-
 	jump_to_uncached();
-	ret = __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &pmbe->entry);
+	__set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, pmbe->entry);
 	back_to_cached();
-
-	return ret;
 }
 
-void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
+static void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
 {
 	unsigned int entry = pmbe->entry;
 	unsigned long addr;
 
-	/*
-	 * Don't allow clearing of wired init entries, P1 or P2 access
-	 * without a corresponding mapping in the PMB will lead to reset
-	 * by the TLB.
-	 */
-	if (unlikely(entry < ARRAY_SIZE(pmb_init_map) ||
-		     entry >= NR_PMB_ENTRIES))
+	if (unlikely(entry >= NR_PMB_ENTRIES))
 		return;
 
 	jump_to_uncached();
@@ -202,8 +155,6 @@ void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
 	ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
 
 	back_to_cached();
-
-	clear_bit(entry, &pmb_map);
 }
 
 
@@ -239,23 +190,17 @@ long pmb_remap(unsigned long vaddr, unsigned long phys,
 
 again:
 	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
-		int ret;
-
 		if (size < pmb_sizes[i].size)
 			continue;
 
-		pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag);
+		pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag,
+				 PMB_NO_ENTRY);
 		if (IS_ERR(pmbe)) {
 			err = PTR_ERR(pmbe);
 			goto out;
 		}
 
-		ret = set_pmb_entry(pmbe);
-		if (ret != 0) {
-			pmb_free(pmbe);
-			err = -EBUSY;
-			goto out;
-		}
+		set_pmb_entry(pmbe);
 
 		phys	+= pmb_sizes[i].size;
 		vaddr	+= pmb_sizes[i].size;
@@ -292,11 +237,16 @@ out:
 
 void pmb_unmap(unsigned long addr)
 {
-	struct pmb_entry **p, *pmbe;
+	struct pmb_entry *pmbe = NULL;
+	int i;
 
-	for (p = &pmb_list; (pmbe = *p); p = &pmbe->next)
-		if (pmbe->vpn == addr)
-			break;
+	/* Leave pmbe NULL unless an in-use slot really maps addr */
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		if (test_bit(i, &pmb_map) && pmb_entry_list[i].vpn == addr) {
+			pmbe = &pmb_entry_list[i];
+			break;
+		}
+	}
 
 	if (unlikely(!pmbe))
 		return;
@@ -306,13 +256,22 @@ void pmb_unmap(unsigned long addr)
 
 static void __pmb_unmap(struct pmb_entry *pmbe)
 {
-	WARN_ON(!test_bit(pmbe->entry, &pmb_map));
+	BUG_ON(!test_bit(pmbe->entry, &pmb_map));
 
 	do {
 		struct pmb_entry *pmblink = pmbe;
 
-		if (pmbe->entry != PMB_NO_ENTRY)
-			clear_pmb_entry(pmbe);
+		/*
+		 * We may be called before this pmb_entry has been
+		 * entered into the PMB table via set_pmb_entry(), but
+		 * that's OK because we've allocated a unique slot for
+		 * this entry in pmb_alloc() (even if we haven't filled
+		 * it yet).
+		 *
+		 * Therefore, calling clear_pmb_entry() is safe as no
+		 * other mapping can be using that slot.
+		 */
+		clear_pmb_entry(pmbe);
 
 		pmbe = pmblink->link;
 
@@ -320,42 +279,34 @@ static void __pmb_unmap(struct pmb_entry *pmbe)
 	} while (pmbe);
 }
 
-static void pmb_cache_ctor(void *pmb)
+#ifdef CONFIG_PMB
+int __uses_jump_to_uncached pmb_init(void)
 {
-	struct pmb_entry *pmbe = pmb;
-
-	memset(pmb, 0, sizeof(struct pmb_entry));
-
-	pmbe->entry = PMB_NO_ENTRY;
-}
-
-static int __uses_jump_to_uncached pmb_init(void)
-{
-	unsigned int nr_entries = ARRAY_SIZE(pmb_init_map);
-	unsigned int entry, i;
-
-	BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES));
-
-	pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0,
-				      SLAB_PANIC, pmb_cache_ctor);
+	unsigned int i;
+	long size, ret;
 
 	jump_to_uncached();
 
 	/*
-	 * Ordering is important, P2 must be mapped in the PMB before we
-	 * can set PMB.SE, and P1 must be mapped before we jump back to
-	 * P1 space.
+	 * Insert PMB entries for the P1 and P2 areas so that, after
+	 * we've switched the MMU to 32-bit mode, the semantics of P1
+	 * and P2 are the same as in 29-bit mode, e.g.
+	 *
+	 *	P1 - provides a cached window onto physical memory
+	 *	P2 - provides an uncached window onto physical memory
 	 */
-	for (entry = 0; entry < nr_entries; entry++) {
-		struct pmb_entry *pmbe = pmb_init_map + entry;
+	size = __MEMORY_START + __MEMORY_SIZE;
 
-		__set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &entry);
-	}
+	ret = pmb_remap(P1SEG, 0x00000000, size, PMB_C);
+	BUG_ON(ret != size);
+
+	ret = pmb_remap(P2SEG, 0x00000000, size, PMB_WT | PMB_UB);
+	BUG_ON(ret != size);
 
 	ctrl_outl(0, PMB_IRMCR);
 
 	/* PMB.SE and UB[7] */
-	ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR);
+	ctrl_outl(PASCR_SE | (1 << 7), PMB_PASCR);
 
 	/* Flush out the TLB */
 	i =  ctrl_inl(MMUCR);
@@ -366,7 +317,53 @@ static int __uses_jump_to_uncached pmb_init(void)
 
 	return 0;
 }
-arch_initcall(pmb_init);
+#else
+int __uses_jump_to_uncached pmb_init(void)
+{
+	int i;
+	unsigned long addr, data;
+
+	jump_to_uncached();
+
+	for (i = 0; i < PMB_ENTRY_MAX; i++) {
+		struct pmb_entry *pmbe;
+		unsigned long vpn, ppn, flags;
+
+		addr = PMB_DATA + (i << PMB_E_SHIFT);
+		data = ctrl_inl(addr);
+		if (!(data & PMB_V))
+			continue;
+
+		if (data & PMB_C) {
+#if defined(CONFIG_CACHE_WRITETHROUGH)
+			data |= PMB_WT;
+#elif defined(CONFIG_CACHE_WRITEBACK)
+			data &= ~PMB_WT;
+#else
+			data &= ~(PMB_C | PMB_WT);
+#endif
+		}
+		ctrl_outl(data, addr);
+
+		ppn = data & PMB_PFN_MASK;
+
+		flags = data & (PMB_C | PMB_WT | PMB_UB);
+		flags |= data & PMB_SZ_MASK;
+
+		addr = PMB_ADDR + (i << PMB_E_SHIFT);
+		data = ctrl_inl(addr);
+
+		vpn = data & PMB_PFN_MASK;
+
+		pmbe = pmb_alloc(vpn, ppn, flags, i);
+		WARN_ON(IS_ERR(pmbe));
+	}
+
+	back_to_cached();
+
+	return 0;
+}
+#endif /* CONFIG_PMB */
 
 static int pmb_seq_show(struct seq_file *file, void *iter)
 {
@@ -434,15 +431,18 @@ postcore_initcall(pmb_debugfs_init);
 static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 {
 	static pm_message_t prev_state;
+	int i;
 
 	/* Restore the PMB after a resume from hibernation */
 	if (state.event == PM_EVENT_ON &&
 	    prev_state.event == PM_EVENT_FREEZE) {
 		struct pmb_entry *pmbe;
-		spin_lock_irq(&pmb_list_lock);
-		for (pmbe = pmb_list; pmbe; pmbe = pmbe->next)
-			set_pmb_entry(pmbe);
-		spin_unlock_irq(&pmb_list_lock);
+		for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+			if (test_bit(i, &pmb_map)) {
+				pmbe = &pmb_entry_list[i];
+				set_pmb_entry(pmbe);
+			}
+		}
 	}
 	prev_state = state;
 	return 0;