aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/mm
diff options
context:
space:
mode:
authorPaulius Zaleckas <paulius.zaleckas@teltonika.lt>2009-03-25 13:10:01 +0200
committerPaulius Zaleckas <paulius.zaleckas@teltonika.lt>2009-03-25 13:10:01 +0200
commit28853ac8fe5221de74a14f1182d7b2b383dfd85c (patch)
treedcfd9b20028e5a3a9504e26d2c9060f4746fc83a /arch/arm/mm
parentbba7d0b9ba0f04d25145de8170a17a3a07bbfdde (diff)
ARM: Add support for FA526 v2
Adds support for Faraday FA526 core. This core is used at least by: Cortina Systems Gemini and Centroid family Cavium Networks ECONA family Grain Media GM8120 Pixelplus ImageARM Prolific PL-1029 Faraday IP evaluation boards v2: - move TLB_BTB to separate patch - update copyrights Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--arch/arm/mm/Kconfig35
-rw-r--r--arch/arm/mm/Makefile4
-rw-r--r--arch/arm/mm/cache-fa.S220
-rw-r--r--arch/arm/mm/copypage-fa.c86
-rw-r--r--arch/arm/mm/proc-fa526.S248
-rw-r--r--arch/arm/mm/tlb-fa.S75
6 files changed, 666 insertions, 2 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c0..bc3331863d9 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -186,6 +186,24 @@ config CPU_ARM926T
Say Y if you want support for the ARM926T processor.
Otherwise, say N.
+# FA526
+config CPU_FA526
+ bool
+ select CPU_32v4
+ select CPU_ABRT_EV4
+ select CPU_PABRT_NOIFAR
+ select CPU_CACHE_VIVT
+ select CPU_CP15_MMU
+ select CPU_CACHE_FA
+ select CPU_COPY_FA if MMU
+ select CPU_TLB_FA if MMU
+ help
+ The FA526 is a version of the ARMv4 compatible processor with
+ Branch Target Buffer, Unified TLB and cache line size 16.
+
+ Say Y if you want support for the FA526 processor.
+ Otherwise, say N.
+
# ARM940T
config CPU_ARM940T
bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -484,6 +502,9 @@ config CPU_CACHE_VIVT
config CPU_CACHE_VIPT
bool
+config CPU_CACHE_FA
+ bool
+
if MMU
# The copy-page model
config CPU_COPY_V3
@@ -498,6 +519,9 @@ config CPU_COPY_V4WB
config CPU_COPY_FEROCEON
bool
+config CPU_COPY_FA
+ bool
+
config CPU_COPY_V6
bool
@@ -528,6 +552,13 @@ config CPU_TLB_FEROCEON
help
Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+config CPU_TLB_FA
+ bool
+ help
+ Faraday ARM FA526 architecture, unified TLB with writeback cache
+ and invalidate instruction cache entry. Branch target buffer is
+ also supported.
+
config CPU_TLB_V6
bool
@@ -638,7 +669,7 @@ config CPU_DCACHE_SIZE
config CPU_DCACHE_WRITETHROUGH
bool "Force write through D-cache"
- depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+ depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
default y if CPU_ARM925T
help
Say Y here to use the data cache in writethrough mode. Unless you
@@ -653,7 +684,7 @@ config CPU_CACHE_ROUND_ROBIN
config CPU_BPREDICT_DISABLE
bool "Disable branch prediction"
- depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+ depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526
help
Say Y here to disable branch prediction. If unsure, say N.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611..40f941c2245 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o
obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o
obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o
obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o
obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
@@ -41,6 +42,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
@@ -49,6 +51,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o
obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o
obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o
@@ -62,6 +65,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o
obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o
obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o
obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o
+obj-$(CONFIG_CPU_FA526) += proc-fa526.o
obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o
obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o
obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 00000000000..b63a8f7b95c
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,220 @@
+/*
+ * linux/arch/arm/mm/cache-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ * Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE 16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE 8192
+#else
+#define CACHE_DSIZE 16384
+#endif
+
+/* FIXME: put optimal value here. Current one is just estimation */
+#define CACHE_DLIMIT (CACHE_DSIZE * 2)
+
+/*
+ * flush_user_cache_all()
+ *
+ * Clean and invalidate all cache entries in a particular address
+ * space.
+ */
+ENTRY(fa_flush_user_cache_all)
+ /* FALLTHROUGH */
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+ mov ip, #0
+ mov r2, #VM_EXEC
+__flush_whole_cache:
+ mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Invalidate a range of cache entries in the specified
+ * address space.
+ *
+ * - start - start address (inclusive, page aligned)
+ * - end - end address (exclusive, page aligned)
+ * - flags - vma_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+ mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT @ total size >= limit?
+ bhs __flush_whole_cache @ flush whole D cache
+
+1: tst r2, #VM_EXEC
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+ /* fall through */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_kern_dcache_page(kaddr)
+ *
+ * Ensure that the data held in the page kaddr is written back
+ * to the page in question.
+ *
+ * - kaddr - kernel address (guaranteed to be page aligned)
+ */
+ENTRY(fa_flush_kern_dcache_page)
+ add r1, r0, #PAGE_SZ
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_inv_range)
+ tst r0, #CACHE_DLINESIZE - 1
+ bic r0, r0, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ tst r1, #CACHE_DLINESIZE - 1
+ bic r1, r1, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean (write back) the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_clean_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_flush_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+ __INITDATA
+
+ .type fa_cache_fns, #object
+ENTRY(fa_cache_fns)
+ .long fa_flush_kern_cache_all
+ .long fa_flush_user_cache_all
+ .long fa_flush_user_cache_range
+ .long fa_coherent_kern_range
+ .long fa_coherent_user_range
+ .long fa_flush_kern_dcache_page
+ .long fa_dma_inv_range
+ .long fa_dma_clean_range
+ .long fa_dma_flush_range
+ .size fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 00000000000..b2a6008b011
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ * linux/arch/arm/lib/copypage-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+ asm("\
+ stmfd sp!, {r4, lr} @ 2\n\
+ mov r2, %0 @ 1\n\
+1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ subs r2, r2, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\
+ ldmfd sp!, {r4, pc} @ 3"
+ :
+ : "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr)
+{
+ void *kto, *kfrom;
+
+ kto = kmap_atomic(to, KM_USER0);
+ kfrom = kmap_atomic(from, KM_USER1);
+ fa_copy_user_page(kto, kfrom);
+ kunmap_atomic(kfrom, KM_USER1);
+ kunmap_atomic(kto, KM_USER0);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *ptr, *kaddr = kmap_atomic(page, KM_USER0);
+ asm volatile("\
+ mov r1, %2 @ 1\n\
+ mov r2, #0 @ 1\n\
+ mov r3, #0 @ 1\n\
+ mov ip, #0 @ 1\n\
+ mov lr, #0 @ 1\n\
+1: stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ subs r1, r1, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB"
+ : "=r" (ptr)
+ : "0" (kaddr), "I" (PAGE_SIZE / 32)
+ : "r1", "r2", "r3", "ip", "lr");
+ kunmap_atomic(kaddr, KM_USER0);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+ .cpu_clear_user_highpage = fa_clear_user_highpage,
+ .cpu_copy_user_highpage = fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 00000000000..08b8a955d5d
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,248 @@
+/*
+ * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ * Written by : Luke Lee
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE 16
+
+ .text
+/*
+ * cpu_fa526_proc_init()
+ */
+ENTRY(cpu_fa526_proc_init)
+ mov pc, lr
+
+/*
+ * cpu_fa526_proc_fin()
+ */
+ENTRY(cpu_fa526_proc_fin)
+ stmfd sp!, {lr}
+ mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+ msr cpsr_c, ip
+ bl fa_flush_kern_cache_all
+ mrc p15, 0, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1000 @ ...i............
+ bic r0, r0, #0x000e @ ............wca.
+ mcr p15, 0, r0, c1, c0, 0 @ disable caches
+ nop
+ nop
+ ldmfd sp!, {pc}
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+ .align 4
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+ mov ip, #0
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+#ifdef CONFIG_MMU
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+#endif
+ mrc p15, 0, ip, c1, c0, 0 @ ctrl register
+ bic ip, ip, #0x000f @ ............wcam
+ bic ip, ip, #0x1100 @ ...i...s........
+ bic ip, ip, #0x0800 @ BTB off
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register
+ nop
+ nop
+ mov pc, r0
+
+/*
+ * cpu_fa526_do_idle()
+ */
+ .align 4
+ENTRY(cpu_fa526_do_idle)
+ mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
+ mov pc, lr
+
+
+ENTRY(cpu_fa526_dcache_clean_area)
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ subs r1, r1, #CACHE_DLINESIZE
+ bhi 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+ .align 4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+ mov ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+ mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
+#else
+ mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache
+#endif
+ mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcr p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
+#endif
+ mov pc, lr
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+ .align 4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+ armv3_set_pte_ext
+ mov r0, r0
+ mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+#endif
+ mov pc, lr
+
+ __INIT
+
+ .type __fa526_setup, #function
+__fa526_setup:
+ /* On return of this routine, r0 must carry correct flags for CFG register */
+ mov r0, #0
+ mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
+#ifdef CONFIG_MMU
+ mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
+#endif
+ mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM
+
+ mov r0, #1
+ mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR
+
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+
+ mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client
+ mcr p15, 0, r0, c3, c0 @ load domain access register
+
+ mrc p15, 0, r0, c1, c0 @ get control register v4
+ ldr r5, fa526_cr1_clear
+ bic r0, r0, r5
+ ldr r5, fa526_cr1_set
+ orr r0, r0, r5
+ mov pc, lr
+ .size __fa526_setup, . - __fa526_setup
+
+ /*
+ * .RVI ZFRS BLDP WCAM
+ * ..11 1001 .111 1101
+ *
+ */
+ .type fa526_cr1_clear, #object
+ .type fa526_cr1_set, #object
+fa526_cr1_clear:
+ .word 0x3f3f
+fa526_cr1_set:
+ .word 0x397D
+
+ __INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ * come through these
+ */
+ .type fa526_processor_functions, #object
+fa526_processor_functions:
+ .word v4_early_abort
+ .word pabort_noifar
+ .word cpu_fa526_proc_init
+ .word cpu_fa526_proc_fin
+ .word cpu_fa526_reset
+ .word cpu_fa526_do_idle
+ .word cpu_fa526_dcache_clean_area
+ .word cpu_fa526_switch_mm
+ .word cpu_fa526_set_pte_ext
+ .size fa526_processor_functions, . - fa526_processor_functions
+
+ .section ".rodata"
+
+ .type cpu_arch_name, #object
+cpu_arch_name:
+ .asciz "armv4"
+ .size cpu_arch_name, . - cpu_arch_name
+
+ .type cpu_elf_name, #object
+cpu_elf_name:
+ .asciz "v4"
+ .size cpu_elf_name, . - cpu_elf_name
+
+ .type cpu_fa526_name, #object
+cpu_fa526_name:
+ .asciz "FA526"
+ .size cpu_fa526_name, . - cpu_fa526_name
+
+ .align
+
+ .section ".proc.info.init", #alloc, #execinstr
+
+ .type __fa526_proc_info,#object
+__fa526_proc_info:
+ .long 0x66015261
+ .long 0xff01fff1
+ .long PMD_TYPE_SECT | \
+ PMD_SECT_BUFFERABLE | \
+ PMD_SECT_CACHEABLE | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ .long PMD_TYPE_SECT | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ b __fa526_setup
+ .long cpu_arch_name
+ .long cpu_elf_name
+ .long HWCAP_SWP | HWCAP_HALF
+ .long cpu_fa526_name
+ .long fa526_processor_functions
+ .long fa_tlb_fns
+ .long fa_user_fns
+ .long fa_cache_fns
+ .size __fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 00000000000..9694f1f6f48
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,75 @@
+/*
+ * linux/arch/arm/mm/tlb-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ * Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ARM architecture version 4, Faraday variation.
+ * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB)
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ * flush_user_tlb_range(start, end, mm)
+ *
+ * Invalidate a range of TLB entries in the specified address space.
+ *
+ * - start - range start address
+ * - end - range end address
+ * - mm - mm_struct describing address space
+ */
+ .align 4
+ENTRY(fa_flush_user_tlb_range)
+ vma_vm_mm ip, r2
+ act_mm r3 @ get current->active_mm
+ eors r3, ip, r3 @ == mm ?
+ movne pc, lr @ no, we dont do anything
+ mov r3, #0
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mov pc, lr
+
+
+ENTRY(fa_flush_kern_tlb_range)
+ mov r3, #0
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r3, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+ __INITDATA
+
+ .type fa_tlb_fns, #object
+ENTRY(fa_tlb_fns)
+ .long fa_flush_user_tlb_range
+ .long fa_flush_kern_tlb_range
+ .long fa_tlb_flags
+ .size fa_tlb_fns, . - fa_tlb_fns