diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/arm/mm |
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/arm/mm')
56 files changed, 13340 insertions, 0 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig new file mode 100644 index 00000000000..5b670c9ac5e --- /dev/null +++ b/arch/arm/mm/Kconfig @@ -0,0 +1,411 @@ +comment "Processor Type" + +config CPU_32 + bool + default y + +# Select CPU types depending on the architecture selected. This selects +# which CPUs we support in the kernel image, and the compiler instruction +# optimiser behaviour. + +# ARM610 +config CPU_ARM610 + bool "Support ARM610 processor" + depends on ARCH_RPC + select CPU_32v3 + select CPU_CACHE_V3 + select CPU_CACHE_VIVT + select CPU_COPY_V3 + select CPU_TLB_V3 + help + The ARM610 is the successor to the ARM3 processor + and was produced by VLSI Technology Inc. + + Say Y if you want support for the ARM610 processor. + Otherwise, say N. + +# ARM710 +config CPU_ARM710 + bool "Support ARM710 processor" if !ARCH_CLPS7500 && ARCH_RPC + default y if ARCH_CLPS7500 + select CPU_32v3 + select CPU_CACHE_V3 + select CPU_CACHE_VIVT + select CPU_COPY_V3 + select CPU_TLB_V3 + help + A 32-bit RISC microprocessor based on the ARM7 processor core + designed by Advanced RISC Machines Ltd. The ARM710 is the + successor to the ARM610 processor. It was released in + July 1994 by VLSI Technology Inc. + + Say Y if you want support for the ARM710 processor. + Otherwise, say N. + +# ARM720T +config CPU_ARM720T + bool "Support ARM720T processor" if !ARCH_CLPS711X && !ARCH_L7200 && !ARCH_CDB89712 && ARCH_INTEGRATOR + default y if ARCH_CLPS711X || ARCH_L7200 || ARCH_CDB89712 || ARCH_H720X + select CPU_32v4 + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_CACHE_VIVT + select CPU_COPY_V4WT + select CPU_TLB_V4WT + help + A 32-bit RISC processor with 8kByte Cache, Write Buffer and + MMU built around an ARM7TDMI core. + + Say Y if you want support for the ARM720T processor. + Otherwise, say N. 
+ +# ARM920T +config CPU_ARM920T + bool "Support ARM920T processor" if !ARCH_S3C2410 + depends on ARCH_INTEGRATOR || ARCH_S3C2410 || ARCH_IMX + default y if ARCH_S3C2410 + select CPU_32v4 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + help + The ARM920T is licensed to be produced by numerous vendors, + and is used in the Maverick EP9312 and the Samsung S3C2410. + + More information on the Maverick EP9312 at + <http://linuxdevices.com/products/PD2382866068.html>. + + Say Y if you want support for the ARM920T processor. + Otherwise, say N. + +# ARM922T +config CPU_ARM922T + bool "Support ARM922T processor" if ARCH_INTEGRATOR + depends on ARCH_CAMELOT || ARCH_LH7A40X || ARCH_INTEGRATOR + default y if ARCH_CAMELOT || ARCH_LH7A40X + select CPU_32v4 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + help + The ARM922T is a version of the ARM920T, but with smaller + instruction and data caches. It is used in Altera's + Excalibur XA device family. + + Say Y if you want support for the ARM922T processor. + Otherwise, say N. + +# ARM925T +config CPU_ARM925T + bool "Support ARM925T processor" if ARCH_OMAP + depends on ARCH_OMAP1510 + default y if ARCH_OMAP1510 + select CPU_32v4 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + help + The ARM925T is a mix between the ARM920T and ARM926T, but with + different instruction and data caches. It is used in TI's OMAP + device family. + + Say Y if you want support for the ARM925T processor. + Otherwise, say N. 
+ +# ARM926T +config CPU_ARM926T + bool "Support ARM926T processor" if ARCH_INTEGRATOR + depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX + default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX + select CPU_32v5 + select CPU_ABRT_EV5TJ + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + help + This is a variant of the ARM920. It has slightly different + instruction sequences for cache and TLB operations. Curiously, + there is no documentation on it at the ARM corporate website. + + Say Y if you want support for the ARM926T processor. + Otherwise, say N. + +# ARM1020 - needs validating +config CPU_ARM1020 + bool "Support ARM1020T (rev 0) processor" + depends on ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + help + The ARM1020 is the 32K cached version of the ARM10 processor, + with an addition of a floating-point unit. + + Say Y if you want support for the ARM1020 processor. + Otherwise, say N. + +# ARM1020E - needs validating +config CPU_ARM1020E + bool "Support ARM1020E processor" + depends on ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WBI + depends on n + +# ARM1022E +config CPU_ARM1022 + bool "Support ARM1022E processor" + depends on ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_VIVT + select CPU_COPY_V4WB # can probably do better + select CPU_TLB_V4WBI + help + The ARM1022E is an implementation of the ARMv5TE architecture + based upon the ARM10 integer core with a 16KiB L1 Harvard cache, + embedded trace macrocell, and a floating-point unit. + + Say Y if you want support for the ARM1022E processor. + Otherwise, say N. 
+ +# ARM1026EJ-S +config CPU_ARM1026 + bool "Support ARM1026EJ-S processor" + depends on ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 + select CPU_CACHE_VIVT + select CPU_COPY_V4WB # can probably do better + select CPU_TLB_V4WBI + help + The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture + based upon the ARM10 integer core. + + Say Y if you want support for the ARM1026EJ-S processor. + Otherwise, say N. + +# SA110 +config CPU_SA110 + bool "Support StrongARM(R) SA-110 processor" if !ARCH_EBSA110 && !FOOTBRIDGE && !ARCH_TBOX && !ARCH_SHARK && !ARCH_NEXUSPCI && ARCH_RPC + default y if ARCH_EBSA110 || FOOTBRIDGE || ARCH_TBOX || ARCH_SHARK || ARCH_NEXUSPCI + select CPU_32v3 if ARCH_RPC + select CPU_32v4 if !ARCH_RPC + select CPU_ABRT_EV4 + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_COPY_V4WB + select CPU_TLB_V4WB + help + The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and + is available at five speeds ranging from 100 MHz to 233 MHz. + More information is available at + <http://developer.intel.com/design/strong/sa110.htm>. + + Say Y if you want support for the SA-110 processor. + Otherwise, say N. + +# SA1100 +config CPU_SA1100 + bool + depends on ARCH_SA1100 + default y + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_TLB_V4WB + select CPU_MINICACHE + +# XScale +config CPU_XSCALE + bool + depends on ARCH_IOP3XX || ARCH_PXA || ARCH_IXP4XX || ARCH_IXP2000 + default y + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_TLB_V4WBI + select CPU_MINICACHE + +# ARMv6 +config CPU_V6 + bool "Support ARM V6 processor" + depends on ARCH_INTEGRATOR + select CPU_32v6 + select CPU_ABRT_EV6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_COPY_V6 + select CPU_TLB_V6 + +# Figure out what processor architecture version we should be using. 
+# This defines the compiler instruction set which depends on the machine type. +config CPU_32v3 + bool + +config CPU_32v4 + bool + +config CPU_32v5 + bool + +config CPU_32v6 + bool + +# The abort model +config CPU_ABRT_EV4 + bool + +config CPU_ABRT_EV4T + bool + +config CPU_ABRT_LV4T + bool + +config CPU_ABRT_EV5T + bool + +config CPU_ABRT_EV5TJ + bool + +config CPU_ABRT_EV6 + bool + +# The cache model +config CPU_CACHE_V3 + bool + +config CPU_CACHE_V4 + bool + +config CPU_CACHE_V4WT + bool + +config CPU_CACHE_V4WB + bool + +config CPU_CACHE_V6 + bool + +config CPU_CACHE_VIVT + bool + +config CPU_CACHE_VIPT + bool + +# The copy-page model +config CPU_COPY_V3 + bool + +config CPU_COPY_V4WT + bool + +config CPU_COPY_V4WB + bool + +config CPU_COPY_V6 + bool + +# This selects the TLB model +config CPU_TLB_V3 + bool + help + ARM Architecture Version 3 TLB. + +config CPU_TLB_V4WT + bool + help + ARM Architecture Version 4 TLB with writethrough cache. + +config CPU_TLB_V4WB + bool + help + ARM Architecture Version 4 TLB with writeback cache. + +config CPU_TLB_V4WBI + bool + help + ARM Architecture Version 4 TLB with writeback cache and invalidate + instruction cache entry. + +config CPU_TLB_V6 + bool + +config CPU_MINICACHE + bool + help + Processor has a minicache. + +comment "Processor Features" + +config ARM_THUMB + bool "Support Thumb user binaries" + depends on CPU_ARM720T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_V6 + default y + help + Say Y if you want to include kernel support for running user space + Thumb binaries. + + The Thumb instruction set is a compressed form of the standard ARM + instruction set resulting in smaller binaries at the expense of + slightly less efficient code. + + If you don't know what this all is, saying Y is a safe choice. 
+ +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + depends on ARCH_SUPPORTS_BIG_ENDIAN + help + Say Y if you plan on running a kernel in big-endian mode. + Note that your board must be properly built and your board + port must properly enable any big-endian related features + of your chipset/board/processor. + +config CPU_ICACHE_DISABLE + bool "Disable I-Cache" + depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 + help + Say Y here to disable the processor instruction cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_DCACHE_DISABLE + bool "Disable D-Cache" + depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 + help + Say Y here to disable the processor data cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_DCACHE_WRITETHROUGH + bool "Force write through D-cache" + depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DCACHE_DISABLE + default y if CPU_ARM925T + help + Say Y here to use the data cache in writethrough mode. Unless you + specifically require this or are unsure, say N. + +config CPU_CACHE_ROUND_ROBIN + bool "Round robin I and D cache replacement algorithm" + depends on (CPU_ARM926T || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE) + help + Say Y here to use the predictable round-robin cache replacement + policy. Unless you specifically require this or are unsure, say N. + +config CPU_BPREDICT_DISABLE + bool "Disable branch prediction" + depends on CPU_ARM1020 + help + Say Y here to disable branch prediction. If unsure, say N. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile new file mode 100644 index 00000000000..ccf316c11e0 --- /dev/null +++ b/arch/arm/mm/Makefile @@ -0,0 +1,56 @@ +# +# Makefile for the linux arm-specific parts of the memory manager. 
+# + +obj-y := consistent.o extable.o fault-armv.o \ + fault.o flush.o init.o ioremap.o mmap.o \ + mm-armv.o + +obj-$(CONFIG_MODULES) += proc-syms.o + +obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o +obj-$(CONFIG_DISCONTIGMEM) += discontig.o + +obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o +obj-$(CONFIG_CPU_ABRT_EV4T) += abort-ev4t.o +obj-$(CONFIG_CPU_ABRT_LV4T) += abort-lv4t.o +obj-$(CONFIG_CPU_ABRT_EV5T) += abort-ev5t.o +obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o +obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o + +obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o +obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o +obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o +obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o +obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o + +obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o +obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o +obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o +obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o +obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o +obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o + +obj-$(CONFIG_CPU_MINICACHE) += minicache.o + +obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o +obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o +obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o +obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o +obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o + +obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o +obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o +obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o +obj-$(CONFIG_CPU_ARM920T) += proc-arm920.o +obj-$(CONFIG_CPU_ARM922T) += proc-arm922.o +obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o +obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o +obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o +obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o +obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o +obj-$(CONFIG_CPU_ARM1026) += proc-arm1026.o +obj-$(CONFIG_CPU_SA110) += proc-sa110.o +obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o +obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o +obj-$(CONFIG_CPU_V6) += proc-v6.o blockops.o diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S new file 
mode 100644 index 00000000000..4f18f9e87ba --- /dev/null +++ b/arch/arm/mm/abort-ev4.S @@ -0,0 +1,30 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4_early_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v4_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r3, [r2] @ read aborted ARM instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ L = 0 -> write? + orreq r1, r1, #1 << 11 @ yes. + mov pc, lr + + diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S new file mode 100644 index 00000000000..b6282548f92 --- /dev/null +++ b/arch/arm/mm/abort-ev4t.S @@ -0,0 +1,30 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v4t_early_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. 
+ */ + .align 5 +ENTRY(v4t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort + ldreq r3, [r2] @ read aborted ARM instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + mov pc, lr diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S new file mode 100644 index 00000000000..02251b526c0 --- /dev/null +++ b/arch/arm/mm/abort-ev5t.S @@ -0,0 +1,31 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5t_early_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort + ldreq r3, [r2] @ read aborted ARM instruction + bic r1, r1, #1 << 11 @ clear bits 11 of FSR + do_ldrd_abort + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + mov pc, lr diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S new file mode 100644 index 00000000000..bce68d601c8 --- /dev/null +++ b/arch/arm/mm/abort-ev5tj.S @@ -0,0 +1,35 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5tj_early_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. 
+ * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5tj_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #PSR_J_BIT @ Java? + movne pc, lr + do_thumb_abort + ldreq r3, [r2] @ read aborted ARM instruction + do_ldrd_abort + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. + mov pc, lr + + diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S new file mode 100644 index 00000000000..38b2cbb89be --- /dev/null +++ b/arch/arm/mm/abort-ev6.S @@ -0,0 +1,23 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v6_early_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. + */ + .align 5 +ENTRY(v6_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + mov pc, lr + + diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S new file mode 100644 index 00000000000..db743e51021 --- /dev/null +++ b/arch/arm/mm/abort-lv4t.S @@ -0,0 +1,220 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4t_late_abort + * + * Params : r2 = address of aborted instruction + * : r3 = saved SPSR + * + * Returns : r0 = address of abort + * : r1 = FSR, bit 11 = write + * : r2-r8 = corrupted + * : r9 = preserved + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. 
Unfortunately, this does happen. We live with it. + */ +ENTRY(v4t_late_abort) + tst r3, #PSR_T_BIT @ check for thumb mode + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bne .data_thumb_abort + ldr r8, [r2] @ read arm instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r8, #1 << 20 @ L = 0 -> write? + orreq r1, r1, #1 << 11 @ yes. + and r7, r8, #15 << 24 + add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine + nop + +/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm +/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm] +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m +/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] +/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm +/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] +/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> +/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> +/* a */ b .data_unknown +/* b */ b .data_unknown +/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ mov pc, lr @ ldc rd, [rn, #m] +/* e */ b .data_unknown +/* f */ +.data_unknown: @ Part of jumptable + mov r0, r2 + mov r1, r8 + mov r2, sp + bl baddataabort + b ret_from_exception + +.data_arm_ldmstm: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup + mov r7, #0x11 + orr r7, r7, #0x1100 + and r6, r8, r7 + and r2, r8, r7, lsl #1 + add r6, r6, r2, lsr #1 + and r2, r8, r7, lsl #2 + add r6, r6, r2, lsr #2 + and r2, r8, r7, lsl #3 + add r6, r6, r2, lsr #3 + add r6, r6, r6, lsr #8 + add r6, r6, r6, lsr #4 + and r6, r6, #15 @ r6 = no. of registers to transfer. 
+ and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsl #2 @ Undo increment + addeq r7, r7, r6, lsl #2 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_lateldrhpre: + tst r8, #1 << 21 @ Check writeback bit + moveq pc, lr @ No writeback -> no fixup +.data_arm_lateldrhpost: + and r5, r8, #0x00f @ get Rm / low nibble of immediate value + tst r8, #1 << 22 @ if (immediate offset) + andne r6, r8, #0xf00 @ { immediate high nibble + orrne r6, r5, r6, lsr #4 @ combine nibbles } else + ldreq r6, [sp, r5, lsl #2] @ { load Rm value } +.data_arm_apply_r6_and_rn: + and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6 @ Undo increment + addeq r7, r7, r6 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_lateldrpreconst: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup +.data_arm_lateldrpostconst: + movs r2, r8, lsl #20 @ Get offset + moveq pc, lr @ zero -> no fixup + and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r2, lsr #20 @ Undo increment + addeq r7, r7, r2, lsr #20 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_lateldrprereg: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup +.data_arm_lateldrpostreg: + and r7, r8, #15 @ Extract 'm' from instruction + ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' + mov r5, r8, lsr #7 @ get shift count + ands r5, r5, #31 + and r7, r8, #0x70 @ get shift type + orreq r7, r7, #8 @ shift count = 0 + add pc, pc, r7 + nop + + mov r6, r6, lsl r5 @ 0: LSL #!0 + b .data_arm_apply_r6_and_rn + b .data_arm_apply_r6_and_rn @ 1: LSL #0 + nop + b .data_unknown @ 2: 
MUL? + nop + b .data_unknown @ 3: MUL? + nop + mov r6, r6, lsr r5 @ 4: LSR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, lsr #32 @ 5: LSR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ 6: MUL? + nop + b .data_unknown @ 7: MUL? + nop + mov r6, r6, asr r5 @ 8: ASR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, asr #32 @ 9: ASR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ A: MUL? + nop + b .data_unknown @ B: MUL? + nop + mov r6, r6, ror r5 @ C: ROR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, rrx @ D: RRX + b .data_arm_apply_r6_and_rn + b .data_unknown @ E: MUL? + nop + b .data_unknown @ F: MUL? + +.data_thumb_abort: + ldrh r8, [r2] @ read instruction + tst r8, #1 << 11 @ L = 0 -> write? + orreq r1, r1, #1 << 8 @ yes (NOTE(review): sets bit 8, header documents bit 11 = write; confirm) + and r7, r8, #15 << 12 + add pc, pc, r7, lsr #10 @ lookup in table + nop + +/* 0 */ b .data_unknown +/* 1 */ b .data_unknown +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_unknown +/* 5 */ b .data_thumb_reg +/* 6 */ mov pc, lr +/* 7 */ mov pc, lr +/* 8 */ mov pc, lr +/* 9 */ mov pc, lr +/* A */ b .data_unknown +/* B */ b .data_thumb_pushpop +/* C */ b .data_thumb_ldmstm +/* D */ b .data_unknown +/* E */ b .data_unknown +/* F */ b .data_unknown + +.data_thumb_reg: + tst r8, #1 << 9 + moveq pc, lr + tst r8, #1 << 10 @ If 'S' (signed) bit is set + movne r1, #0 @ it must be a load instr + mov pc, lr + +.data_thumb_pushpop: + tst r8, #1 << 10 + beq .data_unknown + and r6, r8, #0x55 @ hweight8(r8) + R bit + and r2, r8, #0xaa + add r6, r6, r2, lsr #1 + and r2, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r2, lsr #2 + movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) + adc r6, r6, r6, lsr #4 @ high + low nibble + R bit + and r6, r6, #15 @ number of regs to transfer + ldr r7, [sp, #13 << 2] + tst r8, #1 << 11 + addeq r7, r7, r6, lsl #2 @ increment SP if PUSH + subne r7, r7, r6, lsl #2 @ decrement SP if POP + str r7, [sp, #13 << 2] + mov pc, lr + +.data_thumb_ldmstm: + and r6, r8, #0x55 @ hweight8(r8) + and r2, r8, #0xaa + add r6, 
r6, r2, lsr #1 + and r2, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r2, lsr #2 + add r6, r6, r6, lsr #4 + and r5, r8, #7 << 8 + ldr r7, [sp, r5, lsr #6] + and r6, r6, #15 @ number of regs to transfer + sub r7, r7, r6, lsl #2 @ always decrement + str r7, [sp, r5, lsr #6] + mov pc, lr diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S new file mode 100644 index 00000000000..d7cb1bfa51a --- /dev/null +++ b/arch/arm/mm/abort-macro.S @@ -0,0 +1,42 @@ +/* + * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb) + * differently than every other instruction, so it is set to 0 (write) + * even though the instructions are read instructions. This means that + * during an abort the instructions will be treated as a write and the + * handler will raise a signal from unwriteable locations if they + * fault. We have to specifically check for these instructions + * from the abort handlers to treat them properly. + * + */ + + .macro do_thumb_abort + tst r3, #PSR_T_BIT + beq not_thumb + ldrh r3, [r2] @ Read aborted Thumb instruction + and r3, r3, # 0xfe00 @ Mask opcode field + cmp r3, # 0x5600 @ Is it ldrsb? + orreq r3, r3, #1 << 11 @ Set L-bit if yes + tst r3, #1 << 11 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. + mov pc, lr +not_thumb: + .endm + +/* + * We check for the following instruction encoding for LDRD. 
+ * + * [27:25] == 0 + * [7:4] == 1101 + * [20] == 0 + */ + .macro do_ldrd_abort + tst r3, #0x0e000000 @ [27:25] == 0 + bne not_ldrd + and r2, r3, #0x000000f0 @ [7:4] == 1101 + cmp r2, #0x000000d0 + bne not_ldrd + tst r3, #1 << 20 @ [20] == 0 + moveq pc, lr +not_ldrd: + .endm + diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c new file mode 100644 index 00000000000..81f4a8a2d34 --- /dev/null +++ b/arch/arm/mm/alignment.c @@ -0,0 +1,756 @@ +/* + * linux/arch/arm/mm/alignment.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2001 Russell King + * Thumb aligment fault fixups (c) 2004 MontaVista Software, Inc. + * - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation. + * Copyright (C) 1996, Cygnus Software Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ptrace.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +#include <asm/uaccess.h> +#include <asm/unaligned.h> + +#include "fault.h" + +/* + * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 + * /proc/sys/debug/alignment, modified and integrated into + * Linux 2.1 by Russell King + * + * Speed optimisations and better fault handling by Russell King. + * + * *** NOTE *** + * This code is not portable to processors with late data abort handling. 
+ */ +#define CODING_BITS(i) (i & 0x0e000000) + +#define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ +#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ +#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */ +#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */ +#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */ + +#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0) + +#define LDSTH_I_BIT(i) (i & (1 << 22)) /* half-word immed */ +#define LDM_S_BIT(i) (i & (1 << 22)) /* write CPSR from SPSR */ + +#define RN_BITS(i) ((i >> 16) & 15) /* Rn */ +#define RD_BITS(i) ((i >> 12) & 15) /* Rd */ +#define RM_BITS(i) (i & 15) /* Rm */ + +#define REGMASK_BITS(i) (i & 0xffff) +#define OFFSET_BITS(i) (i & 0x0fff) + +#define IS_SHIFT(i) (i & 0x0ff0) +#define SHIFT_BITS(i) ((i >> 7) & 0x1f) +#define SHIFT_TYPE(i) (i & 0x60) +#define SHIFT_LSL 0x00 +#define SHIFT_LSR 0x20 +#define SHIFT_ASR 0x40 +#define SHIFT_RORRRX 0x60 + +static unsigned long ai_user; +static unsigned long ai_sys; +static unsigned long ai_skipped; +static unsigned long ai_half; +static unsigned long ai_word; +static unsigned long ai_multi; +static int ai_usermode; + +#ifdef CONFIG_PROC_FS +static const char *usermode_action[] = { + "ignored", + "warn", + "fixup", + "fixup+warn", + "signal", + "signal+warn" +}; + +static int +proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, + void *data) +{ + char *p = page; + int len; + + p += sprintf(p, "User:\t\t%lu\n", ai_user); + p += sprintf(p, "System:\t\t%lu\n", ai_sys); + p += sprintf(p, "Skipped:\t%lu\n", ai_skipped); + p += sprintf(p, "Half:\t\t%lu\n", ai_half); + p += sprintf(p, "Word:\t\t%lu\n", ai_word); + p += sprintf(p, "Multi:\t\t%lu\n", ai_multi); + p += sprintf(p, "User faults:\t%i (%s)\n", ai_usermode, + usermode_action[ai_usermode]); + + len = (p - page) - off; + if (len < 0) + len = 0; + + *eof = (len <= count) ? 
1 : 0; + *start = page + off; + + return len; +} + +static int proc_alignment_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char mode; + + if (count > 0) { + if (get_user(mode, buffer)) + return -EFAULT; + if (mode >= '0' && mode <= '5') + ai_usermode = mode - '0'; + } + return count; +} + +#endif /* CONFIG_PROC_FS */ + +union offset_union { + unsigned long un; + signed long sn; +}; + +#define TYPE_ERROR 0 +#define TYPE_FAULT 1 +#define TYPE_LDST 2 +#define TYPE_DONE 3 + +#ifdef __ARMEB__ +#define BE 1 +#define FIRST_BYTE_16 "mov %1, %1, ror #8\n" +#define FIRST_BYTE_32 "mov %1, %1, ror #24\n" +#define NEXT_BYTE "ror #24" +#else +#define BE 0 +#define FIRST_BYTE_16 +#define FIRST_BYTE_32 +#define NEXT_BYTE "lsr #8" +#endif + +#define __get8_unaligned_check(ins,val,addr,err) \ + __asm__( \ + "1: "ins" %1, [%2], #1\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, #1\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (val), "=r" (addr) \ + : "0" (err), "2" (addr)) + +#define __get16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 8 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 8); \ + if (err) \ + goto fault; \ + } while (0) + +#define get16_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrb",val,addr) + +#define get16t_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrbt",val,addr) + +#define __get32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 24 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 16 : 8); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 
8 : 16); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 24); \ + if (err) \ + goto fault; \ + } while (0) + +#define get32_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrb",val,addr) + +#define get32t_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrbt",val,addr) + +#define __put16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_16 \ + "1: "ins" %1, [%2], #1\n" \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "2: "ins" %1, [%2]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: mov %0, #1\n" \ + " b 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put16_unaligned_check(val,addr) \ + __put16_unaligned_check("strb",val,addr) + +#define put16t_unaligned_check(val,addr) \ + __put16_unaligned_check("strbt",val,addr) + +#define __put32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_32 \ + "1: "ins" %1, [%2], #1\n" \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "2: "ins" %1, [%2], #1\n" \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "3: "ins" %1, [%2], #1\n" \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "4: "ins" %1, [%2]\n" \ + "5:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "6: mov %0, #1\n" \ + " b 5b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 6b\n" \ + " .long 2b, 6b\n" \ + " .long 3b, 6b\n" \ + " .long 4b, 6b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put32_unaligned_check(val,addr) \ + __put32_unaligned_check("strb", val, addr) + +#define put32t_unaligned_check(val,addr) \ + __put32_unaligned_check("strbt", val, addr) + +static void 
+do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset) +{ + if (!LDST_U_BIT(instr)) + offset.un = -offset.un; + + if (!LDST_P_BIT(instr)) + addr += offset.un; + + if (!LDST_P_BIT(instr) || LDST_W_BIT(instr)) + regs->uregs[RN_BITS(instr)] = addr; +} + +static int +do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + if ((instr & 0x01f00ff0) == 0x01000090) + goto swp; + + if ((instr & 0x90) != 0x90 || (instr & 0x60) == 0) + goto bad; + + ai_half += 1; + + if (user_mode(regs)) + goto user; + + if (LDST_L_BIT(instr)) { + unsigned long val; + get16_unaligned_check(val, addr); + + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else + put16_unaligned_check(regs->uregs[rd], addr); + + return TYPE_LDST; + + user: + if (LDST_L_BIT(instr)) { + unsigned long val; + get16t_unaligned_check(val, addr); + + /* signed half-word? 
*/ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else + put16t_unaligned_check(regs->uregs[rd], addr); + + return TYPE_LDST; + + swp: + printk(KERN_ERR "Alignment trap: not handling swp instruction\n"); + bad: + return TYPE_ERROR; + + fault: + return TYPE_FAULT; +} + +static int +do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + ai_word += 1; + + if ((!LDST_P_BIT(instr) && LDST_W_BIT(instr)) || user_mode(regs)) + goto trans; + + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, addr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], addr); + return TYPE_LDST; + + trans: + if (LDST_L_BIT(instr)) { + unsigned int val; + get32t_unaligned_check(val, addr); + regs->uregs[rd] = val; + } else + put32t_unaligned_check(regs->uregs[rd], addr); + return TYPE_LDST; + + fault: + return TYPE_FAULT; +} + +/* + * LDM/STM alignment handler. + * + * There are 4 variants of this instruction: + * + * B = rn pointer before instruction, A = rn pointer after instruction + * ------ increasing address -----> + * | | r0 | r1 | ... 
| rx | | + * PU = 01 B A + * PU = 11 B A + * PU = 00 A B + * PU = 10 A B + */ +static int +do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd, rn, correction, nr_regs, regbits; + unsigned long eaddr, newaddr; + + if (LDM_S_BIT(instr)) + goto bad; + + correction = 4; /* processor implementation defined */ + regs->ARM_pc += correction; + + ai_multi += 1; + + /* count the number of registers in the mask to be transferred */ + nr_regs = hweight16(REGMASK_BITS(instr)) * 4; + + rn = RN_BITS(instr); + newaddr = eaddr = regs->uregs[rn]; + + if (!LDST_U_BIT(instr)) + nr_regs = -nr_regs; + newaddr += nr_regs; + if (!LDST_U_BIT(instr)) + eaddr = newaddr; + + if (LDST_P_EQ_U(instr)) /* U = P */ + eaddr += 4; + + /* + * For alignment faults on the ARM922T/ARM920T the MMU makes + * the FSR (and hence addr) equal to the updated base address + * of the multiple access rather than the restored value. + * Switch this message off if we've got a ARM92[02], otherwise + * [ls]dm alignment faults are noisy! + */ +#if !(defined CONFIG_CPU_ARM922T) && !(defined CONFIG_CPU_ARM920T) + /* + * This is a "hint" - we already have eaddr worked out by the + * processor for us. 
+ */ + if (addr != eaddr) { + printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, " + "addr = %08lx, eaddr = %08lx\n", + instruction_pointer(regs), instr, addr, eaddr); + show_regs(regs); + } +#endif + + if (user_mode(regs)) { + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32t_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32t_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + } else { + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + } + + if (LDST_W_BIT(instr)) + regs->uregs[rn] = newaddr; + if (!LDST_L_BIT(instr) || !(REGMASK_BITS(instr) & (1 << 15))) + regs->ARM_pc -= correction; + return TYPE_DONE; + +fault: + regs->ARM_pc -= correction; + return TYPE_FAULT; + +bad: + printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n"); + return TYPE_ERROR; +} + +/* + * Convert Thumb ld/st instruction forms to equivalent ARM instructions so + * we can reuse ARM userland alignment fault fixups for Thumb. + * + * This implementation was initially based on the algorithm found in + * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same + * to convert only Thumb ld/st instruction forms to equivalent ARM forms. + * + * NOTES: + * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections. + * 2. If for some reason we're passed an non-ld/st Thumb instruction to + * decode, we return 0xdeadc0de. This should never happen under normal + * circumstances but if it does, we've got other problems to deal with + * elsewhere and we obviously can't fix those problems here. 
+ */ + +static unsigned long +thumb2arm(u16 tinstr) +{ + u32 L = (tinstr & (1<<11)) >> 11; + + switch ((tinstr & 0xf800) >> 11) { + /* 6.5.1 Format 1: */ + case 0x6000 >> 11: /* 7.1.52 STR(1) */ + case 0x6800 >> 11: /* 7.1.26 LDR(1) */ + case 0x7000 >> 11: /* 7.1.55 STRB(1) */ + case 0x7800 >> 11: /* 7.1.30 LDRB(1) */ + return 0xe5800000 | + ((tinstr & (1<<12)) << (22-12)) | /* fixup */ + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (31<<6)) >> /* immed_5 */ + (6 - ((tinstr & (1<<12)) ? 0 : 2))); + case 0x8000 >> 11: /* 7.1.57 STRH(1) */ + case 0x8800 >> 11: /* 7.1.32 LDRH(1) */ + return 0xe1c000b0 | + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-1)) | /* immed_5[2:0] */ + ((tinstr & (3<<9)) >> (9-8)); /* immed_5[4:3] */ + + /* 6.5.1 Format 2: */ + case 0x5000 >> 11: + case 0x5800 >> 11: + { + static const u32 subset[8] = { + 0xe7800000, /* 7.1.53 STR(2) */ + 0xe18000b0, /* 7.1.58 STRH(2) */ + 0xe7c00000, /* 7.1.56 STRB(2) */ + 0xe19000d0, /* 7.1.34 LDRSB */ + 0xe7900000, /* 7.1.27 LDR(2) */ + 0xe19000b0, /* 7.1.33 LDRH(2) */ + 0xe7d00000, /* 7.1.31 LDRB(2) */ + 0xe19000f0 /* 7.1.35 LDRSH */ + }; + return subset[(tinstr & (7<<9)) >> 9] | + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-0)); /* Rm */ + } + + /* 6.5.1 Format 3: */ + case 0x4800 >> 11: /* 7.1.28 LDR(3) */ + /* NOTE: This case is not technically possible. We're + * loading 32-bit memory data via PC relative + * addressing mode. So we can and should eliminate + * this case. But I'll leave it here for now. + */ + return 0xe59f0000 | + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << (2-0)); /* immed_8 */ + + /* 6.5.1 Format 4: */ + case 0x9000 >> 11: /* 7.1.54 STR(3) */ + case 0x9800 >> 11: /* 7.1.29 LDR(4) */ + return 0xe58d0000 | + (L<<20) | /* L==1? 
*/ + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << 2); /* immed_8 */ + + /* 6.6.1 Format 1: */ + case 0xc000 >> 11: /* 7.1.51 STMIA */ + case 0xc800 >> 11: /* 7.1.25 LDMIA */ + { + u32 Rn = (tinstr & (7<<8)) >> 8; + u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21; + + return 0xe8800000 | W | (L<<20) | (Rn<<16) | + (tinstr&255); + } + + /* 6.6.1 Format 2: */ + case 0xb000 >> 11: /* 7.1.48 PUSH */ + case 0xb800 >> 11: /* 7.1.47 POP */ + if ((tinstr & (3 << 9)) == 0x0400) { + static const u32 subset[4] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe92d4000, /* STMDB sp!,{registers,lr} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + 0xe8bd8000 /* LDMIA sp!,{registers,pc} */ + }; + return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | + (tinstr & 255); /* register_list */ + } + /* Else fall through for illegal instruction case */ + + default: + return 0xdeadc0de; + } +} + +static int +do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + union offset_union offset; + unsigned long instr = 0, instrptr; + int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); + unsigned int type; + mm_segment_t fs; + unsigned int fault; + u16 tinstr = 0; + + instrptr = instruction_pointer(regs); + + fs = get_fs(); + set_fs(KERNEL_DS); + if thumb_mode(regs) { + fault = __get_user(tinstr, (u16 *)(instrptr & ~1)); + if (!(fault)) + instr = thumb2arm(tinstr); + } else + fault = __get_user(instr, (u32 *)instrptr); + set_fs(fs); + + if (fault) { + type = TYPE_FAULT; + goto bad_or_fault; + } + + if (user_mode(regs)) + goto user; + + ai_sys += 1; + + fixup: + + regs->ARM_pc += thumb_mode(regs) ? 
2 : 4; + + switch (CODING_BITS(instr)) { + case 0x00000000: /* ldrh or strh */ + if (LDSTH_I_BIT(instr)) + offset.un = (instr & 0xf00) >> 4 | (instr & 15); + else + offset.un = regs->uregs[RM_BITS(instr)]; + handler = do_alignment_ldrhstrh; + break; + + case 0x04000000: /* ldr or str immediate */ + offset.un = OFFSET_BITS(instr); + handler = do_alignment_ldrstr; + break; + + case 0x06000000: /* ldr or str register */ + offset.un = regs->uregs[RM_BITS(instr)]; + + if (IS_SHIFT(instr)) { + unsigned int shiftval = SHIFT_BITS(instr); + + switch(SHIFT_TYPE(instr)) { + case SHIFT_LSL: + offset.un <<= shiftval; + break; + + case SHIFT_LSR: + offset.un >>= shiftval; + break; + + case SHIFT_ASR: + offset.sn >>= shiftval; + break; + + case SHIFT_RORRRX: + if (shiftval == 0) { + offset.un >>= 1; + if (regs->ARM_cpsr & PSR_C_BIT) + offset.un |= 1 << 31; + } else + offset.un = offset.un >> shiftval | + offset.un << (32 - shiftval); + break; + } + } + handler = do_alignment_ldrstr; + break; + + case 0x08000000: /* ldm or stm */ + handler = do_alignment_ldmstm; + break; + + default: + goto bad; + } + + type = handler(addr, instr, regs); + + if (type == TYPE_ERROR || type == TYPE_FAULT) + goto bad_or_fault; + + if (type == TYPE_LDST) + do_alignment_finish_ldst(addr, instr, regs, offset); + + return 0; + + bad_or_fault: + if (type == TYPE_ERROR) + goto bad; + regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; + /* + * We got a fault - fix it up, or die. + */ + do_bad_area(current, current->mm, addr, fsr, regs); + return 0; + + bad: + /* + * Oops, we didn't handle the instruction. + */ + printk(KERN_ERR "Alignment trap: not handling instruction " + "%0*lx at [<%08lx>]\n", + thumb_mode(regs) ? 4 : 8, + thumb_mode(regs) ? tinstr : instr, instrptr); + ai_skipped += 1; + return 1; + + user: + ai_user += 1; + + if (ai_usermode & 1) + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " + "Address=0x%08lx FSR 0x%03x\n", current->comm, + current->pid, instrptr, + thumb_mode(regs) ? 
4 : 8, + thumb_mode(regs) ? tinstr : instr, + addr, fsr); + + if (ai_usermode & 2) + goto fixup; + + if (ai_usermode & 4) + force_sig(SIGBUS, current); + else + set_cr(cr_no_alignment); + + return 0; +} + +/* + * This needs to be done after sysctl_init, otherwise sys/ will be + * overwritten. Actually, this shouldn't be in sys/ at all since + * it isn't a sysctl, and it doesn't contain sysctl information. + * We now locate it in /proc/cpu/alignment instead. + */ +static int __init alignment_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *res; + + res = proc_mkdir("cpu", NULL); + if (!res) + return -ENOMEM; + + res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res); + if (!res) + return -ENOMEM; + + res->read_proc = proc_alignment_read; + res->write_proc = proc_alignment_write; +#endif + + hook_fault_code(1, do_alignment, SIGILL, "alignment exception"); + hook_fault_code(3, do_alignment, SIGILL, "alignment exception"); + + return 0; +} + +fs_initcall(alignment_init); diff --git a/arch/arm/mm/blockops.c b/arch/arm/mm/blockops.c new file mode 100644 index 00000000000..806c6eeb1b0 --- /dev/null +++ b/arch/arm/mm/blockops.c @@ -0,0 +1,184 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/mm.h> + +#include <asm/memory.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/traps.h> + +extern struct cpu_cache_fns blk_cache_fns; + +#define HARVARD_CACHE + +/* + * blk_flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. 
+ * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +static void __attribute__((naked)) +blk_flush_kern_dcache_page(void *kaddr) +{ + asm( + "add r1, r0, %0 \n\ +1: .word 0xec401f0e @ mcrr p15, 0, r0, r1, c14, 0 @ blocking \n\ + mov r0, #0 \n\ + mcr p15, 0, r0, c7, c5, 0 \n\ + mcr p15, 0, r0, c7, c10, 4 \n\ + mov pc, lr" + : + : "I" (PAGE_SIZE)); +} + +/* + * blk_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +static void __attribute__((naked)) +blk_dma_inv_range_unified(unsigned long start, unsigned long end) +{ + asm( + "tst r0, %0 \n\ + mcrne p15, 0, r0, c7, c11, 1 @ clean unified line \n\ + tst r1, %0 \n\ + mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line\n\ + .word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\ + mov r0, #0 \n\ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ + mov pc, lr" + : + : "I" (L1_CACHE_BYTES - 1)); +} + +static void __attribute__((naked)) +blk_dma_inv_range_harvard(unsigned long start, unsigned long end) +{ + asm( + "tst r0, %0 \n\ + mcrne p15, 0, r0, c7, c10, 1 @ clean D line \n\ + tst r1, %0 \n\ + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line \n\ + .word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\ + mov r0, #0 \n\ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ + mov pc, lr" + : + : "I" (L1_CACHE_BYTES - 1)); +} + +/* + * blk_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +static void __attribute__((naked)) +blk_dma_clean_range(unsigned long start, unsigned long end) +{ + asm( + ".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0 @ blocking \n\ + mov r0, #0 \n\ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ + mov pc, lr"); +} + +/* + * 
blk_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +static void __attribute__((naked)) +blk_dma_flush_range(unsigned long start, unsigned long end) +{ + asm( + ".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0 @ blocking \n\ + mov pc, lr"); +} + +static int blockops_trap(struct pt_regs *regs, unsigned int instr) +{ + regs->ARM_r4 |= regs->ARM_r2; + regs->ARM_pc += 4; + return 0; +} + +static char *func[] = { + "Prefetch data range", + "Clean+Invalidate data range", + "Clean data range", + "Invalidate data range", + "Invalidate instr range" +}; + +static struct undef_hook blockops_hook __initdata = { + .instr_mask = 0x0fffffd0, + .instr_val = 0x0c401f00, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = 0, + .fn = blockops_trap, +}; + +static int __init blockops_check(void) +{ + register unsigned int err asm("r4") = 0; + unsigned int err_pos = 1; + unsigned int cache_type; + int i; + + asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (cache_type)); + + printk("Checking V6 block cache operations:\n"); + register_undef_hook(&blockops_hook); + + __asm__ ("mov r0, %0\n\t" + "mov r1, %1\n\t" + "mov r2, #1\n\t" + ".word 0xec401f2c @ mcrr p15, 0, r1, r0, c12, 2\n\t" + "mov r2, #2\n\t" + ".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0\n\t" + "mov r2, #4\n\t" + ".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0\n\t" + "mov r2, #8\n\t" + ".word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0\n\t" + "mov r2, #16\n\t" + ".word 0xec401f05 @ mcrr p15, 0, r1, r0, c5, 0\n\t" + : + : "r" (PAGE_OFFSET), "r" (PAGE_OFFSET + 128) + : "r0", "r1", "r2"); + + unregister_undef_hook(&blockops_hook); + + for (i = 0; i < ARRAY_SIZE(func); i++, err_pos <<= 1) + printk("%30s: %ssupported\n", func[i], err & err_pos ? "not " : ""); + + if ((err & 8) == 0) { + printk(" --> Using %s block cache invalidate\n", + cache_type & (1 << 24) ? 
"harvard" : "unified"); + if (cache_type & (1 << 24)) + cpu_cache.dma_inv_range = blk_dma_inv_range_harvard; + else + cpu_cache.dma_inv_range = blk_dma_inv_range_unified; + } + if ((err & 4) == 0) { + printk(" --> Using block cache clean\n"); + cpu_cache.dma_clean_range = blk_dma_clean_range; + } + if ((err & 2) == 0) { + printk(" --> Using block cache clean+invalidate\n"); + cpu_cache.dma_flush_range = blk_dma_flush_range; + cpu_cache.flush_kern_dcache_page = blk_flush_kern_dcache_page; + } + + return 0; +} + +__initcall(blockops_check); diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S new file mode 100644 index 00000000000..e1994788cf0 --- /dev/null +++ b/arch/arm/mm/cache-v3.S @@ -0,0 +1,137 @@ +/* + * linux/arch/arm/mm/cache-v3.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + * + * - mm - mm_struct describing address space + */ +ENTRY(v3_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v3_flush_kern_cache_all) + /* FALLTHROUGH */ + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. 
+ * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v3_flush_user_cache_range) + mov ip, #0 + mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_coherent_user_range) + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(v3_flush_kern_dcache_page) + /* FALLTHROUGH */ + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_dma_inv_range) + /* FALLTHROUGH */ + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_dma_flush_range) + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ flush ID cache + /* FALLTHROUGH */ + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_dma_clean_range) + mov pc, lr + + __INITDATA + + .type v3_cache_fns, #object +ENTRY(v3_cache_fns) + .long v3_flush_kern_cache_all + .long v3_flush_user_cache_all + .long v3_flush_user_cache_range + .long v3_coherent_kern_range + .long v3_coherent_user_range + .long v3_flush_kern_dcache_page + .long v3_dma_inv_range + .long v3_dma_clean_range + .long v3_dma_flush_range + .size v3_cache_fns, . - v3_cache_fns diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S new file mode 100644 index 00000000000..b8ad5d58ebe --- /dev/null +++ b/arch/arm/mm/cache-v4.S @@ -0,0 +1,139 @@ +/* + * linux/arch/arm/mm/cache-v4.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + * + * - mm - mm_struct describing address space + */ +ENTRY(v4_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4_flush_kern_cache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. 
+ * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4_flush_user_cache_range) + mov ip, #0 + mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_user_range) + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(v4_flush_kern_dcache_page) + /* FALLTHROUGH */ + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_dma_inv_range) + /* FALLTHROUGH */ + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_dma_flush_range) + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache + /* FALLTHROUGH */ + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_dma_clean_range) + mov pc, lr + + __INITDATA + + .type v4_cache_fns, #object +ENTRY(v4_cache_fns) + .long v4_flush_kern_cache_all + .long v4_flush_user_cache_all + .long v4_flush_user_cache_range + .long v4_coherent_kern_range + .long v4_coherent_user_range + .long v4_flush_kern_dcache_page + .long v4_dma_inv_range + .long v4_dma_clean_range + .long v4_dma_flush_range + .size v4_cache_fns, . - v4_cache_fns diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S new file mode 100644 index 00000000000..5c4055b62d9 --- /dev/null +++ b/arch/arm/mm/cache-v4wb.S @@ -0,0 +1,216 @@ +/* + * linux/arch/arm/mm/cache-v4wb.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The total size of the data cache. + */ +#if defined(CONFIG_CPU_SA110) +# define CACHE_DSIZE 16384 +#elif defined(CONFIG_CPU_SA1100) +# define CACHE_DSIZE 8192 +#else +# error Unknown cache size +#endif + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintainence instructions. 
+ * + * Size Clean (ticks) Dirty (ticks) + * 4096 21 20 21 53 55 54 + * 8192 40 41 40 106 100 102 + * 16384 77 77 76 140 140 138 + * 32768 150 149 150 214 216 212 <--- + * 65536 296 297 296 351 358 361 + * 131072 591 591 591 656 657 651 + * Whole 132 136 132 221 217 207 <--- + */ +#define CACHE_DLIMIT (CACHE_DSIZE * 4) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wb_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wb_flush_kern_cache_all) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache +__flush_whole_cache: + mov r0, #FLUSH_BASE + add r1, r0, #CACHE_DSIZE +1: ldr r2, [r0], #32 + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4wb_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + tst r2, #VM_EXEC @ executable region? + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + + cmp r3, #CACHE_DLIMIT @ total size >= limit? 
+ bhs __flush_whole_cache @ flush whole D cache + +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(v4wb_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ + /* fall through */ + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * This is actually the same as v4wb_coherent_kern_range() + */ + .globl v4wb_dma_flush_range + .set v4wb_dma_flush_range, v4wb_coherent_kern_range + + __INITDATA + + .type v4wb_cache_fns, #object +ENTRY(v4wb_cache_fns) + .long v4wb_flush_kern_cache_all + .long v4wb_flush_user_cache_all + .long v4wb_flush_user_cache_range + .long v4wb_coherent_kern_range + .long v4wb_coherent_user_range + .long v4wb_flush_kern_dcache_page + .long v4wb_dma_inv_range + .long v4wb_dma_clean_range + .long v4wb_dma_flush_range + .size v4wb_cache_fns, . 
- v4wb_cache_fns diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S new file mode 100644 index 00000000000..9bcabd86c6f --- /dev/null +++ b/arch/arm/mm/cache-v4wt.S @@ -0,0 +1,188 @@ +/* + * linux/arch/arm/mm/cache-v4wt.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARMv4 write through cache operations support. + * + * We assume that the write buffer is not enabled. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintainence instructions. + * + * *** This needs benchmarking + */ +#define CACHE_DLIMIT 16384 + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wt_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wt_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the specified + * address space. 
+ * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vm_area_struct flags describing address space + */ +ENTRY(v4wt_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache @ size >= CACHE_DLIMIT: do the whole cache + +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - page - page aligned address + */ +ENTRY(v4wt_flush_kern_dcache_page) + mov r2, #0 + mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache + add r1, r0, #PAGE_SZ @ r1 = end of page for the loop below + /* fallthrough */ + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back.
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_dma_inv_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + /* FALLTHROUGH */ + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range (no cache operation here: it just returns). + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_dma_clean_range) + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range (alias of v4wt_dma_inv_range). + * + * - start - virtual start address + * - end - virtual end address + */ + .globl v4wt_dma_flush_range + .equ v4wt_dma_flush_range, v4wt_dma_inv_range + + __INITDATA + + .type v4wt_cache_fns, #object +ENTRY(v4wt_cache_fns) + .long v4wt_flush_kern_cache_all + .long v4wt_flush_user_cache_all + .long v4wt_flush_user_cache_range + .long v4wt_coherent_kern_range + .long v4wt_coherent_user_range + .long v4wt_flush_kern_dcache_page + .long v4wt_dma_inv_range + .long v4wt_dma_clean_range + .long v4wt_dma_flush_range + .size v4wt_cache_fns, . - v4wt_cache_fns diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S new file mode 100644 index 00000000000..85c10a71e7c --- /dev/null +++ b/arch/arm/mm/cache-v6.S @@ -0,0 +1,227 @@ +/* + * linux/arch/arm/mm/cache-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> + +#include "proc-macros.S" + +#define HARVARD_CACHE +#define CACHE_LINE_SIZE 32 +#define D_CACHE_LINE_SIZE 32 + +/* + * v6_flush_kern_cache_all() + * + * Flush the entire cache. + * + * It is assumed that:
+ * + * It is assumed that: + */ +ENTRY(v6_flush_kern_cache_all) + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mov pc, lr + +/* + * v6_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v6_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v6_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v6_flush_user_cache_range) + mov pc, lr + +/* + * v6_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v6_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. 
+ * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_user_range) + bic r0, r0, #CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 1 @ clean D line + mcr p15, 0, r0, c7, c5, 1 @ invalidate I line +#endif + mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry + add r0, r0, #CACHE_LINE_SIZE + cmp r0, r1 + blo 1b +#ifdef HARVARD_CACHE + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#endif + mov pc, lr + +/* + * v6_flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +ENTRY(v6_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ @ r1 = end of the page +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b +#ifdef HARVARD_CACHE + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#endif + mov pc, lr + + +/* + * v6_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache.
+ * + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v6_dma_inv_range) + tst r0, #D_CACHE_LINE_SIZE - 1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcrne p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + tst r1, #D_CACHE_LINE_SIZE - 1 + bic r1, r1, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line +#else + mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line +#endif +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D line +#else + mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * v6_dma_clean_range(start,end): clean (write back) the cache lines of the region + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v6_dma_clean_range) + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcr p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * v6_dma_flush_range(start,end): clean and invalidate the cache lines of the region + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v6_dma_flush_range) + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + __INITDATA + + .type v6_cache_fns, #object +ENTRY(v6_cache_fns) + .long v6_flush_kern_cache_all + .long v6_flush_user_cache_all + .long v6_flush_user_cache_range + .long v6_coherent_kern_range + .long
v6_coherent_user_range + .long v6_flush_kern_dcache_page + .long v6_dma_inv_range + .long v6_dma_clean_range + .long v6_dma_flush_range + .size v6_cache_fns, . - v6_cache_fns diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c new file mode 100644 index 00000000000..26356ce4da5 --- /dev/null +++ b/arch/arm/mm/consistent.c @@ -0,0 +1,451 @@ +/* + * linux/arch/arm/mm/consistent.c + * + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * DMA uncached mapping support. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> + +#include <asm/cacheflush.h> +#include <asm/io.h> +#include <asm/tlbflush.h> + +#define CONSISTENT_BASE (0xffc00000) +#define CONSISTENT_END (0xffe00000) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc).
+ * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct vm_region { + struct list_head vm_list; /* link in the owning head's list */ + unsigned long vm_start; /* first address of the region */ + unsigned long vm_end; /* end address (exclusive) */ + struct page *vm_pages; /* first page backing the region */ +}; + +static struct vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct vm_region * +vm_region_alloc(struct vm_region *head, size_t size, int gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct vm_region *c, *new; + + new = kmalloc(sizeof(struct vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) /* address arithmetic wrapped */ + goto nospc; + if ((addr + size) <= c->vm_start) /* gap before c is big enough */ + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found (at the tail if the loop ran out).
+ */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr) +{ + struct vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) /* only an exact start address matches */ + goto out; + } + c = NULL; + out: + return c; +} + +#ifdef CONFIG_HUGETLB_PAGE +#error ARM Coherent DMA allocator does not (yet) support huge TLB +#endif + +static void * +__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp, + pgprot_t prot) +{ + struct page *page; + struct vm_region *c; + unsigned long order; + u64 mask = ISA_DMA_THRESHOLD, limit; + + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + goto no_page; + } + + if ((~mask) & ISA_DMA_THRESHOLD) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (unsigned long long)ISA_DMA_THRESHOLD); + goto no_page; + } + } + + /* + * Sanity check the allocation size (after rounding it up to whole pages). + */ + size = PAGE_ALIGN(size); + limit = (mask + 1) & ~mask; /* lowest clear bit of mask, 0 if none */ + if ((limit && size >= limit) || + size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big " + "(requested %#x mask %#llx)\n", size, mask); + goto no_page; + } + + order = get_order(size); + + if (mask != 0xffffffff) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + goto no_page; + + /* + * Invalidate any data that might be lurking in the + * kernel direct-mapped region for device DMA.
+ */ + { + unsigned long kaddr = (unsigned long)page_address(page); + memset(page_address(page), 0, size); + dmac_flush_range(kaddr, kaddr + size); + } + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + struct page *end = page + (1 << order); + + c->vm_pages = page; + + /* + * Set the "dma handle" + */ + *handle = page_to_dma(dev, page); + + do { + BUG_ON(!pte_none(*pte)); + + set_page_count(page, 1); + /* + * x86 does not mark the pages reserved... + */ + SetPageReserved(page); + set_pte(pte, mk_pte(page, prot)); + page++; + pte++; + } while (size -= PAGE_SIZE); /* size is a multiple of PAGE_SIZE */ + + /* + * Free the otherwise unused pages (the order rounds the size up). + */ + while (page < end) { + set_page_count(page, 1); + __free_page(page); + page++; + } + + return (void *)c->vm_start; + } + + if (page) + __free_pages(page, order); + no_page: + *handle = ~0; + return NULL; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. Returns NULL on failure. + */ +void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp) +{ + return __dma_alloc(dev, size, handle, gfp, + pgprot_noncached(pgprot_kernel)); +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Allocate a writecombining region, in much the same way as + * dma_alloc_coherent above.
+ */ +void * +dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp) +{ + return __dma_alloc(dev, size, handle, gfp, + pgprot_writecombine(pgprot_kernel)); +} +EXPORT_SYMBOL(dma_alloc_writecombine); + +static int dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + unsigned long flags, user_size, kern_size; + struct vm_region *c; + int ret = -ENXIO; /* result when no region matches or the range does not fit */ + + user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; /* in pages */ + + spin_lock_irqsave(&consistent_lock, flags); + c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); + spin_unlock_irqrestore(&consistent_lock, flags); + + if (c) { + unsigned long off = vma->vm_pgoff; + + kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; /* in pages */ + + if (off < kern_size && + user_size <= (kern_size - off)) { + vma->vm_flags |= VM_RESERVED; + ret = remap_pfn_range(vma, vma->vm_start, + page_to_pfn(c->vm_pages) + off, + user_size << PAGE_SHIFT, + vma->vm_page_prot); + } + } + + return ret; +} + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_coherent); + +int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_writecombine); + +/* + * Free the pages and the mapping of a region set up by the allocators above.
+ */ +void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) +{ + struct vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); /* compared against the recorded region size below */ + + spin_lock_irqsave(&consistent_lock, flags); + + c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; /* carry on with the recorded size */ + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + /* + * x86 does not mark the pages reserved... + */ + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, cpu_addr); + dump_stack(); +} +EXPORT_SYMBOL(dma_free_coherent); + +/* + * Initialise the consistent memory allocation.
+ */ +static int __init consistent_init(void) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + spin_lock(&init_mm.page_table_lock); + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + WARN_ON(!pmd_none(*pmd)); + + pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); /* single pass: lets the error paths break out to the unlock */ + + spin_unlock(&init_mm.page_table_lock); + + return ret; +} + +core_initcall(consistent_init); + +/* + * Make an area consistent for devices, according to the DMA direction. + */ +void consistent_sync(void *vaddr, size_t size, int direction) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + dmac_inv_range(start, end); + break; + case DMA_TO_DEVICE: /* writeback only */ + dmac_clean_range(start, end); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + dmac_flush_range(start, end); + break; + default: /* any other direction value is treated as a bug */ + BUG(); + } +} +EXPORT_SYMBOL(consistent_sync); diff --git a/arch/arm/mm/copypage-v3.S b/arch/arm/mm/copypage-v3.S new file mode 100644 index 00000000000..4940f190831 --- /dev/null +++ b/arch/arm/mm/copypage-v3.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/mm/copypage-v3.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> + + .text + .align 5 +/* + * ARMv3 optimised copy_user_page + * + * FIXME: do we need to handle cache stuff...
+ */ +ENTRY(v3_copy_user_page) + stmfd sp!, {r4, lr} @ 2 + mov r2, #PAGE_SZ/64 @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmneia r1!, {r3, r4, ip, lr} @ 4 + bne 1b @ 1 + LOADREGS(fd, sp!, {r4, pc}) @ 3 + + .align 5 +/* + * ARMv3 optimised clear_user_page (zeroes the page at r0, 64 bytes per pass) + * + * FIXME: do we need to handle cache stuff... + */ +ENTRY(v3_clear_user_page) + str lr, [sp, #-4]! @ lr is used as a data register below + mov r1, #PAGE_SZ/64 @ 1 + mov r2, #0 @ 1 + mov r3, #0 @ 1 + mov ip, #0 @ 1 + mov lr, #0 @ 1 +1: stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + bne 1b @ 1 + ldr pc, [sp], #4 @ return through the saved lr + + __INITDATA + + .type v3_user_fns, #object +ENTRY(v3_user_fns) + .long v3_clear_user_page + .long v3_copy_user_page + .size v3_user_fns, . - v3_user_fns diff --git a/arch/arm/mm/copypage-v4mc.S b/arch/arm/mm/copypage-v4mc.S new file mode 100644 index 00000000000..305af3dab3d --- /dev/null +++ b/arch/arm/mm/copypage-v4mc.S @@ -0,0 +1,80 @@ +/* + * linux/arch/arm/mm/copypage-v4mc.S + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> + + .text + .align 5 +/* + * ARMv4 mini-dcache optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue.
The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_page that does the right thing. + */ +ENTRY(v4_mc_copy_user_page) + stmfd sp!, {r4, lr} @ 2 + mov r4, r0 @ keep the destination in r4 + mov r0, r1 @ pass the source page to the call + bl map_page_minicache @ the source is read through r0 afterwards + mov r1, #PAGE_SZ/64 @ 1 + ldmia r0!, {r2, r3, ip, lr} @ 4 +1: mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line + stmia r4!, {r2, r3, ip, lr} @ 4 + ldmia r0!, {r2, r3, ip, lr} @ 4+1 + stmia r4!, {r2, r3, ip, lr} @ 4 + ldmia r0!, {r2, r3, ip, lr} @ 4 + mcr p15, 0, r4, c7, c6, 1 @ 1 invalidate D line + stmia r4!, {r2, r3, ip, lr} @ 4 + ldmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + stmia r4!, {r2, r3, ip, lr} @ 4 + ldmneia r0!, {r2, r3, ip, lr} @ 4 + bne 1b @ 1 + ldmfd sp!, {r4, pc} @ 3 + + .align 5 +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +ENTRY(v4_mc_clear_user_page) + str lr, [sp, #-4]! + mov r1, #PAGE_SZ/64 @ 1 + mov r2, #0 @ 1 + mov r3, #0 @ 1 + mov ip, #0 @ 1 + mov lr, #0 @ 1 +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + bne 1b @ 1 + ldr pc, [sp], #4 + + __INITDATA + + .type v4_mc_user_fns, #object +ENTRY(v4_mc_user_fns) + .long v4_mc_clear_user_page + .long v4_mc_copy_user_page + .size v4_mc_user_fns, .
- v4_mc_user_fns diff --git a/arch/arm/mm/copypage-v4wb.S b/arch/arm/mm/copypage-v4wb.S new file mode 100644 index 00000000000..b94c345ceb9 --- /dev/null +++ b/arch/arm/mm/copypage-v4wb.S @@ -0,0 +1,79 @@ +/* + * linux/arch/arm/mm/copypage-v4wb.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> + + .text + .align 5 +/* + * ARMv4 optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_page that does the right thing. + */ +ENTRY(v4wb_copy_user_page) + stmfd sp!, {r4, lr} @ 2 + mov r2, #PAGE_SZ/64 @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4 +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmneia r1!, {r3, r4, ip, lr} @ 4 + bne 1b @ 1 + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB + ldmfd sp!, {r4, pc} @ 3 + + .align 5 +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +ENTRY(v4wb_clear_user_page) + str lr, [sp, #-4]!
+ mov r1, #PAGE_SZ/64 @ 1 + mov r2, #0 @ 1 + mov r3, #0 @ 1 + mov ip, #0 @ 1 + mov lr, #0 @ 1 +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + bne 1b @ 1 + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB + ldr pc, [sp], #4 + + __INITDATA + + .type v4wb_user_fns, #object +ENTRY(v4wb_user_fns) + .long v4wb_clear_user_page + .long v4wb_copy_user_page + .size v4wb_user_fns, . - v4wb_user_fns diff --git a/arch/arm/mm/copypage-v4wt.S b/arch/arm/mm/copypage-v4wt.S new file mode 100644 index 00000000000..976793937a9 --- /dev/null +++ b/arch/arm/mm/copypage-v4wt.S @@ -0,0 +1,73 @@ +/* + * linux/arch/arm/mm/copypage-v4wt.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + * + * This is for CPUs with a writethrough cache and 'flush ID cache' is + * the only supported cache operation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> + + .text + .align 5 +/* + * ARMv4 optimised copy_user_page + * + * Since we have writethrough caches, we don't have to worry about + * dirty data in the cache. However, we do have to ensure that + * subsequent reads are up to date.
+ */ +ENTRY(v4wt_copy_user_page) + stmfd sp!, {r4, lr} @ 2 + mov r2, #PAGE_SZ/64 @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4 +1: stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmneia r1!, {r3, r4, ip, lr} @ 4 + bne 1b @ 1 + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache (r2 is zero here) + ldmfd sp!, {r4, pc} @ 3 + + .align 5 +/* + * ARMv4 optimised clear_user_page + * + * Same story as above: the ID cache is flushed once the page has been written. + */ +ENTRY(v4wt_clear_user_page) + str lr, [sp, #-4]! + mov r1, #PAGE_SZ/64 @ 1 + mov r2, #0 @ 1 + mov r3, #0 @ 1 + mov ip, #0 @ 1 + mov lr, #0 @ 1 +1: stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + bne 1b @ 1 + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache (r2 is zero here) + ldr pc, [sp], #4 + + __INITDATA + + .type v4wt_user_fns, #object +ENTRY(v4wt_user_fns) + .long v4wt_clear_user_page + .long v4wt_copy_user_page + .size v4wt_user_fns, . - v4wt_user_fns diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c new file mode 100644 index 00000000000..694ac820885 --- /dev/null +++ b/arch/arm/mm/copypage-v6.c @@ -0,0 +1,155 @@ +/* + * linux/arch/arm/mm/copypage-v6.c + * + * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/shmparam.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#if SHMLBA > 16384 +#error FIX ME +#endif + +#define from_address (0xffff8000) /* base of the window the source page is mapped into */ +#define from_pgprot PAGE_KERNEL +#define to_address (0xffffc000) /* base of the window the destination page is mapped into */ +#define to_pgprot PAGE_KERNEL + +static pte_t *from_pte; +static pte_t *to_pte; +static DEFINE_SPINLOCK(v6_lock); /* held while the from/to windows are in use */ + +#define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) + +/* + * Copy the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of these pages. + */ +void v6_copy_user_page_nonaliasing(void *kto, const void *kfrom, unsigned long vaddr) +{ + copy_page(kto, kfrom); +} + +/* + * Clear the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of this page. + */ +void v6_clear_user_page_nonaliasing(void *kaddr, unsigned long vaddr) +{ + clear_page(kaddr); +} + +/* + * Copy the page, taking account of the cache colour of vaddr. + */ +void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vaddr) +{ + unsigned int offset = DCACHE_COLOUR(vaddr); + unsigned long from, to; + + /* + * Discard data in the kernel mapping for the new page. + * FIXME: needs this MCRR to be supported. + */ + __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" + : + : "r" (kto), + "r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES) + : "cc"); + + /* + * Now copy the page using the same cache colour as the + * page's ultimate destination.
+ */ + spin_lock(&v6_lock); + + set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot)); + set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot)); + + from = from_address + (offset << PAGE_SHIFT); + to = to_address + (offset << PAGE_SHIFT); + + flush_tlb_kernel_page(from); + flush_tlb_kernel_page(to); + + copy_page((void *)to, (void *)from); + + spin_unlock(&v6_lock); +} + +/* + * Clear the user page. We need to deal with the aliasing issues, + * so remap the kernel page into the same cache colour as the user + * page. + */ +void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr) +{ + unsigned int offset = DCACHE_COLOUR(vaddr); + unsigned long to = to_address + (offset << PAGE_SHIFT); + + /* + * Discard data in the kernel mapping for the new page. + * FIXME: needs this MCRR to be supported. + */ + __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" + : + : "r" (kaddr), + "r" ((unsigned long)kaddr + PAGE_SIZE - L1_CACHE_BYTES) + : "cc"); + + /* + * Now clear the page using the same cache colour as + * the page's ultimate destination.
+ */ + spin_lock(&v6_lock); + + set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot)); + flush_tlb_kernel_page(to); + clear_page((void *)to); + + spin_unlock(&v6_lock); +} + +struct cpu_user_fns v6_user_fns __initdata = { + .cpu_clear_user_page = v6_clear_user_page_nonaliasing, + .cpu_copy_user_page = v6_copy_user_page_nonaliasing, +}; + +static int __init v6_userpage_init(void) +{ + if (cache_is_vipt_aliasing()) { /* switch to the colour-aware versions */ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset_k(from_address); + pmd = pmd_alloc(&init_mm, pgd, from_address); + if (!pmd) + BUG(); + from_pte = pte_alloc_kernel(&init_mm, pmd, from_address); + if (!from_pte) + BUG(); + + to_pte = pte_alloc_kernel(&init_mm, pmd, to_address); + if (!to_pte) + BUG(); + + cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing; + cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing; + } + + return 0; +} + +__initcall(v6_userpage_init); + diff --git a/arch/arm/mm/copypage-xscale.S b/arch/arm/mm/copypage-xscale.S new file mode 100644 index 00000000000..bb277316ef5 --- /dev/null +++ b/arch/arm/mm/copypage-xscale.S @@ -0,0 +1,113 @@ +/* + * linux/arch/arm/mm/copypage-xscale.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> + +/* + * General note: + * We don't really want write-allocate cache behaviour for these functions + * since that will just eat through 8K of the cache.
+ */
+
+	.text
+	.align	5
+/*
+ * XScale optimised copy_user_page
+ *  r0 = destination
+ *  r1 = source
+ *  r2 = virtual user address of ultimate destination page
+ *
+ * The source page may have some clean entries in the cache already, but we
+ * can safely ignore them - break_cow() will flush them out of the cache
+ * if we eventually end up using our copied page.
+ *
+ * What we could do is use the mini-cache to buffer reads from the source
+ * page.  We rely on the mini-cache being smaller than one page, so we'll
+ * cycle through the complete cache anyway.
+ *
+ * Register use inside the routine:
+ *  r0      = read pointer (value left in r0 by map_page_minicache;
+ *            presumably a mini-cache mapping of the source - confirm
+ *            against map_page_minicache)
+ *  r1      = write pointer into the destination
+ *  r2-r5   = data in flight (two ldrd/strd register pairs)
+ *  ip      = start of the 32-byte destination line just written
+ *  lr      = loop counter; each pass of the loop moves 64 bytes
+ * r4, r5 and lr are saved on entry and restored on exit.
+ */
+ENTRY(xscale_mc_copy_user_page)
+	stmfd	sp!, {r4, r5, lr}
+	mov	r5, r0			@ keep destination across the call
+	mov	r0, r1			@ source becomes the call argument
+	bl	map_page_minicache
+	mov	r1, r5			@ r1 = destination again
+	mov	lr, #PAGE_SZ/64-1	@ passes remaining after the first
+
+	/*
+	 * Strangely enough, best performance is achieved
+	 * when prefetching destination as well.  (NP)
+	 */
+	pld	[r0, #0]
+	pld	[r0, #32]
+	pld	[r1, #0]
+	pld	[r1, #32]
+
+	/* 1: entry used while more data follows - prefetch the next 64 bytes */
+1:	pld	[r0, #64]
+	pld	[r0, #96]
+	pld	[r1, #64]
+	pld	[r1, #96]
+
+	/* 2: entry used for the final pass - no prefetch beyond the page */
+2:	ldrd	r2, [r0], #8
+	ldrd	r4, [r0], #8
+	mov	ip, r1			@ remember first destination line
+	strd	r2, [r1], #8
+	ldrd	r2, [r0], #8
+	strd	r4, [r1], #8
+	ldrd	r4, [r0], #8
+	strd	r2, [r1], #8
+	strd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c10, 1	@ clean D line
+	ldrd	r2, [r0], #8
+	mcr	p15, 0, ip, c7, c6, 1	@ invalidate D line
+	ldrd	r4, [r0], #8
+	mov	ip, r1			@ remember second destination line
+	strd	r2, [r1], #8
+	ldrd	r2, [r0], #8
+	strd	r4, [r1], #8
+	ldrd	r4, [r0], #8
+	strd	r2, [r1], #8
+	strd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c10, 1	@ clean D line
+	subs	lr, lr, #1		@ sets the flags tested below
+	mcr	p15, 0, ip, c7, c6, 1	@ invalidate D line
+	bgt	1b			@ counter > 0: another pass with prefetch
+	beq	2b			@ counter == 0: last pass, skip prefetch
+
+	ldmfd	sp!, {r4, r5, pc}
+
+	.align	5
+/*
+ * XScale optimised clear_user_page
+ *  r0 = destination
+ *  r1 = virtual user address of ultimate destination page
+ *
+ * r1 is overwritten on entry and reused as the loop counter
+ * (PAGE_SZ/32 passes); r2/r3 hold the zero pair that is stored.
+ * Each pass writes 32 bytes, then cleans and invalidates the line
+ * whose address was saved in ip.
+ */
+ENTRY(xscale_mc_clear_user_page)
+	mov	r1, #PAGE_SZ/32
+	mov	r2, #0
+	mov	r3, #0
+1:	mov	ip, r0			@ line address for the cache ops
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	mcr	p15, 0, ip, c7, c10, 1	@ clean D line
+	subs	r1, r1, #1
+	mcr	p15, 0, ip, c7, c6, 1	@ invalidate D line
+	bne	1b
+	mov	pc,
lr + + __INITDATA + + .type xscale_mc_user_fns, #object +ENTRY(xscale_mc_user_fns) + .long xscale_mc_clear_user_page + .long xscale_mc_copy_user_page + .size xscale_mc_user_fns, . - xscale_mc_user_fns diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c new file mode 100644 index 00000000000..0d097bb1bc4 --- /dev/null +++ b/arch/arm/mm/discontig.c @@ -0,0 +1,49 @@ +/* + * linux/arch/arm/mm/discontig.c + * + * Discontiguous memory support. + * + * Initial code: Copyright (C) 1999-2000 Nicolas Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#if MAX_NUMNODES != 4 && MAX_NUMNODES != 16 +# error Fix Me Please +#endif + +/* + * Our node_data structure for discontiguous memory. + */ + +static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; + +pg_data_t discontig_node_data[MAX_NUMNODES] = { + { .bdata = &node_bootmem_data[0] }, + { .bdata = &node_bootmem_data[1] }, + { .bdata = &node_bootmem_data[2] }, + { .bdata = &node_bootmem_data[3] }, +#if MAX_NUMNODES == 16 + { .bdata = &node_bootmem_data[4] }, + { .bdata = &node_bootmem_data[5] }, + { .bdata = &node_bootmem_data[6] }, + { .bdata = &node_bootmem_data[7] }, + { .bdata = &node_bootmem_data[8] }, + { .bdata = &node_bootmem_data[9] }, + { .bdata = &node_bootmem_data[10] }, + { .bdata = &node_bootmem_data[11] }, + { .bdata = &node_bootmem_data[12] }, + { .bdata = &node_bootmem_data[13] }, + { .bdata = &node_bootmem_data[14] }, + { .bdata = &node_bootmem_data[15] }, +#endif +}; + +EXPORT_SYMBOL(discontig_node_data); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c new file mode 100644 index 00000000000..9592c3ee4cb --- /dev/null +++ b/arch/arm/mm/extable.c @@ -0,0 +1,16 @@ +/* + * linux/arch/arm/mm/extable.c + */ +#include <linux/module.h> 
+#include <asm/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) + regs->ARM_pc = fixup->fixup; + + return fixup != NULL; +} diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c new file mode 100644 index 00000000000..01967ddeef5 --- /dev/null +++ b/arch/arm/mm/fault-armv.c @@ -0,0 +1,223 @@ +/* + * linux/arch/arm/mm/fault-armv.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bitops.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/pagemap.h> + +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +static unsigned long shared_pte_mask = L_PTE_CACHEABLE; + +/* + * We take the easy way out of this problem - we make the + * PTE uncacheable. However, we leave the write buffer on. + */ +static int adjust_pte(struct vm_area_struct *vma, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte, entry; + int ret = 0; + + pgd = pgd_offset(vma->vm_mm, address); + if (pgd_none(*pgd)) + goto no_pgd; + if (pgd_bad(*pgd)) + goto bad_pgd; + + pmd = pmd_offset(pgd, address); + if (pmd_none(*pmd)) + goto no_pmd; + if (pmd_bad(*pmd)) + goto bad_pmd; + + pte = pte_offset_map(pmd, address); + entry = *pte; + + /* + * If this page isn't present, or is already setup to + * fault (ie, is old), we can safely ignore any issues. 
+ */ + if (pte_present(entry) && pte_val(entry) & shared_pte_mask) { + flush_cache_page(vma, address, pte_pfn(entry)); + pte_val(entry) &= ~shared_pte_mask; + set_pte(pte, entry); + flush_tlb_page(vma, address); + ret = 1; + } + pte_unmap(pte); + return ret; + +bad_pgd: + pgd_ERROR(*pgd); + pgd_clear(pgd); +no_pgd: + return 0; + +bad_pmd: + pmd_ERROR(*pmd); + pmd_clear(pmd); +no_pmd: + return 0; +} + +static void +make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page, int dirty) +{ + struct address_space *mapping = page_mapping(page); + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *mpnt; + struct prio_tree_iter iter; + unsigned long offset; + pgoff_t pgoff; + int aliases = 0; + + if (!mapping) + return; + + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + + /* + * If we have any shared mappings that are in the same mm + * space, then we need to handle them specially to maintain + * cache coherency. + */ + flush_dcache_mmap_lock(mapping); + vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + /* + * If this VMA is not in our MM, we can ignore it. + * Note that we intentionally mask out the VMA + * that we are fixing up. + */ + if (mpnt->vm_mm != mm || mpnt == vma) + continue; + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; + aliases += adjust_pte(mpnt, mpnt->vm_start + offset); + } + flush_dcache_mmap_unlock(mapping); + if (aliases) + adjust_pte(vma, addr); + else + flush_cache_page(vma, addr, page_to_pfn(page)); +} + +/* + * Take care of architecture specific things when placing a new PTE into + * a page table, or changing an existing PTE. Basically, there are two + * things that we need to take care of: + * + * 1. If PG_dcache_dirty is set for the page, we need to ensure + * that any cache entries for the kernels virtual memory + * range are written back to the page. + * 2. 
If we have multiple shared mappings of the same space in
+ *     an object, we need to deal with the cache aliasing issues.
+ *
+ * Note that the page_table_lock will be held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
+	struct page *page;
+	int dirty;
+
+	/* Nothing to do for PTEs without a valid page frame. */
+	if (!pfn_valid(pfn))
+		return;
+
+	page = pfn_to_page(pfn);
+	if (!page_mapping(page))
+		return;
+
+	dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
+	if (dirty) {
+		/*
+		 * This is our first userspace mapping of this page.
+		 * Ensure that the physical page is coherent with
+		 * the kernel mapping.
+		 *
+		 * FIXME: only need to do this on VIVT and aliasing
+		 *	  VIPT cache architectures.  We can do that
+		 *	  by choosing whether to set this bit...
+		 */
+		__cpuc_flush_dcache_page(page_address(page));
+	}
+
+	/* Alias fix-ups are only performed for VIVT caches. */
+	if (cache_is_vivt())
+		make_coherent(vma, addr, page, dirty);
+}
+
+/*
+ * Check whether the write buffer has physical address aliasing
+ * issues.  If it has, we need to avoid them for the case where
+ * we have several shared mappings of the same object in user
+ * space.
+ */ +static int __init check_writebuffer(unsigned long *p1, unsigned long *p2) +{ + register unsigned long zero = 0, one = 1, val; + + local_irq_disable(); + mb(); + *p1 = one; + mb(); + *p2 = zero; + mb(); + val = *p1; + mb(); + local_irq_enable(); + return val != zero; +} + +void __init check_writebuffer_bugs(void) +{ + struct page *page; + const char *reason; + unsigned long v = 1; + + printk(KERN_INFO "CPU: Testing write buffer coherency: "); + + page = alloc_page(GFP_KERNEL); + if (page) { + unsigned long *p1, *p2; + pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG| + L_PTE_DIRTY|L_PTE_WRITE| + L_PTE_BUFFERABLE); + + p1 = vmap(&page, 1, VM_IOREMAP, prot); + p2 = vmap(&page, 1, VM_IOREMAP, prot); + + if (p1 && p2) { + v = check_writebuffer(p1, p2); + reason = "enabling work-around"; + } else { + reason = "unable to map memory\n"; + } + + vunmap(p1); + vunmap(p2); + put_page(page); + } else { + reason = "unable to grab page\n"; + } + + if (v) { + printk("failed, %s\n", reason); + shared_pte_mask |= L_PTE_BUFFERABLE; + } else { + printk("ok\n"); + } +} diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c new file mode 100644 index 00000000000..29be1c01894 --- /dev/null +++ b/arch/arm/mm/fault.c @@ -0,0 +1,462 @@ +/* + * linux/arch/arm/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/ptrace.h> +#include <linux/mm.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/uaccess.h> + +#include "fault.h" + +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. 
+ */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (!mm) + mm = &init_mm; + + printk(KERN_ALERT "pgd = %p\n", mm->pgd); + pgd = pgd_offset(mm, addr); + printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); + + do { + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + break; + + if (pgd_bad(*pgd)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pgd, addr); +#if PTRS_PER_PMD != 1 + printk(", *pmd=%08lx", pmd_val(*pmd)); +#endif + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + printk("(bad)"); + break; + } + +#ifndef CONFIG_HIGHMEM + /* We must not map this if we have highmem enabled */ + pte = pte_offset_map(pmd, addr); + printk(", *pte=%08lx", pte_val(*pte)); + printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE])); + pte_unmap(pte); +#endif + } while(0); + + printk("\n"); +} + +/* + * Oops. The kernel tried to access some page that wasn't present. + */ +static void +__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + /* + * Are we prepared to handle this kernel fault? + */ + if (fixup_exception(regs)) + return; + + /* + * No handler, we'll have to terminate things with extreme prejudice. + */ + bust_spinlocks(1); + printk(KERN_ALERT + "Unable to handle kernel %s at virtual address %08lx\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + + show_pte(mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + +/* + * Something tried to access memory that isn't in our memory map.. 
+ * User mode accesses just cause a SIGSEGV + */ +static void +__do_user_fault(struct task_struct *tsk, unsigned long addr, + unsigned int fsr, int code, struct pt_regs *regs) +{ + struct siginfo si; + +#ifdef CONFIG_DEBUG_USER + if (user_debug & UDBG_SEGV) { + printk(KERN_DEBUG "%s: unhandled page fault at 0x%08lx, code 0x%03x\n", + tsk->comm, addr, fsr); + show_pte(tsk->mm, addr); + show_regs(regs); + } +#endif + + tsk->thread.address = addr; + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void __user *)addr; + force_sig_info(SIGSEGV, &si, tsk); +} + +void +do_bad_area(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, + unsigned int fsr, struct pt_regs *regs) +{ + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (user_mode(regs)) + __do_user_fault(tsk, addr, fsr, SEGV_MAPERR, regs); + else + __do_kernel_fault(mm, addr, fsr, regs); +} + +#define VM_FAULT_BADMAP (-20) +#define VM_FAULT_BADACCESS (-21) + +static int +__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct task_struct *tsk) +{ + struct vm_area_struct *vma; + int fault, mask; + + vma = find_vma(mm, addr); + fault = VM_FAULT_BADMAP; + if (!vma) + goto out; + if (vma->vm_start > addr) + goto check_stack; + + /* + * Ok, we have a good vm_area for this + * memory access, so we can handle it. + */ +good_area: + if (fsr & (1 << 11)) /* write? */ + mask = VM_WRITE; + else + mask = VM_READ|VM_EXEC; + + fault = VM_FAULT_BADACCESS; + if (!(vma->vm_flags & mask)) + goto out; + + /* + * If for any reason at all we couldn't handle + * the fault, make sure we exit gracefully rather + * than endlessly redo the fault. 
+ */ +survive: + fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, fsr & (1 << 11)); + + /* + * Handle the "normal" cases first - successful and sigbus + */ + switch (fault) { + case VM_FAULT_MAJOR: + tsk->maj_flt++; + return fault; + case VM_FAULT_MINOR: + tsk->min_flt++; + case VM_FAULT_SIGBUS: + return fault; + } + + if (tsk->pid != 1) + goto out; + + /* + * If we are out of memory for pid1, + * sleep for a while and retry + */ + yield(); + goto survive; + +check_stack: + if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + goto good_area; +out: + return fault; +} + +static int +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + int fault; + + tsk = current; + mm = tsk->mm; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + fault = __do_page_fault(mm, addr, fsr, tsk); + up_read(&mm->mmap_sem); + + /* + * Handle the "normal" case first + */ + if (fault > 0) + return 0; + + /* + * We had some memory, but were unable to + * successfully fix up this page fault. + */ + if (fault == 0) + goto do_sigbus; + + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + + if (fault == VM_FAULT_OOM) { + /* + * We ran out of memory, or some other thing happened to + * us that made us unable to handle the page fault gracefully. + */ + printk("VM: killing process %s\n", tsk->comm); + do_exit(SIGKILL); + } else + __do_user_fault(tsk, addr, fsr, fault == VM_FAULT_BADACCESS ? + SEGV_ACCERR : SEGV_MAPERR, regs); + return 0; + + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +do_sigbus: + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. 
+ */ + tsk->thread.address = addr; + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + force_sig(SIGBUS, tsk); +#ifdef CONFIG_DEBUG_USER + if (user_debug & UDBG_BUS) { + printk(KERN_DEBUG "%s: sigbus at 0x%08lx, pc=0x%08lx\n", + current->comm, addr, instruction_pointer(regs)); + } +#endif + + /* Kernel mode? Handle exceptions or die */ + if (user_mode(regs)) + return 0; + +no_context: + __do_kernel_fault(mm, addr, fsr, regs); + return 0; +} + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain + * a valid entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are + * probably faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant + * entry, we copy the it to this task. If not, we send the process + * a signal, fixup the exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an + * interrupt or a critical region, and should only copy the information + * from the master page table, nothing more. + */ +static int +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + struct task_struct *tsk; + unsigned int index; + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + + index = pgd_index(addr); + + /* + * FIXME: CP15 C1 is write only on ARMv3 architectures. + */ + pgd = cpu_get_pgd() + index; + pgd_k = init_mm.pgd + index; + + if (pgd_none(*pgd_k)) + goto bad_area; + + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); + + pmd_k = pmd_offset(pgd_k, addr); + pmd = pmd_offset(pgd, addr); + + if (pmd_none(*pmd_k)) + goto bad_area; + + copy_pmd(pmd, pmd_k); + return 0; + +bad_area: + tsk = current; + + do_bad_area(tsk, tsk->active_mm, addr, fsr, regs); + return 0; +} + +/* + * Some section permission faults need to be handled gracefully. 
+ * They can happen due to a __{get,put}_user during an oops. + */ +static int +do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + do_bad_area(tsk, tsk->active_mm, addr, fsr, regs); + return 0; +} + +/* + * This abort handler always returns "fault". + */ +static int +do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 1; +} + +static struct fsr_info { + int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); + int sig; + const char *name; +} fsr_info[] = { + /* + * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 + * defines these to be "precise" aborts. + */ + { do_bad, SIGSEGV, "vector exception" }, + { do_bad, SIGILL, "alignment exception" }, + { do_bad, SIGKILL, "terminal exception" }, + { do_bad, SIGILL, "alignment exception" }, + { do_bad, SIGBUS, "external abort on linefetch" }, + { do_translation_fault, SIGSEGV, "section translation fault" }, + { do_bad, SIGBUS, "external abort on linefetch" }, + { do_page_fault, SIGSEGV, "page translation fault" }, + { do_bad, SIGBUS, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, "section domain fault" }, + { do_bad, SIGBUS, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, "page domain fault" }, + { do_bad, SIGBUS, "external abort on translation" }, + { do_sect_fault, SIGSEGV, "section permission fault" }, + { do_bad, SIGBUS, "external abort on translation" }, + { do_page_fault, SIGSEGV, "page permission fault" }, + /* + * The following are "imprecise" aborts, which are signalled by bit + * 10 of the FSR, and may not be recoverable. These are only + * supported if the CPU abort handler supports bit 10. 
+ */ + { do_bad, SIGBUS, "unknown 16" }, + { do_bad, SIGBUS, "unknown 17" }, + { do_bad, SIGBUS, "unknown 18" }, + { do_bad, SIGBUS, "unknown 19" }, + { do_bad, SIGBUS, "lock abort" }, /* xscale */ + { do_bad, SIGBUS, "unknown 21" }, + { do_bad, SIGBUS, "imprecise external abort" }, /* xscale */ + { do_bad, SIGBUS, "unknown 23" }, + { do_bad, SIGBUS, "dcache parity error" }, /* xscale */ + { do_bad, SIGBUS, "unknown 25" }, + { do_bad, SIGBUS, "unknown 26" }, + { do_bad, SIGBUS, "unknown 27" }, + { do_bad, SIGBUS, "unknown 28" }, + { do_bad, SIGBUS, "unknown 29" }, + { do_bad, SIGBUS, "unknown 30" }, + { do_bad, SIGBUS, "unknown 31" } +}; + +void __init +hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, const char *name) +{ + if (nr >= 0 && nr < ARRAY_SIZE(fsr_info)) { + fsr_info[nr].fn = fn; + fsr_info[nr].sig = sig; + fsr_info[nr].name = name; + } +} + +/* + * Dispatch a data abort to the relevant handler. + */ +asmlinkage void +do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); + + if (!inf->fn(addr, fsr, regs)) + return; + + printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", + inf->name, fsr, addr); + force_sig(inf->sig, current); + show_pte(current->mm, addr); + die_if_kernel("Oops", regs, 0); +} + +asmlinkage void +do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) +{ + do_translation_fault(addr, 0, regs); +} + diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h new file mode 100644 index 00000000000..73b59e83227 --- /dev/null +++ b/arch/arm/mm/fault.h @@ -0,0 +1,6 @@ +void do_bad_area(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, unsigned int fsr, struct pt_regs *regs); + +void show_pte(struct mm_struct *mm, unsigned long addr); + +unsigned long search_exception_table(unsigned long addr); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c new file mode 100644 
index 00000000000..c6de48d8950 --- /dev/null +++ b/arch/arm/mm/flush.c @@ -0,0 +1,94 @@ +/* + * linux/arch/arm/mm/flush.c + * + * Copyright (C) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> + +#include <asm/cacheflush.h> +#include <asm/system.h> + +static void __flush_dcache_page(struct address_space *mapping, struct page *page) +{ + struct mm_struct *mm = current->active_mm; + struct vm_area_struct *mpnt; + struct prio_tree_iter iter; + pgoff_t pgoff; + + /* + * Writeback any data associated with the kernel mapping of this + * page. This ensures that data in the physical page is mutually + * coherent with the kernels mapping. + */ + __cpuc_flush_dcache_page(page_address(page)); + + /* + * If there's no mapping pointer here, then this page isn't + * visible to userspace yet, so there are no cache lines + * associated with any other aliases. + */ + if (!mapping) + return; + + /* + * There are possible user space mappings of this page: + * - VIVT cache: we need to also write back and invalidate all user + * data in the current VM view associated with this page. + * - aliasing VIPT: we only need to find one mapping of this page. + */ + pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + flush_dcache_mmap_lock(mapping); + vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + unsigned long offset; + + /* + * If this VMA is not in our MM, we can ignore it. 
+ */ + if (mpnt->vm_mm != mm) + continue; + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; + flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page)); + if (cache_is_vipt()) + break; + } + flush_dcache_mmap_unlock(mapping); +} + +/* + * Ensure cache coherency between kernel mapping and userspace mapping + * of this page. + * + * We have three cases to consider: + * - VIPT non-aliasing cache: fully coherent so nothing required. + * - VIVT: fully aliasing, so we need to handle every alias in our + * current VM view. + * - VIPT aliasing: need to handle one alias in our current VM view. + * + * If we need to handle aliasing: + * If the page only exists in the page cache and there are no user + * space mappings, we can be lazy and remember that we may have dirty + * kernel cache lines for later. Otherwise, we assume we have + * aliasing mappings. + */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + if (cache_is_vipt_nonaliasing()) + return; + + if (mapping && !mapping_mapped(mapping)) + set_bit(PG_dcache_dirty, &page->flags); + else + __flush_dcache_page(mapping, page); +} +EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c new file mode 100644 index 00000000000..41156c5370f --- /dev/null +++ b/arch/arm/mm/init.c @@ -0,0 +1,621 @@ +/* + * linux/arch/arm/mm/init.c + * + * Copyright (C) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/initrd.h> + +#include <asm/mach-types.h> +#include <asm/hardware.h> +#include <asm/setup.h> +#include <asm/tlb.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> + +#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; +extern unsigned long phys_initrd_start; +extern unsigned long phys_initrd_size; + +/* + * The sole use of this is to pass memory configuration + * data from paging_init to mem_init. + */ +static struct meminfo meminfo __initdata = { 0, }; + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. 
+ */ +struct page *empty_zero_page; + +void show_mem(void) +{ + int free = 0, total = 0, reserved = 0; + int shared = 0, cached = 0, slab = 0, node; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + + for_each_online_node(node) { + struct page *page, *end; + + page = NODE_MEM_MAP(node); + end = page + NODE_DATA(node)->node_spanned_pages; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + } + + printk("%d pages of RAM\n", total); + printk("%d free pages\n", free); + printk("%d reserved pages\n", reserved); + printk("%d slab pages\n", slab); + printk("%d pages shared\n", shared); + printk("%d pages swap cached\n", cached); +} + +struct node_info { + unsigned int start; + unsigned int end; + int bootmap_pages; +}; + +#define O_PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define V_PFN_DOWN(x) O_PFN_DOWN(__pa(x)) + +#define O_PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT) +#define V_PFN_UP(x) O_PFN_UP(__pa(x)) + +#define PFN_SIZE(x) ((x) >> PAGE_SHIFT) +#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \ + (((unsigned long)(s)) & PAGE_MASK)) + +/* + * FIXME: We really want to avoid allocating the bootmap bitmap + * over the top of the initrd. Hopefully, this is located towards + * the start of a bank, so if we allocate the bootmap bitmap at + * the end, we won't clash. 
+ */ +static unsigned int __init +find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages) +{ + unsigned int start_pfn, bank, bootmap_pfn; + + start_pfn = V_PFN_UP(&_end); + bootmap_pfn = 0; + + for (bank = 0; bank < mi->nr_banks; bank ++) { + unsigned int start, end; + + if (mi->bank[bank].node != node) + continue; + + start = O_PFN_UP(mi->bank[bank].start); + end = O_PFN_DOWN(mi->bank[bank].size + + mi->bank[bank].start); + + if (end < start_pfn) + continue; + + if (start < start_pfn) + start = start_pfn; + + if (end <= start) + continue; + + if (end - start >= bootmap_pages) { + bootmap_pfn = start; + break; + } + } + + if (bootmap_pfn == 0) + BUG(); + + return bootmap_pfn; +} + +/* + * Scan the memory info structure and pull out: + * - the end of memory + * - the number of nodes + * - the pfn range of each node + * - the number of bootmem bitmap pages + */ +static unsigned int __init +find_memend_and_nodes(struct meminfo *mi, struct node_info *np) +{ + unsigned int i, bootmem_pages = 0, memend_pfn = 0; + + for (i = 0; i < MAX_NUMNODES; i++) { + np[i].start = -1U; + np[i].end = 0; + np[i].bootmap_pages = 0; + } + + for (i = 0; i < mi->nr_banks; i++) { + unsigned long start, end; + int node; + + if (mi->bank[i].size == 0) { + /* + * Mark this bank with an invalid node number + */ + mi->bank[i].node = -1; + continue; + } + + node = mi->bank[i].node; + + /* + * Make sure we haven't exceeded the maximum number of nodes + * that we have in this configuration. If we have, we're in + * trouble. (maybe we ought to limit, instead of bugging?) 
+ */ + if (node >= MAX_NUMNODES) + BUG(); + node_set_online(node); + + /* + * Get the start and end pfns for this bank + */ + start = O_PFN_UP(mi->bank[i].start); + end = O_PFN_DOWN(mi->bank[i].start + mi->bank[i].size); + + if (np[node].start > start) + np[node].start = start; + + if (np[node].end < end) + np[node].end = end; + + if (memend_pfn < end) + memend_pfn = end; + } + + /* + * Calculate the number of pages we require to + * store the bootmem bitmaps. + */ + for_each_online_node(i) { + if (np[i].end == 0) + continue; + + np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end - + np[i].start); + bootmem_pages += np[i].bootmap_pages; + } + + high_memory = __va(memend_pfn << PAGE_SHIFT); + + /* + * This doesn't seem to be used by the Linux memory + * manager any more. If we can get rid of it, we + * also get rid of some of the stuff above as well. + */ + max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET); + max_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET); + + return bootmem_pages; +} + +static int __init check_initrd(struct meminfo *mi) +{ + int initrd_node = -2; +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long end = phys_initrd_start + phys_initrd_size; + + /* + * Make sure that the initrd is within a valid area of + * memory. 
+ */ + if (phys_initrd_size) { + unsigned int i; + + initrd_node = -1; + + for (i = 0; i < mi->nr_banks; i++) { + unsigned long bank_end; + + bank_end = mi->bank[i].start + mi->bank[i].size; + + if (mi->bank[i].start <= phys_initrd_start && + end <= bank_end) + initrd_node = mi->bank[i].node; + } + } + + if (initrd_node == -1) { + printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond " + "physical memory - disabling initrd\n", + phys_initrd_start, end); + phys_initrd_start = phys_initrd_size = 0; + } +#endif + + return initrd_node; +} + +/* + * Reserve the various regions of node 0 + */ +static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages) +{ + pg_data_t *pgdat = NODE_DATA(0); + unsigned long res_size = 0; + + /* + * Register the kernel text and data with bootmem. + * Note that this can only be in node 0. + */ +#ifdef CONFIG_XIP_KERNEL + reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start); +#else + reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext); +#endif + + /* + * Reserve the page tables. These are already in use, + * and can only be in node 0. + */ + reserve_bootmem_node(pgdat, __pa(swapper_pg_dir), + PTRS_PER_PGD * sizeof(pgd_t)); + + /* + * And don't forget to reserve the allocator bitmap, + * which will be freed later. + */ + reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT, + bootmap_pages << PAGE_SHIFT); + + /* + * Hmm... This should go elsewhere, but we really really need to + * stop things allocating the low memory; ideally we need a better + * implementation of GFP_DMA which does not assume that DMA-able + * memory starts at zero. + */ + if (machine_is_integrator() || machine_is_cintegrator()) + res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; + + /* + * These should likewise go elsewhere. They pre-reserve the + * screen memory region at the start of main system memory. 
+ */ + if (machine_is_edb7211()) + res_size = 0x00020000; + if (machine_is_p720t()) + res_size = 0x00014000; + +#ifdef CONFIG_SA1111 + /* + * Because of the SA1111 DMA bug, we want to preserve our + * precious DMA-able memory... + */ + res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; +#endif + if (res_size) + reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size); +} + +/* + * Register all available RAM in this node with the bootmem allocator. + */ +static inline void free_bootmem_node_bank(int node, struct meminfo *mi) +{ + pg_data_t *pgdat = NODE_DATA(node); + int bank; + + for (bank = 0; bank < mi->nr_banks; bank++) + if (mi->bank[bank].node == node) + free_bootmem_node(pgdat, mi->bank[bank].start, + mi->bank[bank].size); +} + +/* + * Initialise the bootmem allocator for all nodes. This is called + * early during the architecture specific initialisation. + */ +static void __init bootmem_init(struct meminfo *mi) +{ + struct node_info node_info[MAX_NUMNODES], *np = node_info; + unsigned int bootmap_pages, bootmap_pfn, map_pg; + int node, initrd_node; + + bootmap_pages = find_memend_and_nodes(mi, np); + bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages); + initrd_node = check_initrd(mi); + + map_pg = bootmap_pfn; + + /* + * Initialise the bootmem nodes. + * + * What we really want to do is: + * + * unmap_all_regions_except_kernel(); + * for_each_node_in_reverse_order(node) { + * map_node(node); + * allocate_bootmem_map(node); + * init_bootmem_node(node); + * free_bootmem_node(node); + * } + * + * but this is a 2.5-type change. For now, we just set + * the nodes up in reverse order. + * + * (we could also do with rolling bootmem_init and paging_init + * into one generic "memory_init" type function). + */ + np += num_online_nodes() - 1; + for (node = num_online_nodes() - 1; node >= 0; node--, np--) { + /* + * If there are no pages in this node, ignore it. + * Note that node 0 must always have some pages. 
+ */ + if (np->end == 0 || !node_online(node)) { + if (node == 0) + BUG(); + continue; + } + + /* + * Initialise the bootmem allocator. + */ + init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end); + free_bootmem_node_bank(node, mi); + map_pg += np->bootmap_pages; + + /* + * If this is node 0, we need to reserve some areas ASAP - + * we may use bootmem on node 0 to setup the other nodes. + */ + if (node == 0) + reserve_node_zero(bootmap_pfn, bootmap_pages); + } + + +#ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size && initrd_node >= 0) { + reserve_bootmem_node(NODE_DATA(initrd_node), phys_initrd_start, + phys_initrd_size); + initrd_start = __phys_to_virt(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; + } +#endif + + BUG_ON(map_pg != bootmap_pfn + bootmap_pages); +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) +{ + void *zero_page; + int node; + + bootmem_init(mi); + + memcpy(&meminfo, mi, sizeof(meminfo)); + + /* + * allocate the zero page. Note that we count on this going ok. + */ + zero_page = alloc_bootmem_low_pages(PAGE_SIZE); + + /* + * initialise the page tables. + */ + memtable_init(mi); + if (mdesc->map_io) + mdesc->map_io(); + flush_tlb_all(); + + /* + * initialise the zones within each node + */ + for_each_online_node(node) { + unsigned long zone_size[MAX_NR_ZONES]; + unsigned long zhole_size[MAX_NR_ZONES]; + struct bootmem_data *bdata; + pg_data_t *pgdat; + int i; + + /* + * Initialise the zone size information. + */ + for (i = 0; i < MAX_NR_ZONES; i++) { + zone_size[i] = 0; + zhole_size[i] = 0; + } + + pgdat = NODE_DATA(node); + bdata = pgdat->bdata; + + /* + * The size of this node has already been determined. + * If we need to do anything fancy with the allocation + * of this memory to the zones, now is the time to do + * it. 
+ */ + zone_size[0] = bdata->node_low_pfn - + (bdata->node_boot_start >> PAGE_SHIFT); + + /* + * If this zone has zero size, skip it. + */ + if (!zone_size[0]) + continue; + + /* + * For each bank in this node, calculate the size of the + * holes. holes = node_size - sum(bank_sizes_in_node) + */ + zhole_size[0] = zone_size[0]; + for (i = 0; i < mi->nr_banks; i++) { + if (mi->bank[i].node != node) + continue; + + zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT; + } + + /* + * Adjust the sizes according to any special + * requirements for this machine type. + */ + arch_adjust_zones(node, zone_size, zhole_size); + + free_area_init_node(node, pgdat, zone_size, + bdata->node_boot_start >> PAGE_SHIFT, zhole_size); + } + + /* + * finish off the bad pages once + * the mem_map is initialised + */ + memzero(zero_page, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); +} + +static inline void free_area(unsigned long addr, unsigned long end, char *s) +{ + unsigned int size = (end - addr) >> 10; + + for (; addr < end; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + ClearPageReserved(page); + set_page_count(page, 1); + free_page(addr); + totalram_pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); +} + +/* + * mem_init() marks the free areas in the mem_map and tells us how much + * memory is free. This is done after various parts of the system have + * claimed their memory after the kernel image. + */ +void __init mem_init(void) +{ + unsigned int codepages, datapages, initpages; + int i, node; + + codepages = &_etext - &_text; + datapages = &_end - &__data_start; + initpages = &__init_end - &__init_begin; + +#ifndef CONFIG_DISCONTIGMEM + max_mapnr = virt_to_page(high_memory) - mem_map; +#endif + + /* + * We may have non-contiguous memory. 
+ */ + if (meminfo.nr_banks != 1) + create_memmap_holes(&meminfo); + + /* this will put all unused low memory onto the freelists */ + for_each_online_node(node) { + pg_data_t *pgdat = NODE_DATA(node); + + if (pgdat->node_spanned_pages != 0) + totalram_pages += free_all_bootmem_node(pgdat); + } + +#ifdef CONFIG_SA1111 + /* now that our DMA memory is actually so designated, we can free it */ + free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL); +#endif + + /* + * Since our memory may not be contiguous, calculate the + * real number of pages we have in this system + */ + printk(KERN_INFO "Memory:"); + + num_physpages = 0; + for (i = 0; i < meminfo.nr_banks; i++) { + num_physpages += meminfo.bank[i].size >> PAGE_SHIFT; + printk(" %ldMB", meminfo.bank[i].size >> 20); + } + + printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); + printk(KERN_NOTICE "Memory: %luKB available (%dK code, " + "%dK data, %dK init)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + codepages >> 10, datapages >> 10, initpages >> 10); + + if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + extern int sysctl_overcommit_memory; + /* + * On a machine this small we won't get + * anywhere without overcommit, so turn + * it on by default. 
+ */ + sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; + } +} + +void free_initmem(void) +{ + if (!machine_is_integrator() && !machine_is_cintegrator()) { + free_area((unsigned long)(&__init_begin), + (unsigned long)(&__init_end), + "init"); + } +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) + free_area(start, end, "initrd"); +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c new file mode 100644 index 00000000000..00bb8fd37a5 --- /dev/null +++ b/arch/arm/mm/ioremap.c @@ -0,0 +1,172 @@ +/* + * linux/arch/arm/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * + * (C) Copyright 1995 1996 Linus Torvalds + * + * Hacked for ARM by Phil Blundell <philb@gnu.org> + * Hacked to allow all architectures to build, and various cleanups + * by Russell King + * + * This allows a driver to remap an arbitrary region of bus memory into + * virtual space. One should *only* use readl, writel, memcpy_toio and + * so on with such remapped areas. + * + * Because the ARM only has a 32-bit address space we can't address the + * whole of the (physical) PCI space at once. PCI huge-mode addressing + * allows us to circumvent this restriction by splitting PCI space into + * two 2GB chunks and mapping only one at a time into processor memory. + * We use MMU protection domains to trap any attempt to access the bank + * that is not currently mapped. (This isn't fully implemented yet.) 
+ */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/io.h> +#include <asm/tlbflush.h> + +static inline void +remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t pgprot) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + BUG_ON(address >= end); + do { + if (!pte_none(*pte)) + goto bad; + + set_pte(pte, pfn_pte(phys_addr >> PAGE_SHIFT, pgprot)); + address += PAGE_SIZE; + phys_addr += PAGE_SIZE; + pte++; + } while (address && (address < end)); + return; + + bad: + printk("remap_area_pte: page already exists\n"); + BUG(); +} + +static inline int +remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long phys_addr, unsigned long flags) +{ + unsigned long end; + pgprot_t pgprot; + + address &= ~PGDIR_MASK; + end = address + size; + + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + + phys_addr -= address; + BUG_ON(address >= end); + + pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags); + do { + pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + if (!pte) + return -ENOMEM; + remap_area_pte(pte, address, end - address, address + phys_addr, pgprot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int +remap_area_pages(unsigned long start, unsigned long phys_addr, + unsigned long size, unsigned long flags) +{ + unsigned long address = start; + unsigned long end = start + size; + int err = 0; + pgd_t * dir; + + phys_addr -= address; + dir = pgd_offset(&init_mm, address); + BUG_ON(address >= end); + spin_lock(&init_mm.page_table_lock); + do { + pmd_t *pmd = pmd_alloc(&init_mm, dir, address); + if (!pmd) { + err = -ENOMEM; + break; + } + if (remap_area_pmd(pmd, address, end - address, + phys_addr + address, flags)) { + err = 
-ENOMEM; + break; + } + + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + + spin_unlock(&init_mm.page_table_lock); + flush_cache_vmap(start, end); + return err; +} + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + * + * 'flags' are the extra L_PTE_ flags that you want to specify for this + * mapping. See include/asm-arm/proc-armv/pgtable.h for more information. + */ +void __iomem * +__ioremap(unsigned long phys_addr, size_t size, unsigned long flags, + unsigned long align) +{ + void * addr; + struct vm_struct * area; + unsigned long offset, last_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Ok, go for it.. 
+ */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + addr = area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + vfree(addr); + return NULL; + } + return (void __iomem *) (offset + (char *)addr); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(void __iomem *addr) +{ + vfree((void *) (PAGE_MASK & (unsigned long) addr)); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/arm/mm/minicache.c b/arch/arm/mm/minicache.c new file mode 100644 index 00000000000..dedf2ab01b2 --- /dev/null +++ b/arch/arm/mm/minicache.c @@ -0,0 +1,73 @@ +/* + * linux/arch/arm/mm/minicache.c + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently. + */ +#define minicache_address (0xffff8000) +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_CACHEABLE) + +static pte_t *minicache_pte; + +/* + * Note that this is intended to be called only from the copy_user_page + * asm code; anything else will require special locking to prevent the + * mini-cache space being re-used. (Note: probably preempt unsafe). 
+ * + * We rely on the fact that the minicache is 2K, and we'll be pushing + * 4K of data through it, so we don't actually have to specifically + * flush the minicache when we change the mapping. + * + * Note also: assert(PAGE_OFFSET <= virt < high_memory). + * Unsafe: preempt, kmap. + */ +unsigned long map_page_minicache(unsigned long virt) +{ + set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot)); + flush_tlb_kernel_page(minicache_address); + + return minicache_address; +} + +static int __init minicache_init(void) +{ + pgd_t *pgd; + pmd_t *pmd; + + spin_lock(&init_mm.page_table_lock); + + pgd = pgd_offset_k(minicache_address); + pmd = pmd_alloc(&init_mm, pgd, minicache_address); + if (!pmd) + BUG(); + minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address); + if (!minicache_pte) + BUG(); + + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +core_initcall(minicache_init); diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c new file mode 100644 index 00000000000..f5a87db8b49 --- /dev/null +++ b/arch/arm/mm/mm-armv.c @@ -0,0 +1,760 @@ +/* + * linux/arch/arm/mm/mm-armv.c + * + * Copyright (C) 1998-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Page table sludge for ARM v3 and v4 processor architectures. 
+ */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/nodemask.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/io.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> + +#include <asm/mach/map.h> + +#define CPOLICY_UNCACHED 0 +#define CPOLICY_BUFFERED 1 +#define CPOLICY_WRITETHROUGH 2 +#define CPOLICY_WRITEBACK 3 +#define CPOLICY_WRITEALLOC 4 + +static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; +static unsigned int ecc_mask __initdata = 0; +pgprot_t pgprot_kernel; + +EXPORT_SYMBOL(pgprot_kernel); + +struct cachepolicy { + const char policy[16]; + unsigned int cr_mask; + unsigned int pmd; + unsigned int pte; +}; + +static struct cachepolicy cache_policies[] __initdata = { + { + .policy = "uncached", + .cr_mask = CR_W|CR_C, + .pmd = PMD_SECT_UNCACHED, + .pte = 0, + }, { + .policy = "buffered", + .cr_mask = CR_C, + .pmd = PMD_SECT_BUFFERED, + .pte = PTE_BUFFERABLE, + }, { + .policy = "writethrough", + .cr_mask = 0, + .pmd = PMD_SECT_WT, + .pte = PTE_CACHEABLE, + }, { + .policy = "writeback", + .cr_mask = 0, + .pmd = PMD_SECT_WB, + .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + }, { + .policy = "writealloc", + .cr_mask = 0, + .pmd = PMD_SECT_WBWA, + .pte = PTE_BUFFERABLE|PTE_CACHEABLE, + } +}; + +/* + * These are useful for identifing cache coherency + * problems by allowing the cache or the cache and + * writebuffer to be turned off. (Note: the write + * buffer should not be on and the cache off). 
+ */ +static void __init early_cachepolicy(char **p) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { + int len = strlen(cache_policies[i].policy); + + if (memcmp(*p, cache_policies[i].policy, len) == 0) { + cachepolicy = i; + cr_alignment &= ~cache_policies[i].cr_mask; + cr_no_alignment &= ~cache_policies[i].cr_mask; + *p += len; + break; + } + } + if (i == ARRAY_SIZE(cache_policies)) + printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + flush_cache_all(); + set_cr(cr_alignment); +} + +static void __init early_nocache(char **__unused) +{ + char *p = "buffered"; + printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(&p); +} + +static void __init early_nowrite(char **__unused) +{ + char *p = "uncached"; + printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(&p); +} + +static void __init early_ecc(char **p) +{ + if (memcmp(*p, "on", 2) == 0) { + ecc_mask = PMD_PROTECTION; + *p += 2; + } else if (memcmp(*p, "off", 3) == 0) { + ecc_mask = 0; + *p += 3; + } +} + +__early_param("nocache", early_nocache); +__early_param("nowb", early_nowrite); +__early_param("cachepolicy=", early_cachepolicy); +__early_param("ecc=", early_ecc); + +static int __init noalign_setup(char *__unused) +{ + cr_alignment &= ~CR_A; + cr_no_alignment &= ~CR_A; + set_cr(cr_alignment); + return 1; +} + +__setup("noalign", noalign_setup); + +#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) + +/* + * need to get a 16k page for level 1 + */ +pgd_t *get_pgd_slow(struct mm_struct *mm) +{ + pgd_t *new_pgd, *init_pgd; + pmd_t *new_pmd, *init_pmd; + pte_t *new_pte, *init_pte; + + new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2); + if (!new_pgd) + goto no_pgd; + + memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t)); + + init_pgd = pgd_offset_k(0); + + if (!vectors_high()) { + /* + * This lock is here just to satisfy pmd_alloc and pte_lock + */ + spin_lock(&mm->page_table_lock); + + 
/* + * On ARM, first page must always be allocated since it + * contains the machine vectors. + */ + new_pmd = pmd_alloc(mm, new_pgd, 0); + if (!new_pmd) + goto no_pmd; + + new_pte = pte_alloc_map(mm, new_pmd, 0); + if (!new_pte) + goto no_pte; + + init_pmd = pmd_offset(init_pgd, 0); + init_pte = pte_offset_map_nested(init_pmd, 0); + set_pte(new_pte, *init_pte); + pte_unmap_nested(init_pte); + pte_unmap(new_pte); + + spin_unlock(&mm->page_table_lock); + } + + /* + * Copy over the kernel and IO PGD entries + */ + memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, + (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); + + clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + + return new_pgd; + +no_pte: + spin_unlock(&mm->page_table_lock); + pmd_free(new_pmd); + free_pages((unsigned long)new_pgd, 2); + return NULL; + +no_pmd: + spin_unlock(&mm->page_table_lock); + free_pages((unsigned long)new_pgd, 2); + return NULL; + +no_pgd: + return NULL; +} + +void free_pgd_slow(pgd_t *pgd) +{ + pmd_t *pmd; + struct page *pte; + + if (!pgd) + return; + + /* pgd is always present and good */ + pmd = (pmd_t *)pgd; + if (pmd_none(*pmd)) + goto free; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + goto free; + } + + pte = pmd_page(*pmd); + pmd_clear(pmd); + dec_page_state(nr_page_table_pages); + pte_free(pte); + pmd_free(pmd); +free: + free_pages((unsigned long) pgd, 2); +} + +/* + * Create a SECTION PGD between VIRT and PHYS in domain + * DOMAIN with protection PROT. This operates on half- + * pgdir entry increments. 
+ */ +static inline void +alloc_init_section(unsigned long virt, unsigned long phys, int prot) +{ + pmd_t *pmdp; + + pmdp = pmd_offset(pgd_offset_k(virt), virt); + if (virt & (1 << 20)) + pmdp++; + + *pmdp = __pmd(phys | prot); + flush_pmd_entry(pmdp); +} + +/* + * Create a SUPER SECTION PGD between VIRT and PHYS with protection PROT + */ +static inline void +alloc_init_supersection(unsigned long virt, unsigned long phys, int prot) +{ + int i; + + for (i = 0; i < 16; i += 1) { + alloc_init_section(virt, phys & SUPERSECTION_MASK, + prot | PMD_SECT_SUPER); + + virt += (PGDIR_SIZE / 2); + phys += (PGDIR_SIZE / 2); + } +} + +/* + * Add a PAGE mapping between VIRT and PHYS in domain + * DOMAIN with protection PROT. Note that due to the + * way we map the PTEs, we must allocate two PTE_SIZE'd + * blocks - one for the Linux pte table, and one for + * the hardware pte table. + */ +static inline void +alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot) +{ + pmd_t *pmdp; + pte_t *ptep; + + pmdp = pmd_offset(pgd_offset_k(virt), virt); + + if (pmd_none(*pmdp)) { + unsigned long pmdval; + ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * + sizeof(pte_t)); + + pmdval = __pa(ptep) | prot_l1; + pmdp[0] = __pmd(pmdval); + pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t)); + flush_pmd_entry(pmdp); + } + ptep = pte_offset_kernel(pmdp, virt); + + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); +} + +/* + * Clear any PGD mapping. On a two-level page table system, + * the clearance is done by the middle-level functions (pmd) + * rather than the top-level (pgd) functions. 
+ */ +static inline void clear_mapping(unsigned long virt) +{ + pmd_clear(pmd_offset(pgd_offset_k(virt), virt)); +} + +struct mem_types { + unsigned int prot_pte; + unsigned int prot_l1; + unsigned int prot_sect; + unsigned int domain; +}; + +static struct mem_types mem_types[] __initdata = { + [MT_DEVICE] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_WRITE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED | + PMD_SECT_AP_WRITE, + .domain = DOMAIN_IO, + }, + [MT_CACHECLEAN] = { + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, + [MT_MINICLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_MINICACHE, + .domain = DOMAIN_KERNEL, + }, + [MT_LOW_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_EXEC, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_HIGH_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_EXEC, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_MEMORY] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_ROM] = { + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, + [MT_IXP2000_DEVICE] = { /* IXP2400 requires XCB=101 for on-chip I/O */ + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_WRITE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED | + PMD_SECT_AP_WRITE | PMD_SECT_BUFFERABLE | + PMD_SECT_TEX(1), + .domain = DOMAIN_IO, + } +}; + +/* + * Adjust the PMD section entries according to the CPU in use. 
+ */ +static void __init build_mem_type_table(void) +{ + struct cachepolicy *cp; + unsigned int cr = get_cr(); + int cpu_arch = cpu_architecture(); + int i; + +#if defined(CONFIG_CPU_DCACHE_DISABLE) + if (cachepolicy > CPOLICY_BUFFERED) + cachepolicy = CPOLICY_BUFFERED; +#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) + if (cachepolicy > CPOLICY_WRITETHROUGH) + cachepolicy = CPOLICY_WRITETHROUGH; +#endif + if (cpu_arch < CPU_ARCH_ARMv5) { + if (cachepolicy >= CPOLICY_WRITEALLOC) + cachepolicy = CPOLICY_WRITEBACK; + ecc_mask = 0; + } + + if (cpu_arch <= CPU_ARCH_ARMv5) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + if (mem_types[i].prot_l1) + mem_types[i].prot_l1 |= PMD_BIT4; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_BIT4; + } + } + + /* + * ARMv6 and above have extended page tables. + */ + if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { + /* + * bit 4 becomes XN which we must clear for the + * kernel memory mapping. + */ + mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4; + mem_types[MT_ROM].prot_sect &= ~PMD_BIT4; + /* + * Mark cache clean areas read only from SVC mode + * and no access from userspace. 
+ */ + mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + } + + cp = &cache_policies[cachepolicy]; + + if (cpu_arch >= CPU_ARCH_ARMv5) { + mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE; + mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE; + } else { + mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte; + mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte; + mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); + } + + mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_ROM].prot_sect |= cp->pmd; + + for (i = 0; i < 16; i++) { + unsigned long v = pgprot_val(protection_map[i]); + v &= (~(PTE_BUFFERABLE|PTE_CACHEABLE)) | cp->pte; + protection_map[i] = __pgprot(v); + } + + pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | + L_PTE_DIRTY | L_PTE_WRITE | + L_PTE_EXEC | cp->pte); + + switch (cp->pmd) { + case PMD_SECT_WT: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; + break; + case PMD_SECT_WB: + case PMD_SECT_WBWA: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; + break; + } + printk("Memory policy: ECC %sabled, Data cache %s\n", + ecc_mask ? "en" : "dis", cp->policy); +} + +#define vectors_base() (vectors_high() ? 0xffff0000 : 0) + +/* + * Create the page directory entries and any necessary + * page tables for the mapping specified by `md'. We + * are able to cope here with varying sizes and address + * offsets, and we take full advantage of sections and + * supersections. 
+ */ +static void __init create_mapping(struct map_desc *md) +{ + unsigned long virt, length; + int prot_sect, prot_l1, domain; + pgprot_t prot_pte; + long off; + + if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { + printk(KERN_WARNING "BUG: not creating mapping for " + "0x%08lx at 0x%08lx in user region\n", + md->physical, md->virtual); + return; + } + + if ((md->type == MT_DEVICE || md->type == MT_ROM) && + md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) { + printk(KERN_WARNING "BUG: mapping for 0x%08lx at 0x%08lx " + "overlaps vmalloc space\n", + md->physical, md->virtual); + } + + domain = mem_types[md->type].domain; + prot_pte = __pgprot(mem_types[md->type].prot_pte); + prot_l1 = mem_types[md->type].prot_l1 | PMD_DOMAIN(domain); + prot_sect = mem_types[md->type].prot_sect | PMD_DOMAIN(domain); + + virt = md->virtual; + off = md->physical - virt; + length = md->length; + + if (mem_types[md->type].prot_l1 == 0 && + (virt & 0xfffff || (virt + off) & 0xfffff || (virt + length) & 0xfffff)) { + printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not " + "be mapped using pages, ignoring.\n", + md->physical, md->virtual); + return; + } + + while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) { + alloc_init_page(virt, virt + off, prot_l1, prot_pte); + + virt += PAGE_SIZE; + length -= PAGE_SIZE; + } + + /* N.B. ARMv6 supersections are only defined to work with domain 0. + * Since domain assignments can in fact be arbitrary, the + * 'domain == 0' check below is required to insure that ARMv6 + * supersections are only allocated for domain 0 regardless + * of the actual domain assignments in use. 
+ */ + if (cpu_architecture() >= CPU_ARCH_ARMv6 && domain == 0) { + /* Align to supersection boundary */ + while ((virt & ~SUPERSECTION_MASK || (virt + off) & + ~SUPERSECTION_MASK) && length >= (PGDIR_SIZE / 2)) { + alloc_init_section(virt, virt + off, prot_sect); + + virt += (PGDIR_SIZE / 2); + length -= (PGDIR_SIZE / 2); + } + + while (length >= SUPERSECTION_SIZE) { + alloc_init_supersection(virt, virt + off, prot_sect); + + virt += SUPERSECTION_SIZE; + length -= SUPERSECTION_SIZE; + } + } + + /* + * A section mapping covers half a "pgdir" entry. + */ + while (length >= (PGDIR_SIZE / 2)) { + alloc_init_section(virt, virt + off, prot_sect); + + virt += (PGDIR_SIZE / 2); + length -= (PGDIR_SIZE / 2); + } + + while (length >= PAGE_SIZE) { + alloc_init_page(virt, virt + off, prot_l1, prot_pte); + + virt += PAGE_SIZE; + length -= PAGE_SIZE; + } +} + +/* + * In order to soft-boot, we need to insert a 1:1 mapping in place of + * the user-mode pages. This will then ensure that we have predictable + * results when turning the mmu off + */ +void setup_mm_for_reboot(char mode) +{ + unsigned long pmdval; + pgd_t *pgd; + pmd_t *pmd; + int i; + int cpu_arch = cpu_architecture(); + + if (current->mm && current->mm->pgd) + pgd = current->mm->pgd; + else + pgd = init_mm.pgd; + + for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++) { + pmdval = (i << PGDIR_SHIFT) | + PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | + PMD_TYPE_SECT; + if (cpu_arch <= CPU_ARCH_ARMv5) + pmdval |= PMD_BIT4; + pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT); + pmd[0] = __pmd(pmdval); + pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); + flush_pmd_entry(pmd); + } +} + +extern void _stext, _etext; + +/* + * Setup initial mappings. We use the page we allocated for zero page to hold + * the mappings, which will get overwritten by the vectors in traps_init(). + * The mappings must be in virtual address order. 
+ */
+void __init memtable_init(struct meminfo *mi)
+{
+	struct map_desc *init_maps, *p, *q;
+	unsigned long address = 0;
+	int i;
+
+	build_mem_type_table();
+
+	/* One page holds the temporary descriptor list built below. */
+	init_maps = p = alloc_bootmem_low_pages(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+	p->physical = CONFIG_XIP_PHYS_ADDR & PMD_MASK;
+	p->virtual = (unsigned long)&_stext & PMD_MASK;
+	p->length = ((unsigned long)&_etext - p->virtual + ~PMD_MASK) & PMD_MASK;
+	p->type = MT_ROM;
+	p ++;
+#endif
+
+	/* One MT_MEMORY descriptor per non-empty bank. */
+	for (i = 0; i < mi->nr_banks; i++) {
+		if (mi->bank[i].size == 0)
+			continue;
+
+		p->physical = mi->bank[i].start;
+		p->virtual = __phys_to_virt(p->physical);
+		p->length = mi->bank[i].size;
+		p->type = MT_MEMORY;
+		p ++;
+	}
+
+#ifdef FLUSH_BASE
+	p->physical = FLUSH_BASE_PHYS;
+	p->virtual = FLUSH_BASE;
+	p->length = PGDIR_SIZE;
+	p->type = MT_CACHECLEAN;
+	p ++;
+#endif
+
+#ifdef FLUSH_BASE_MINICACHE
+	p->physical = FLUSH_BASE_PHYS + PGDIR_SIZE;
+	p->virtual = FLUSH_BASE_MINICACHE;
+	p->length = PGDIR_SIZE;
+	p->type = MT_MINICLEAN;
+	p ++;
+#endif
+
+	/*
+	 * Go through the initial mappings, but clear out any
+	 * pgdir entries that are not in the description.
+	 *
+	 * 'address' starts at 0 and only ever moves upwards, so the
+	 * loop ends when it wraps back round to 0. 'q == p' means all
+	 * descriptors have been consumed; the rest is simply cleared.
+	 */
+	q = init_maps;
+	do {
+		if (address < q->virtual || q == p) {
+			clear_mapping(address);
+			address += PGDIR_SIZE;
+		} else {
+			create_mapping(q);
+
+			address = q->virtual + q->length;
+			address = (address + PGDIR_SIZE - 1) & PGDIR_MASK;
+
+			q ++;
+		}
+	} while (address != 0);
+
+	/*
+	 * Create a mapping for the machine vectors at the high-vectors
+	 * location (0xffff0000). If we aren't using high-vectors, also
+	 * create a mapping at the low-vectors virtual address.
+	 *
+	 * The descriptor page itself is reused as the vectors page: its
+	 * first entry is overwritten to describe it.
+	 */
+	init_maps->physical = virt_to_phys(init_maps);
+	init_maps->virtual = 0xffff0000;
+	init_maps->length = PAGE_SIZE;
+	init_maps->type = MT_HIGH_VECTORS;
+	create_mapping(init_maps);
+
+	if (!vectors_high()) {
+		init_maps->virtual = 0;
+		init_maps->type = MT_LOW_VECTORS;
+		create_mapping(init_maps);
+	}
+
+	flush_cache_all();
+	flush_tlb_all();
+}
+
+/*
+ * Create the architecture specific mappings
+ *
+ * io_desc: array of mapping descriptors
+ * nr:      number of entries in io_desc
+ */
+void __init iotable_init(struct map_desc *io_desc, int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		create_mapping(io_desc + i);
+}
+
+/*
+ * Free the whole pages of the memmap array that lie between the
+ * struct page entries for start_pfn and end_pfn.
+ */
+static inline void
+free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *start_pg, *end_pg;
+	unsigned long pg, pgend;
+
+	/*
+	 * Convert start_pfn/end_pfn to a struct page pointer.
+	 */
+	start_pg = pfn_to_page(start_pfn);
+	end_pg = pfn_to_page(end_pfn);
+
+	/*
+	 * Convert to physical addresses, and
+	 * round start upwards and end downwards.
+	 */
+	pg = PAGE_ALIGN(__pa(start_pg));
+	pgend = __pa(end_pg) & PAGE_MASK;
+
+	/*
+	 * If there are free pages between these,
+	 * free the section of the memmap array.
+	 */
+	if (pg < pgend)
+		free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
+}
+
+/*
+ * Free the memmap covering the gaps between consecutive banks that
+ * belong to 'node'. Gives up with an error message as soon as a bank
+ * starts below the end of the previous one.
+ */
+static inline void free_unused_memmap_node(int node, struct meminfo *mi)
+{
+	unsigned long bank_start, prev_bank_end = 0;
+	unsigned int i;
+
+	/*
+	 * [FIXME] This relies on each bank being in address order. This
+	 * may not be the case, especially if the user has provided the
+	 * information on the command line.
+	 */
+	for (i = 0; i < mi->nr_banks; i++) {
+		if (mi->bank[i].size == 0 || mi->bank[i].node != node)
+			continue;
+
+		bank_start = mi->bank[i].start >> PAGE_SHIFT;
+		if (bank_start < prev_bank_end) {
+			printk(KERN_ERR "MEM: unordered memory banks. "
+				"Not freeing memmap.\n");
+			break;
+		}
+
+		/*
+		 * If we had a previous bank, and there is a space
+		 * between the current bank and the previous, free it.
+		 */
+		if (prev_bank_end && prev_bank_end != bank_start)
+			free_memmap(node, prev_bank_end, bank_start);
+
+		prev_bank_end = PAGE_ALIGN(mi->bank[i].start +
+					   mi->bank[i].size) >> PAGE_SHIFT;
+	}
+}
+
+/*
+ * The mem_map array can get very big. Free
+ * the unused area of the memory map.
+ */
+void __init create_memmap_holes(struct meminfo *mi)
+{
+	int node;
+
+	for_each_online_node(node)
+		free_unused_memmap_node(node, mi);
+}
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
new file mode 100644
index 00000000000..32c4b0e35b3
--- /dev/null
+++ b/arch/arm/mm/mmap.c
@@ -0,0 +1,109 @@
+/*
+ * linux/arch/arm/mm/mmap.c
+ */
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+
+#include <asm/system.h>
+
+/*
+ * Round addr up to a multiple of SHMLBA, then add the offset that
+ * page 'pgoff' of the object has within an SHMLBA-sized window.
+ */
+#define COLOUR_ALIGN(addr,pgoff)		\
+	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\
+	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
+
+/*
+ * We need to ensure that shared mappings are correctly aligned to
+ * avoid aliasing issues with VIPT caches. We need to ensure that
+ * a specific page of an object is always mapped at a multiple of
+ * SHMLBA bytes.
+ *
+ * We unconditionally provide this function for all cases, however
+ * in the VIVT case, we optimise out the alignment rules.
+ *
+ * Returns the chosen start address, -EINVAL for a MAP_FIXED shared
+ * request whose address has the wrong cache colour, or -ENOMEM when
+ * no gap of 'len' bytes is available below TASK_SIZE.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr;
+#ifdef CONFIG_CPU_V6
+	unsigned int cache_type;
+	int do_align = 0, aliasing = 0;
+
+	/*
+	 * We only need to do colour alignment if either the I or D
+	 * caches alias. The test below looks at bit 11 of each 12-bit
+	 * half of the low 24 bits of the cache type register, i.e.
+	 * register bits 11 and 23.
+	 *
+	 * NOTE(review): the comparison with CPUID_ID presumably detects
+	 * CPUs that have no cache type register - confirm.
+	 */
+	cache_type = read_cpuid(CPUID_CACHETYPE);
+	if (cache_type != read_cpuid(CPUID_ID)) {
+		aliasing = (cache_type | cache_type >> 12) & (1 << 11);
+		if (aliasing)
+			do_align = filp || flags & MAP_SHARED;
+	}
+#else
+#define do_align 0
+#define aliasing 0
+#endif
+
+	/*
+	 * We should enforce the MAP_FIXED case. However, currently
+	 * the generic kernel code doesn't allow us to handle this.
+	 *
+	 * What we can do is refuse a fixed shared mapping of the wrong
+	 * colour. The colour is that of the address at which page 0 of
+	 * the object would land, so pgoff must be taken into account,
+	 * exactly as COLOUR_ALIGN() does when we pick the address.
+	 */
+	if (flags & MAP_FIXED) {
+		if (aliasing && flags & MAP_SHARED &&
+		    (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* Try the caller's hint first. */
+	if (addr) {
+		if (do_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	start_addr = addr = mm->free_area_cache;
+
+full_search:
+	if (do_align)
+		addr = COLOUR_ALIGN(addr, pgoff);
+	else
+		addr = PAGE_ALIGN(addr);
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point: (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = addr = TASK_UNMAPPED_BASE;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Remember the place where we stopped the search:
+			 */
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		addr = vma->vm_end;
+		if (do_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+	}
+}
+
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
new file mode 100644
index 00000000000..0d90227a0a3
--- /dev/null
+++ b/arch/arm/mm/mmu.c
@@ -0,0 +1,45 @@
+/*
+ * linux/arch/arm/mm/mmu.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Counter from which context IDs are handed out; __new_context()
+ * pre-increments it.
+ * NOTE(review): presumably the low ASID_BITS bits are the hardware ASID
+ * and the bits above them a version number - confirm in
+ * asm/mmu_context.h.
+ */
+unsigned int cpu_last_asid = { 1 << ASID_BITS };
+
+/*
+ * We fork()ed a process, and we need a new context for the child
+ * to run in. We reserve version 0 for initial tasks so we will
+ * always allocate an ASID.
+ *
+ * 'tsk' is not used here; the new mm simply starts with context ID 0.
+ */
+void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	mm->context.id = 0;
+}
+
+/*
+ * Hand the next value of cpu_last_asid to 'mm'.
+ */
+void __new_context(struct mm_struct *mm)
+{
+	unsigned int asid;
+
+	/*
+	 * If the counter wrapped all the way round to 0, restart it
+	 * at its initial value.
+	 */
+	asid = ++cpu_last_asid;
+	if (asid == 0)
+		asid = cpu_last_asid = 1 << ASID_BITS;
+
+	/*
+	 * If we've used up all our ASIDs, we need
+	 * to start a new version and flush the TLB.
+	 */
+	if ((asid & ~ASID_MASK) == 0)
+		flush_tlb_all();
+
+	mm->context.id = asid;
+}
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
new file mode 100644
index 00000000000..1f325231b9e
--- /dev/null
+++ b/arch/arm/mm/proc-arm1020.S
@@ -0,0 +1,530 @@
+/*
+ * linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020
+ *
+ * Copyright (C) 2000 ARM Limited
+ * Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm1020.
+ *
+ * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
+ */
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/constants.h>
+#include <asm/pgtable.h>
+#include <asm/procinfo.h>
+#include <asm/ptrace.h>
+#include <asm/hardware.h>
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions. Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	16
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	32768
+
+	.text
+/*
+ * cpu_arm1020_proc_init()
+ *
+ * Nothing to do: returns immediately.
+ */
+ENTRY(cpu_arm1020_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_arm1020_proc_fin()
+ *
+ * Sets the F and I bits and SVC mode in the CPSR control field,
+ * flushes the whole cache, then clears control register bits
+ * 0x1000 and 0x000e (marked i, w, c and a in the bit maps below).
+ */
+ENTRY(cpu_arm1020_proc_fin)
+	stmfd	sp!, {lr}
+	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	msr	cpsr_c, ip
+	bl	arm1020_flush_kern_cache_all
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000 		@ ...i............
+	bic	r0, r0, #0x000e 		@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ldmfd	sp!, {pc}
+
+/*
+ * cpu_arm1020_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+ENTRY(cpu_arm1020_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f 		@ ............wcam
+	bic	ip, ip, #0x1100 		@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	mov	pc, r0				@ jump to loc
+
+/*
+ * cpu_arm1020_do_idle()
+ *
+ * NOTE(review): r0 is not initialised before being used as the
+ * operand of the wait-for-interrupt operation; the ARM ARM lists
+ * that operand as should-be-zero - confirm whether this matters.
+ */
+	.align	5
+ENTRY(cpu_arm1020_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	mov	pc, lr
+
+/* ================================= CACHE ================================ */
+
+	.align	5
+/*
+ * flush_user_cache_all()
+ *
+ * Invalidate all cache entries in a particular address
+ * space.
+ */
+ENTRY(arm1020_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache.
+ *
+ * __flush_whole_cache is also branched to by
+ * arm1020_flush_user_cache_range; it expects ip = 0 and r2 = flags
+ * (the I cache is only invalidated when VM_EXEC is set in r2).
+ * r1 and r3 are used as the segment and segment|entry counters.
+ */
+ENTRY(arm1020_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 16 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	subs	r3, r3, #1 << 26		@ borrows (C clear) after entry 0
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5			@ borrows (C clear) after segment 0
+	bcs	1b				@ segments 15 to 0
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Invalidate a range of cache entries in the specified
+ * address space.
+ *
+ * - start	- start address (inclusive)
+ * - end	- end address (exclusive)
+ * - flags	- vm_flags for this space
+ *
+ * Ranges of CACHE_DLIMIT bytes or more are handed to
+ * __flush_whole_cache instead of being walked line by line.
+ */
+ENTRY(arm1020_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start	- virtual start address
+ * - end	- virtual end address
+ */
+ENTRY(arm1020_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start	- virtual start address
+ * - end	- virtual end address
+ *
+ * start is rounded down to a cache line boundary; every line up to
+ * end is cleaned from the D cache and invalidated in the I cache.
+ */
+ENTRY(arm1020_coherent_user_range)
+	mov	ip, #0
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+1:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * flush_kern_dcache_page(void *page)
+ *
+ * Ensure no D cache aliasing occurs, either with itself or
+ * the I cache
+ *
+ * - page	- page aligned address
+ *
+ * Cleans and invalidates every D cache line in [page, page + PAGE_SZ).
+ */
+ENTRY(arm1020_flush_kern_dcache_page)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	add	r1, r0, #PAGE_SZ		@ r1 = end of page
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start	- virtual start address
+ * - end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(arm1020_dma_inv_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	tst	r0, #CACHE_DLINESIZE - 1	@ start unaligned?
+	bic	r0, r0, #CACHE_DLINESIZE - 1	@ (bic leaves the flags alone)
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	tst	r1, #CACHE_DLINESIZE - 1	@ end unaligned?
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean the specified virtual address range.
+ *
+ * - start	- virtual start address
+ * - end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(arm1020_dma_clean_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down to a line
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * dma_flush_range(start, end)
+ *
+ * Clean and invalidate the specified virtual address range.
+ *
+ * - start	- virtual start address
+ * - end	- virtual end address
+ */
+ENTRY(arm1020_dma_flush_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down to a line
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * Table of the cache functions defined above, referenced from
+ * __arm1020_proc_info.
+ */
+ENTRY(arm1020_cache_fns)
+	.long	arm1020_flush_kern_cache_all
+	.long	arm1020_flush_user_cache_all
+	.long	arm1020_flush_user_cache_range
+	.long	arm1020_coherent_kern_range
+	.long	arm1020_coherent_user_range
+	.long	arm1020_flush_kern_dcache_page
+	.long	arm1020_dma_inv_range
+	.long	arm1020_dma_clean_range
+	.long	arm1020_dma_flush_range
+
+/*
+ * cpu_arm1020_dcache_clean_area
+ *
+ * Cleans D cache lines starting at the address in r0; r1 is a byte
+ * count that is decremented by one line per iteration, and the loop
+ * runs while it is still above zero.
+ */
+	.align	5
+ENTRY(cpu_arm1020_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	mov	pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm1020_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ *
+ * Cleans and invalidates the whole D cache by index, invalidates the
+ * I cache, loads the new table pointer and finally invalidates the
+ * TLBs. Uses r1, r3 and ip as scratch registers.
+ */
+	.align	5
+ENTRY(cpu_arm1020_switch_mm)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0				@ drain WB operand should be zero
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r1, #0xF			@ 16 segments
+1:	mov	r3, #0x3F			@ 64 entries
+2:	mov	ip, r3, LSL #26 		@ shift up entry
+	orr	ip, ip, r1, LSL #5		@ shift in/up index
+	mcr	p15, 0, ip, c7, c14, 2		@ Clean & Inval DCache entry
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	subs	r3, r3, #1
+	cmp	r3, #0
+	bge	2b				@ entries 3F to 0
+	subs	r1, r1, #1
+	cmp	r1, #0
+	bge	1b				@ segments 15 to 0
+
+#endif
+	mov	r1, #0
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r1, c7, c5, 0		@ invalidate I cache
+#endif
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
+	mov	pc, lr
+
+/*
+ * cpu_arm1020_set_pte(ptep, pte)
+ *
+ * Set a PTE and flush it out
+ *
+ * The Linux view of the PTE is stored at ptep, the hardware view
+ * derived from it 2048 bytes below. Uses r2, r3 and ip as scratch.
+ */
+	.align	5
+ENTRY(cpu_arm1020_set_pte)
+	str	r1, [r0], #-2048		@ linux version
+
+	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+
+	bic	r2, r1, #PTE_SMALL_AP_MASK
+	bic	r2, r2, #PTE_TYPE_MASK
+	orr	r2, r2, #PTE_TYPE_SMALL
+
+	tst	r1, #L_PTE_USER			@ User?
+	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
+
+	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
+	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
+
+	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
+	movne	r2, #0
+
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	eor	r3, r1, #0x0a			@ C & small page?
+	tst	r3, #0x0b
+	biceq	r2, r2, #4
+#endif
+	str	r2, [r0]			@ hardware version
+	mov	r0, r0
+	mov	ip, #0				@ drain WB operand should be zero
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+	__INIT
+
+/*
+ * __arm1020_setup
+ *
+ * Invalidates caches and TLBs, then returns in r0 the control
+ * register value with the arm1020_cr1_clear bits cleared and the
+ * arm1020_cr1_set bits set. Uses r5 as scratch. The value is only
+ * computed here, not written back to the control register.
+ */
+	.type	__arm1020_setup, #function
+__arm1020_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	ldr	r5, arm1020_cr1_clear
+	bic	r0, r0, r5
+	ldr	r5, arm1020_cr1_set
+	orr	r0, r0, r5
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
+#endif
+	mov	pc, lr
+	.size	__arm1020_setup, . - __arm1020_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .0.1 1001 ..11 0101	(FIXME: why no V bit?)
+	 */
+	.type	arm1020_cr1_clear, #object
+	.type	arm1020_cr1_set, #object
+arm1020_cr1_clear:
+	.word	0x593f
+arm1020_cr1_set:
+	.word	0x1935
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+	.type	arm1020_processor_functions, #object
+arm1020_processor_functions:
+	.word	v4t_early_abort
+	.word	cpu_arm1020_proc_init
+	.word	cpu_arm1020_proc_fin
+	.word	cpu_arm1020_reset
+	.word	cpu_arm1020_do_idle
+	.word	cpu_arm1020_dcache_clean_area
+	.word	cpu_arm1020_switch_mm
+	.word	cpu_arm1020_set_pte
+	.size	arm1020_processor_functions, . - arm1020_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv5t"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v5"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	/* CPU name string; suffixes reflect the cache configuration options. */
+	.type	cpu_arm1020_name, #object
+cpu_arm1020_name:
+	.ascii	"ARM1020"
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	.ascii	"i"
+#endif
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	.ascii	"d"
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	.ascii	"(wt)"
+#else
+	.ascii	"(wb)"
+#endif
+#endif
+#ifndef CONFIG_CPU_BPREDICT_DISABLE
+	.ascii	"B"
+#endif
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	.ascii	"RR"
+#endif
+	.ascii	"\0"
+	.size	cpu_arm1020_name, . - cpu_arm1020_name
+
+	.align
+
+	.section ".proc.info", #alloc, #execinstr
+
+	.type	__arm1020_proc_info,#object
+__arm1020_proc_info:
+	.long	0x4104a200			@ ARM 1020T (Architecture v5T)
+	.long	0xff0ffff0
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__arm1020_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm1020_name
+	.long	arm1020_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm1020_cache_fns
+	.size	__arm1020_proc_info, . - __arm1020_proc_info
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
new file mode 100644
index 00000000000..142a2c2d6f0
--- /dev/null
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -0,0 +1,513 @@
+/*
+ * linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020E
+ *
+ * Copyright (C) 2000 ARM Limited
+ * Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm1020e. + * + * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> +#include <asm/hardware.h> + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintainence instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1020e_proc_init() + */ +ENTRY(cpu_arm1020e_proc_init) + mov pc, lr + +/* + * cpu_arm1020e_proc_fin() + */ +ENTRY(cpu_arm1020e_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl arm1020e_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm1020e_reset(loc) + * + * Perform a soft reset of the system. 
Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm1020e_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm1020e_do_idle() + */ + .align 5 +ENTRY(cpu_arm1020e_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1020e_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1020e_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. 
+ * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1020e_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - page - page aligned address + */ +ENTRY(arm1020e_flush_kern_dcache_page) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1020e_dma_inv_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1020e_dma_clean_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm1020e_cache_fns) + .long arm1020e_flush_kern_cache_all + .long arm1020e_flush_user_cache_all + .long arm1020e_flush_user_cache_range + .long arm1020e_coherent_kern_range + .long arm1020e_coherent_user_range + .long arm1020e_flush_kern_dcache_page + .long arm1020e_dma_inv_range + .long arm1020e_dma_clean_range + .long arm1020e_dma_flush_range + + .align 5 +ENTRY(cpu_arm1020e_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1020e_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1020e_switch_mm) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r3, c7, c10, 4 + mov r1, #0xF @ 16 segments +1: mov r3, #0x3F @ 64 entries +2: mov ip, r3, LSL #26 @ shift up entry + orr ip, ip, r1, LSL #5 @ shift in/up index + mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry + mov ip, #0 + subs r3, r3, #1 + cmp r3, #0 + bge 2b @ entries 3F to 0 + subs r1, r1, #1 + cmp r1, #0 + bge 1b @ segments 15 to 0 + +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm1020e_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1020e_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r1, #0x0a @ C & small page? 
+ tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mov pc, lr + + __INIT + + .type __arm1020e_setup, #function +__arm1020e_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm1020e_cr1_clear + bic r0, r0, r5 + ldr r5, arm1020e_cr1_set + orr r0, r0, r5 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + mov pc, lr + .size __arm1020e_setup, . - __arm1020e_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .0.1 1001 ..11 0101 /* FIXME: why no V bit? */ + */ + .type arm1020e_cr1_clear, #object + .type arm1020e_cr1_set, #object +arm1020e_cr1_clear: + .word 0x5f3f +arm1020e_cr1_set: + .word 0x1935 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm1020e_processor_functions, #object +arm1020e_processor_functions: + .word v4t_early_abort + .word cpu_arm1020e_proc_init + .word cpu_arm1020e_proc_fin + .word cpu_arm1020e_reset + .word cpu_arm1020e_do_idle + .word cpu_arm1020e_dcache_clean_area + .word cpu_arm1020e_switch_mm + .word cpu_arm1020e_set_pte + .size arm1020e_processor_functions, . - arm1020e_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5te" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . 
- cpu_elf_name + + .type cpu_arm1020e_name, #object +cpu_arm1020e_name: + .ascii "ARM1020E" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif +#ifndef CONFIG_CPU_BPREDICT_DISABLE + .ascii "B" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif + .ascii "\0" + .size cpu_arm1020e_name, . - cpu_arm1020e_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm1020e_proc_info,#object +__arm1020e_proc_info: + .long 0x4105a200 @ ARM 1020TE (Architecture v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1020e_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1020e_name + .long arm1020e_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1020e_cache_fns + .size __arm1020e_proc_info, . - __arm1020e_proc_info diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S new file mode 100644 index 00000000000..747ed963e1d --- /dev/null +++ b/arch/arm/mm/proc-arm1022.S @@ -0,0 +1,495 @@ +/* + * linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1022E. 
+ */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintainence instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1022_proc_init() + */ +ENTRY(cpu_arm1022_proc_init) + mov pc, lr + +/* + * cpu_arm1022_proc_fin() + */ +ENTRY(cpu_arm1022_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl arm1022_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm1022_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm1022_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ 
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm1022_do_idle() + */ + .align 5 +ENTRY(cpu_arm1022_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1022_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1022_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1022_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. 
If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - page - page aligned address + */ +ENTRY(arm1022_flush_kern_dcache_page) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1022_dma_inv_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1022_dma_clean_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm1022_cache_fns) + .long arm1022_flush_kern_cache_all + .long arm1022_flush_user_cache_all + .long arm1022_flush_user_cache_range + .long arm1022_coherent_kern_range + .long arm1022_coherent_user_range + .long arm1022_flush_kern_dcache_page + .long arm1022_dma_inv_range + .long arm1022_dma_clean_range + .long arm1022_dma_flush_range + + .align 5 +ENTRY(cpu_arm1022_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1022_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1022_switch_mm) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm1022_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1022_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r1, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mov pc, lr + + __INIT + + .type __arm1022_setup, #function +__arm1022_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm1022_cr1_clear + bic r0, r0, r5 + ldr r5, arm1022_cr1_set + orr r0, r0, r5 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.............. +#endif + mov pc, lr + .size __arm1022_setup, . 
- __arm1022_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1022_cr1_clear, #object + .type arm1022_cr1_set, #object +arm1022_cr1_clear: + .word 0x7f3f +arm1022_cr1_set: + .word 0x3935 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm1022_processor_functions, #object +arm1022_processor_functions: + .word v4t_early_abort + .word cpu_arm1022_proc_init + .word cpu_arm1022_proc_fin + .word cpu_arm1022_reset + .word cpu_arm1022_do_idle + .word cpu_arm1022_dcache_clean_area + .word cpu_arm1022_switch_mm + .word cpu_arm1022_set_pte + .size arm1022_processor_functions, . - arm1022_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5te" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm1022_name, #object +cpu_arm1022_name: + .ascii "arm1022" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif +#ifndef CONFIG_CPU_BPREDICT_DISABLE + .ascii "B" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif + .ascii "\0" + .size cpu_arm1022_name, . - cpu_arm1022_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm1022_proc_info,#object +__arm1022_proc_info: + .long 0x4105a220 @ ARM 1022E (v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1022_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1022_name + .long arm1022_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1022_cache_fns + .size __arm1022_proc_info, . 
- __arm1022_proc_info diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S new file mode 100644 index 00000000000..248110c9cf1 --- /dev/null +++ b/arch/arm/mm/proc-arm1026.S @@ -0,0 +1,491 @@ +/* + * linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1026EJ-S. + */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintainence instructions. 
+ */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1026_proc_init() + */ +ENTRY(cpu_arm1026_proc_init) + mov pc, lr + +/* + * cpu_arm1026_proc_fin() + */ +ENTRY(cpu_arm1026_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl arm1026_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm1026_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm1026_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm1026_do_idle() + */ + .align 5 +ENTRY(cpu_arm1026_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1026_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. 
+ */ +ENTRY(arm1026_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1026_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - page - page aligned address + */ +ENTRY(arm1026_flush_kern_dcache_page) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1026_dma_inv_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm1026_dma_clean_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm1026_cache_fns) + .long arm1026_flush_kern_cache_all + .long arm1026_flush_user_cache_all + .long arm1026_flush_user_cache_range + .long arm1026_coherent_kern_range + .long arm1026_coherent_user_range + .long arm1026_flush_kern_dcache_page + .long arm1026_dma_inv_range + .long arm1026_dma_clean_range + .long arm1026_dma_flush_range + + .align 5 +ENTRY(cpu_arm1026_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1026_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1026_switch_mm) + mov r1, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm1026_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1026_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r1, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mov pc, lr + + + __INIT + + .type __arm1026_setup, #function +__arm1026_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mcr p15, 0, r4, c2, c0 @ load page table pointer +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ explicitly disable writeback + mcr p15, 7, r0, c15, c0, 0 +#endif + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm1026_cr1_clear + bic r0, r0, r5 + ldr r5, arm1026_cr1_set + orr r0, r0, r5 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + mov pc, lr + .size __arm1026_setup, . 
- __arm1026_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1026_cr1_clear, #object + .type arm1026_cr1_set, #object +arm1026_cr1_clear: + .word 0x7f3f +arm1026_cr1_set: + .word 0x3935 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm1026_processor_functions, #object +arm1026_processor_functions: + .word v5t_early_abort + .word cpu_arm1026_proc_init + .word cpu_arm1026_proc_fin + .word cpu_arm1026_reset + .word cpu_arm1026_do_idle + .word cpu_arm1026_dcache_clean_area + .word cpu_arm1026_switch_mm + .word cpu_arm1026_set_pte + .size arm1026_processor_functions, . - arm1026_processor_functions + + .section .rodata + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5tej" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . - cpu_elf_name + .align + + .type cpu_arm1026_name, #object +cpu_arm1026_name: + .ascii "ARM1026EJ-S" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif +#ifndef CONFIG_CPU_BPREDICT_DISABLE + .ascii "B" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif + .ascii "\0" + .size cpu_arm1026_name, . - cpu_arm1026_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm1026_proc_info,#object +__arm1026_proc_info: + .long 0x4106a260 @ ARM 1026EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1026_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm1026_name + .long arm1026_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1026_cache_fns + .size __arm1026_proc_info, . 
- __arm1026_proc_info diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S new file mode 100644 index 00000000000..0ee214b824f --- /dev/null +++ b/arch/arm/mm/proc-arm6_7.S @@ -0,0 +1,404 @@ +/* + * linux/arch/arm/mm/proc-arm6,7.S + * + * Copyright (C) 1997-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM610 & ARM710. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> + +ENTRY(cpu_arm6_dcache_clean_area) +ENTRY(cpu_arm7_dcache_clean_area) + mov pc, lr + +/* + * Function: arm6_7_data_abort () + * + * Params : r2 = address of aborted instruction + * : sp = pointer to registers + * + * Purpose : obtain information about current aborted instruction + * + * Returns : r0 = address of abort + * : r1 = FSR + */ + +ENTRY(cpu_arm7_data_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r8, [r2] @ read aborted instruction (r2 = its address; r0 holds the FAR) + tst r8, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 8 @ yes. 
+ and r7, r8, #15 << 24 + add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine + nop + +/* 0 */ b .data_unknown +/* 1 */ mov pc, lr @ swp +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m +/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] +/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm +/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] +/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> +/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> +/* a */ b .data_unknown +/* b */ b .data_unknown +/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ mov pc, lr @ ldc rd, [rn, #m] +/* e */ b .data_unknown +/* f */ +.data_unknown: @ Part of jumptable + mov r0, r2 + mov r1, r8 + mov r2, sp + bl baddataabort + b ret_from_exception + +ENTRY(cpu_arm6_data_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r8, [r2] @ read arm instruction + tst r8, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 8 @ yes. + and r7, r8, #14 << 24 + teq r7, #8 << 24 @ was it ldm/stm + movne pc, lr + +.data_arm_ldmstm: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup + mov r7, #0x11 + orr r7, r7, #0x1100 + and r6, r8, r7 + and r2, r8, r7, lsl #1 + add r6, r6, r2, lsr #1 + and r2, r8, r7, lsl #2 + add r6, r6, r2, lsr #2 + and r2, r8, r7, lsl #3 + add r6, r6, r2, lsr #3 + add r6, r6, r6, lsr #8 + add r6, r6, r6, lsr #4 + and r6, r6, #15 @ r6 = no. of registers to transfer. 
+ and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsl #2 @ Undo increment + addeq r7, r7, r6, lsl #2 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_apply_r6_and_rn: + and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6 @ Undo increment + addeq r7, r7, r6 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_lateldrpreconst: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup +.data_arm_lateldrpostconst: + movs r2, r8, lsl #20 @ Get offset + moveq pc, lr @ zero -> no fixup + and r5, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r2, lsr #20 @ Undo increment + addeq r7, r7, r2, lsr #20 @ Undo decrement + str r7, [sp, r5, lsr #14] @ Put register 'Rn' + mov pc, lr + +.data_arm_lateldrprereg: + tst r8, #1 << 21 @ check writeback bit + moveq pc, lr @ no writeback -> no fixup +.data_arm_lateldrpostreg: + and r7, r8, #15 @ Extract 'm' from instruction + ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' + mov r5, r8, lsr #7 @ get shift count + ands r5, r5, #31 + and r7, r8, #0x70 @ get shift type + orreq r7, r7, #8 @ shift count = 0 + add pc, pc, r7 + nop + + mov r6, r6, lsl r5 @ 0: LSL #!0 + b .data_arm_apply_r6_and_rn + b .data_arm_apply_r6_and_rn @ 1: LSL #0 + nop + b .data_unknown @ 2: MUL? + nop + b .data_unknown @ 3: MUL? + nop + mov r6, r6, lsr r5 @ 4: LSR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, lsr #32 @ 5: LSR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ 6: MUL? + nop + b .data_unknown @ 7: MUL? 
+ nop + mov r6, r6, asr r5 @ 8: ASR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, asr #32 @ 9: ASR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ A: MUL? + nop + b .data_unknown @ B: MUL? + nop + mov r6, r6, ror r5 @ C: ROR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, rrx @ D: RRX + b .data_arm_apply_r6_and_rn + b .data_unknown @ E: MUL? + nop + b .data_unknown @ F: MUL? + +/* + * Function: arm6_7_proc_init (void) + * : arm6_7_proc_fin (void) + * + * Notes : This processor does not require these + */ +ENTRY(cpu_arm6_proc_init) +ENTRY(cpu_arm7_proc_init) + mov pc, lr + +ENTRY(cpu_arm6_proc_fin) +ENTRY(cpu_arm7_proc_fin) + mov r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, r0 + mov r0, #0x31 @ ....S..DP...M + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +ENTRY(cpu_arm6_do_idle) +ENTRY(cpu_arm7_do_idle) + mov pc, lr + +/* + * Function: arm6_7_switch_mm(unsigned long pgd_phys) + * Params : pgd_phys Physical address of page table + * Purpose : Perform a task switch, saving the old processes state, and restoring + * the new. + */ +ENTRY(cpu_arm6_switch_mm) +ENTRY(cpu_arm7_switch_mm) + mov r1, #0 + mcr p15, 0, r1, c7, c0, 0 @ flush cache + mcr p15, 0, r0, c2, c0, 0 @ update page table ptr + mcr p15, 0, r1, c5, c0, 0 @ flush TLBs + mov pc, lr + +/* + * Function: arm6_7_set_pte(pte_t *ptep, pte_t pte) + * Params : r0 = Address to set + * : r1 = value to set + * Purpose : Set a PTE and flush it out of any WB cache + */ + .align 5 +ENTRY(cpu_arm6_set_pte) +ENTRY(cpu_arm7_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? 
+ orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young + movne r2, #0 + + str r2, [r0] @ hardware version + mov pc, lr + +/* + * Function: _arm6_7_reset + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ +ENTRY(cpu_arm6_reset) +ENTRY(cpu_arm7_reset) + mov r1, #0 + mcr p15, 0, r1, c7, c0, 0 @ flush cache + mcr p15, 0, r1, c5, c0, 0 @ flush TLB + mov r1, #0x30 + mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc + mov pc, r0 + + __INIT + + .type __arm6_setup, #function +__arm6_setup: mov r0, #0 + mcr p15, 0, r0, c7, c0 @ flush caches on v3 + mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 + mov r0, #0x3d @ . ..RS BLDP WCAM + orr r0, r0, #0x100 @ . ..01 0011 1101 + mov pc, lr + .size __arm6_setup, . - __arm6_setup + + .type __arm7_setup, #function +__arm7_setup: mov r0, #0 + mcr p15, 0, r0, c7, c0 @ flush caches on v3 + mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 + mcr p15, 0, r0, c3, c0 @ load domain access register + mov r0, #0x7d @ . ..RS BLDP WCAM + orr r0, r0, #0x100 @ . ..01 0111 1101 + mov pc, lr + .size __arm7_setup, . - __arm7_setup + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm6_processor_functions, #object +ENTRY(arm6_processor_functions) + .word cpu_arm6_data_abort + .word cpu_arm6_proc_init + .word cpu_arm6_proc_fin + .word cpu_arm6_reset + .word cpu_arm6_do_idle + .word cpu_arm6_dcache_clean_area + .word cpu_arm6_switch_mm + .word cpu_arm6_set_pte + .size arm6_processor_functions, . 
- arm6_processor_functions + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm7_processor_functions, #object +ENTRY(arm7_processor_functions) + .word cpu_arm7_data_abort + .word cpu_arm7_proc_init + .word cpu_arm7_proc_fin + .word cpu_arm7_reset + .word cpu_arm7_do_idle + .word cpu_arm7_dcache_clean_area + .word cpu_arm7_switch_mm + .word cpu_arm7_set_pte + .size arm7_processor_functions, . - arm7_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: .asciz "armv3" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: .asciz "v3" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm6_name, #object +cpu_arm6_name: .asciz "ARM6" + .size cpu_arm6_name, . - cpu_arm6_name + + .type cpu_arm610_name, #object +cpu_arm610_name: + .asciz "ARM610" + .size cpu_arm610_name, . - cpu_arm610_name + + .type cpu_arm7_name, #object +cpu_arm7_name: .asciz "ARM7" + .size cpu_arm7_name, . - cpu_arm7_name + + .type cpu_arm710_name, #object +cpu_arm710_name: + .asciz "ARM710" + .size cpu_arm710_name, . - cpu_arm710_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm6_proc_info, #object +__arm6_proc_info: + .long 0x41560600 + .long 0xfffffff0 + .long 0x00000c1e + b __arm6_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT + .long cpu_arm6_name + .long arm6_processor_functions + .long v3_tlb_fns + .long v3_user_fns + .long v3_cache_fns + .size __arm6_proc_info, . - __arm6_proc_info + + .type __arm610_proc_info, #object +__arm610_proc_info: + .long 0x41560610 + .long 0xfffffff0 + .long 0x00000c1e + b __arm6_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT + .long cpu_arm610_name + .long arm6_processor_functions + .long v3_tlb_fns + .long v3_user_fns + .long v3_cache_fns + .size __arm610_proc_info, . 
- __arm610_proc_info + + .type __arm7_proc_info, #object +__arm7_proc_info: + .long 0x41007000 + .long 0xffffff00 + .long 0x00000c1e + b __arm7_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT + .long cpu_arm7_name + .long arm7_processor_functions + .long v3_tlb_fns + .long v3_user_fns + .long v3_cache_fns + .size __arm7_proc_info, . - __arm7_proc_info + + .type __arm710_proc_info, #object +__arm710_proc_info: + .long 0x41007100 + .long 0xfff8ff00 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm7_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT + .long cpu_arm710_name + .long arm7_processor_functions + .long v3_tlb_fns + .long v3_user_fns + .long v3_cache_fns + .size __arm710_proc_info, . - __arm710_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S new file mode 100644 index 00000000000..57cfa6a2f54 --- /dev/null +++ b/arch/arm/mm/proc-arm720.S @@ -0,0 +1,267 @@ +/* + * linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720 + * + * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) + * Rob Scott (rscott@mtrob.fdns.net) + * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM720T. The ARM720T has a writethrough IDC + * cache, so we don't need to clean it. + * + * Changelog: + * 05-09-2000 SJH Created by moving 720 specific functions + * out of 'proc-arm6,7.S' per RMK discussion + * 07-25-2000 SJH Added idle function. + * 08-25-2000 DBS Updated for integration of ARM Ltd version. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> +#include <asm/hardware.h> + +/* + * Function: arm720_proc_init (void) + * : arm720_proc_fin (void) + * + * Notes : proc_init and dcache_clean_area share one entry point and + * simply return. proc_fin switches to SVC mode with the F and + * I bits set, clears the I, W, C and A control register bits + * and then invalidates the cache. + */ +ENTRY(cpu_arm720_dcache_clean_area) +ENTRY(cpu_arm720_proc_init) + mov pc, lr + +ENTRY(cpu_arm720_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov ip, #0 @ pass zero, as reset/switch_mm do + mcr p15, 0, ip, c7, c7, 0 @ invalidate cache + ldmfd sp!, {pc} + +/* + * Function: arm720_proc_do_idle(void) + * Params : r0 = unused + * Purpose : put the processor in proper idle mode + * (currently returns immediately) + */ +ENTRY(cpu_arm720_do_idle) + mov pc, lr + +/* + * Function: arm720_switch_mm(unsigned long pgd_phys) + * Params : pgd_phys Physical address of page table + * Purpose : Perform a task switch, saving the old process' state and restoring + * the new.
+ */ +ENTRY(cpu_arm720_switch_mm) + mov r1, #0 + mcr p15, 0, r1, c7, c7, 0 @ invalidate cache + mcr p15, 0, r0, c2, c0, 0 @ update page table ptr + mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) + mov pc, lr + +/* + * Function: arm720_set_pte(pte_t *ptep, pte_t pte) + * Params : r0 = Address to set + * : r1 = value to set + * Purpose : Set a PTE and flush it out of any WB cache + */ + .align 5 +ENTRY(cpu_arm720_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young + movne r2, #0 + + str r2, [r0] @ hardware version + mov pc, lr + +/* + * Function: arm720_reset + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ +ENTRY(cpu_arm720_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate cache + mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x2100 @ ..v....s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + + __INIT + + .type __arm710_setup, #function +__arm710_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) + mrc p15, 0, r0, c1, c0 @ get control register + ldr r5, arm710_cr1_clear + bic r0, r0, r5 + ldr r5, arm710_cr1_set + orr r0, r0, r5 + mov pc, lr @ __ret (head.S) + .size __arm710_setup, . - __arm710_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .... 
0001 ..11 1101 + * + */ + .type arm710_cr1_clear, #object + .type arm710_cr1_set, #object +arm710_cr1_clear: + .word 0x0f3f +arm710_cr1_set: + .word 0x013d + + .type __arm720_setup, #function +__arm720_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) + mrc p15, 0, r0, c1, c0 @ get control register + ldr r5, arm720_cr1_clear + bic r0, r0, r5 + ldr r5, arm720_cr1_set + orr r0, r0, r5 + mov pc, lr @ __ret (head.S) + .size __arm720_setup, . - __arm720_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..1. 0001 ..11 1101 + * + */ + .type arm720_cr1_clear, #object + .type arm720_cr1_set, #object +arm720_cr1_clear: + .word 0x2f3f +arm720_cr1_set: + .word 0x213d + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm720_processor_functions, #object +ENTRY(arm720_processor_functions) + .word v4t_late_abort + .word cpu_arm720_proc_init + .word cpu_arm720_proc_fin + .word cpu_arm720_reset + .word cpu_arm720_do_idle + .word cpu_arm720_dcache_clean_area + .word cpu_arm720_switch_mm + .word cpu_arm720_set_pte + .size arm720_processor_functions, . - arm720_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: .asciz "armv4t" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm710_name, #object +cpu_arm710_name: + .asciz "ARM710T" + .size cpu_arm710_name, . - cpu_arm710_name + + .type cpu_arm720_name, #object +cpu_arm720_name: + .asciz "ARM720T" + .size cpu_arm720_name, . - cpu_arm720_name + + .align + +/* + * See linux/include/asm-arm/procinfo.h for a definition of this structure.
+ */ + + .section ".proc.info", #alloc, #execinstr + + .type __arm710_proc_info, #object +__arm710_proc_info: + .long 0x41807100 @ cpu_val + .long 0xffffff00 @ cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm710_setup @ cpu_flush + .long cpu_arch_name @ arch_name + .long cpu_elf_name @ elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap + .long cpu_arm710_name @ name + .long arm720_processor_functions + .long v4_tlb_fns + .long v4wt_user_fns + .long v4_cache_fns + .size __arm710_proc_info, . - __arm710_proc_info + + .type __arm720_proc_info, #object +__arm720_proc_info: + .long 0x41807200 @ cpu_val + .long 0xffffff00 @ cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm720_setup @ cpu_flush + .long cpu_arch_name @ arch_name + .long cpu_elf_name @ elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap + .long cpu_arm720_name @ name + .long arm720_processor_functions + .long v4_tlb_fns + .long v4wt_user_fns + .long v4_cache_fns + .size __arm720_proc_info, . - __arm720_proc_info diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S new file mode 100644 index 00000000000..0f490a0fcb7 --- /dev/null +++ b/arch/arm/mm/proc-arm920.S @@ -0,0 +1,480 @@ +/* + * linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm920. + * + * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 65536 + + + .text +/* + * cpu_arm920_proc_init() + */ +ENTRY(cpu_arm920_proc_init) + mov pc, lr + +/* + * cpu_arm920_proc_fin() + * + * Switch to SVC mode with the F and I bits set, flush the whole + * cache, then clear the I, W, C and A bits of the control register. + */ +ENTRY(cpu_arm920_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bl arm920_flush_kern_cache_all +#else + bl v4wt_flush_kern_cache_all +#endif + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm920_reset(loc) + * + * Perform a soft reset of the system.
Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm920_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm920_do_idle() + */ + .align 5 +ENTRY(cpu_arm920_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm920_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm920_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address space.
+ * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for address space + */ +ENTRY(arm920_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(arm920_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm920_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm920_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm920_cache_fns) + .long arm920_flush_kern_cache_all + .long arm920_flush_user_cache_all + .long arm920_flush_user_cache_range + .long arm920_coherent_kern_range + .long arm920_coherent_user_range + .long arm920_flush_kern_dcache_page + .long arm920_dma_inv_range + .long arm920_dma_clean_range + .long arm920_dma_flush_range + +#endif + + +ENTRY(cpu_arm920_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm920_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm920_switch_mm) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. 
+@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm920_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm920_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r2, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __arm920_setup, #function +__arm920_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm920_cr1_clear + bic r0, r0, r5 + ldr r5, arm920_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __arm920_setup, . 
- __arm920_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm920_cr1_clear, #object + .type arm920_cr1_set, #object +arm920_cr1_clear: + .word 0x3f3f +arm920_cr1_set: + .word 0x3135 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm920_processor_functions, #object +arm920_processor_functions: + .word v4t_early_abort + .word cpu_arm920_proc_init + .word cpu_arm920_proc_fin + .word cpu_arm920_reset + .word cpu_arm920_do_idle + .word cpu_arm920_dcache_clean_area + .word cpu_arm920_switch_mm + .word cpu_arm920_set_pte + .size arm920_processor_functions, . - arm920_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4t" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm920_name, #object +cpu_arm920_name: + .ascii "ARM920T" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif + .ascii "\0" + .size cpu_arm920_name, . - cpu_arm920_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm920_proc_info,#object +__arm920_proc_info: + .long 0x41009200 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm920_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm920_name + .long arm920_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm920_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm920_proc_info, . 
- __arm920_proc_info diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S new file mode 100644 index 00000000000..62bc34a139e --- /dev/null +++ b/arch/arm/mm/proc-arm922.S @@ -0,0 +1,484 @@ +/* + * linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2001 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm922. + * + * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 4 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. (I think this should + * be 32768).
+ */ +#define CACHE_DLIMIT 8192 + + + .text +/* + * cpu_arm922_proc_init() + */ +ENTRY(cpu_arm922_proc_init) + mov pc, lr + +/* + * cpu_arm922_proc_fin() + */ +ENTRY(cpu_arm922_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bl arm922_flush_kern_cache_all +#else + bl v4wt_flush_kern_cache_all +#endif + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm922_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm922_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm922_do_idle() + */ + .align 5 +ENTRY(cpu_arm922_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm922_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. 
+ */ +ENTRY(arm922_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 3 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm922_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function.
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(arm922_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm922_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm922_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm922_cache_fns) + .long arm922_flush_kern_cache_all + .long arm922_flush_user_cache_all + .long arm922_flush_user_cache_range + .long arm922_coherent_kern_range + .long arm922_coherent_user_range + .long arm922_flush_kern_dcache_page + .long arm922_dma_inv_range + .long arm922_dma_clean_range + .long arm922_dma_flush_range + +#endif + + +ENTRY(cpu_arm922_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm922_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm922_switch_mm) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. 
+@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 3 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm922_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm922_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r2, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __arm922_setup, #function +__arm922_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm922_cr1_clear + bic r0, r0, r5 + ldr r5, arm922_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __arm922_setup, .
- __arm922_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm922_cr1_clear, #object + .type arm922_cr1_set, #object +arm922_cr1_clear: + .word 0x3f3f +arm922_cr1_set: + .word 0x3135 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm922_processor_functions, #object +arm922_processor_functions: + .word v4t_early_abort + .word cpu_arm922_proc_init + .word cpu_arm922_proc_fin + .word cpu_arm922_reset + .word cpu_arm922_do_idle + .word cpu_arm922_dcache_clean_area + .word cpu_arm922_switch_mm + .word cpu_arm922_set_pte + .size arm922_processor_functions, . - arm922_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4t" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm922_name, #object +cpu_arm922_name: + .ascii "ARM922T" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif + .ascii "\0" + .size cpu_arm922_name, . - cpu_arm922_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm922_proc_info,#object +__arm922_proc_info: + .long 0x41009220 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm922_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm922_name + .long arm922_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm922_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm922_proc_info, . 
- __arm922_proc_info diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S new file mode 100644 index 00000000000..ee49aa2ca78 --- /dev/null +++ b/arch/arm/mm/proc-arm925.S @@ -0,0 +1,562 @@ +/* + * linux/arch/arm/mm/arm925.S: MMU functions for ARM925 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2002 RidgeRun, Inc. + * Copyright (C) 2002-2003 MontaVista Software, Inc. + * + * Update for Linux-2.6 and cache flush improvements + * Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm925. + * + * CONFIG_CPU_ARM925_CPU_IDLE -> nohlt + * + * Some additional notes based on deciphering the TI TRM on OMAP-5910: + * + * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush + * entry mode" must be 0 to flush the entries in both segments + * at once. This is the default value. See TRM 2-20 and 2-24 for + * more information. + * + * NOTE2: Default is the "D-cache clean and flush entry mode". It looks + * like the "Transparent mode" must be on for partial cache flushes + * to work in this mode. This mode only works with 16-bit external + * memory. 
See TRM 2-24 for more information. + * + * NOTE3: Write-back cache flushing seems to be flakey with devices using + * direct memory access, such as USB OHCI. The workaround is to use + * write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is + * the default for OMAP-1510). + */ + +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 2 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 256 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 8192 + + .text +/* + * cpu_arm925_proc_init() + */ +ENTRY(cpu_arm925_proc_init) + mov pc, lr + +/* + * cpu_arm925_proc_fin() + */ +ENTRY(cpu_arm925_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl arm925_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm925_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector.
+ * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm925_reset) + /* Send software reset to MPU and DSP */ + mov ip, #0xff000000 + orr ip, ip, #0x00fe0000 + orr ip, ip, #0x0000ce00 + mov r4, #1 + strh r4, [ip, #0x10] + + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm925_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm925_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + mov pc, lr + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm925_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm925_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in both segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. 
+ * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm925_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(arm925_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm925_dma_inv_range) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm925_dma_clean_range) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry (write-through: lines are never dirty) +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm925_cache_fns) + .long arm925_flush_kern_cache_all + .long arm925_flush_user_cache_all + .long arm925_flush_user_cache_range + .long arm925_coherent_kern_range + .long arm925_coherent_user_range + .long arm925_flush_kern_dcache_page + .long arm925_dma_inv_range + .long arm925_dma_clean_range + .long arm925_dma_flush_range + +ENTRY(cpu_arm925_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm925_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd.
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm925_switch_mm) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in both segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm925_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm925_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r2, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __arm925_setup, #function +__arm925_setup: + mov r0, #0 +#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE) + orr r0,r0,#1 << 7 +#endif + + /* Transparent on, D-cache clean & flush mode.
See NOTE2 above */ + orr r0,r0,#1 << 1 @ transparent mode on + mcr p15, 0, r0, c15, c1, 0 @ write TI config register + + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm925_cr1_clear + bic r0, r0, r5 + ldr r5, arm925_cr1_set + orr r0, r0, r5 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... +#endif + mov pc, lr + .size __arm925_setup, . - __arm925_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 1101 + * + */ + .type arm925_cr1_clear, #object + .type arm925_cr1_set, #object +arm925_cr1_clear: + .word 0x7f3f +arm925_cr1_set: + .word 0x313d + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm925_processor_functions, #object +arm925_processor_functions: + .word v4t_early_abort + .word cpu_arm925_proc_init + .word cpu_arm925_proc_fin + .word cpu_arm925_reset + .word cpu_arm925_do_idle + .word cpu_arm925_dcache_clean_area + .word cpu_arm925_switch_mm + .word cpu_arm925_set_pte + .size arm925_processor_functions, . - arm925_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4t" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . 
- cpu_elf_name + + .type cpu_arm925_name, #object +cpu_arm925_name: + .ascii "ARM925T" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif +#endif + .ascii "\0" + .size cpu_arm925_name, . - cpu_arm925_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm925_proc_info,#object +__arm925_proc_info: + .long 0x54029250 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm925_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm925_name + .long arm925_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm925_cache_fns + .size __arm925_proc_info, . - __arm925_proc_info + + .type __arm915_proc_info,#object +__arm915_proc_info: + .long 0x54029150 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm925_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm925_name + .long arm925_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm925_cache_fns + .size __arm915_proc_info, . - __arm915_proc_info diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S new file mode 100644 index 00000000000..bb95cc9fed0 --- /dev/null +++ b/arch/arm/mm/proc-arm926.S @@ -0,0 +1,495 @@ +/* + * linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S + * + * Copyright (C) 1999-2001 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm926. + * + * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/pgtable.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .text +/* + * cpu_arm926_proc_init() + */ +ENTRY(cpu_arm926_proc_init) + mov pc, lr + +/* + * cpu_arm926_proc_fin() + */ +ENTRY(cpu_arm926_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl arm926_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_arm926_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. 
+ * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_arm926_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm926_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm926_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + mov pc, lr + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm926_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm926_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. 
+ * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm926_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(arm926_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm926_dma_inv_range) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(arm926_dma_clean_range) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry (write-through: lines are never dirty) +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(arm926_cache_fns) + .long arm926_flush_kern_cache_all + .long arm926_flush_user_cache_all + .long arm926_flush_user_cache_range + .long arm926_coherent_kern_range + .long arm926_coherent_user_range + .long arm926_flush_kern_dcache_page + .long arm926_dma_inv_range + .long arm926_dma_clean_range + .long arm926_dma_flush_range + +ENTRY(cpu_arm926_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm926_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd.
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm926_switch_mm) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_arm926_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm926_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + eor r3, r2, #0x0a @ C & small page? + tst r3, #0x0b + biceq r2, r2, #4 +#endif + str r2, [r0] @ hardware version + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __arm926_setup, #function +__arm926_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, arm926_cr1_clear + bic r0, r0, r5 + ldr r5, arm926_cr1_set + orr r0, r0, r5 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... 
+#endif + mov pc, lr + .size __arm926_setup, . - __arm926_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 0101 + * + */ + .type arm926_cr1_clear, #object + .type arm926_cr1_set, #object +arm926_cr1_clear: + .word 0x7f3f +arm926_cr1_set: + .word 0x3135 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type arm926_processor_functions, #object +arm926_processor_functions: + .word v5tj_early_abort + .word cpu_arm926_proc_init + .word cpu_arm926_proc_fin + .word cpu_arm926_reset + .word cpu_arm926_do_idle + .word cpu_arm926_dcache_clean_area + .word cpu_arm926_switch_mm + .word cpu_arm926_set_pte + .size arm926_processor_functions, . - arm926_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5tej" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_arm926_name, #object +cpu_arm926_name: + .ascii "ARM926EJ-S" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif +#endif + .ascii "\0" + .size cpu_arm926_name, . - cpu_arm926_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __arm926_proc_info,#object +__arm926_proc_info: + .long 0x41069260 @ ARM926EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm926_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm926_name + .long arm926_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm926_cache_fns + .size __arm926_proc_info, . 
- __arm926_proc_info diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S new file mode 100644 index 00000000000..9137fe56359 --- /dev/null +++ b/arch/arm/mm/proc-macros.S @@ -0,0 +1,51 @@ +/* + * We need constants.h for: + * VMA_VM_MM + * VMA_VM_FLAGS + * VM_EXEC + */ +#include <asm/constants.h> +#include <asm/thread_info.h> + +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * vma_vm_flags - get vma->vm_flags + */ + .macro vma_vm_flags, rd, rn + ldr \rd, [\rn, #VMA_VM_FLAGS] + .endm + + .macro tsk_mm, rd, rn + ldr \rd, [\rn, #TI_TASK] + ldr \rd, [\rd, #TSK_ACTIVE_MM] + .endm + +/* + * act_mm - get current->active_mm + */ + .macro act_mm, rd + bic \rd, sp, #8128 + bic \rd, \rd, #63 + ldr \rd, [\rd, #TI_TASK] + ldr \rd, [\rd, #TSK_ACTIVE_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * asid - mask the ASID from the context ID + */ + .macro asid, rd, rn + and \rd, \rn, #255 + .endm diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S new file mode 100644 index 00000000000..360cae90569 --- /dev/null +++ b/arch/arm/mm/proc-sa110.S @@ -0,0 +1,272 @@ +/* + * linux/arch/arm/mm/proc-sa110.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for SA110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-110.
+ */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 +#define FLUSH_OFFSET 32768 + + .macro flush_110_dcache rd, ra, re + ldr \rd, =flush_base + ldr \ra, [\rd] + eor \ra, \ra, #FLUSH_OFFSET + str \ra, [\rd] + add \re, \ra, #16384 @ only necessary for 16k +1001: ldr \rd, [\ra], #DCACHELINESIZE + teq \re, \ra + bne 1001b + .endm + + .data +flush_base: + .long FLUSH_BASE + .text + +/* + * cpu_sa110_proc_init() + */ +ENTRY(cpu_sa110_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + mov pc, lr + +/* + * cpu_sa110_proc_fin() + */ +ENTRY(cpu_sa110_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl v4wb_flush_kern_cache_all @ clean caches +1: mov r0, #0 + mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_sa110_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_sa110_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ 
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_sa110_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 + +ENTRY(cpu_sa110_do_idle) + mcr p15, 0, ip, c15, c2, 2 @ disable clock switching + ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc + ldr r1, [r1, #0] @ force switch to MCLK + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa110_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. + * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa110_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa110_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa110_switch_mm) + flush_110_dcache r3, ip, r1 + mov r1, #0 + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_sa110_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa110_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + + str r2, [r0] @ hardware version + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __sa110_setup, #function +__sa110_setup: + mov r10, #0 + mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, sa110_cr1_clear + bic r0, r0, r5 + ldr r5, sa110_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __sa110_setup, . 
- __sa110_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..01 0001 ..11 1101 + * + */ + .type sa110_cr1_clear, #object + .type sa110_cr1_set, #object +sa110_cr1_clear: + .word 0x3f3f +sa110_cr1_set: + .word 0x113d + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + + .type sa110_processor_functions, #object +ENTRY(sa110_processor_functions) + .word v4_early_abort + .word cpu_sa110_proc_init + .word cpu_sa110_proc_fin + .word cpu_sa110_reset + .word cpu_sa110_do_idle + .word cpu_sa110_dcache_clean_area + .word cpu_sa110_switch_mm + .word cpu_sa110_set_pte + .size sa110_processor_functions, . - sa110_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_sa110_name, #object +cpu_sa110_name: + .asciz "StrongARM-110" + .size cpu_sa110_name, . - cpu_sa110_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __sa110_proc_info,#object +__sa110_proc_info: + .long 0x4401a100 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __sa110_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long cpu_sa110_name + .long sa110_processor_functions + .long v4wb_tlb_fns + .long v4wb_user_fns + .long v4wb_cache_fns + .size __sa110_proc_info, . 
- __sa110_proc_info diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S new file mode 100644 index 00000000000..d447cd5f3dd --- /dev/null +++ b/arch/arm/mm/proc-sa1100.S @@ -0,0 +1,323 @@ +/* + * linux/arch/arm/mm/proc-sa1100.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for SA1100 and SA1110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-1100 and StrongARM-1110. + * + * Note that SA1100 and SA1110 share everything but their name and CPU ID. + * + * 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl): + * Flush the read buffer at context switches + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/pgtable.h> + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 +#define FLUSH_OFFSET 32768 + + .macro flush_1100_dcache rd, ra, re + ldr \rd, =flush_base @ rd = address of the flush_base word + ldr \ra, [\rd] @ ra = base stored by the previous call + eor \ra, \ra, #FLUSH_OFFSET @ flip the FLUSH_OFFSET bit of that base + str \ra, [\rd] @ and store it back for the next call + add \re, \ra, #8192 @ only necessary for 8k +1001: ldr \rd, [\ra], #DCACHELINESIZE @ read one line, advance ra by a line + teq \re, \ra + bne 1001b @ loop until ra reaches re +#ifdef FLUSH_BASE_MINICACHE + add \ra, \ra, #FLUSH_BASE_MINICACHE - FLUSH_BASE + add \re, \ra, #512 @ only 512 bytes +1002: ldr \rd, [\ra], #DCACHELINESIZE + teq \re, \ra + bne 1002b +#endif + .endm + + .data +flush_base: + .long FLUSH_BASE + .text + + __INIT + +/* + * cpu_sa1100_proc_init() + */ +ENTRY(cpu_sa1100_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland + mov pc, lr + + .previous + +/* + * cpu_sa1100_proc_fin() + * + * Prepare the CPU for reset: + * - Disable interrupts + * - Clean and turn off caches. 
+ */ +ENTRY(cpu_sa1100_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + flush_1100_dcache r0, r1, r2 @ clean caches + mov r0, #0 + mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_sa1100_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_sa1100_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_sa1100_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 +ENTRY(cpu_sa1100_do_idle) + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, #0 + ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address + @ --- aligned to a cache line + mcr p15, 0, r0, c15, c2, 2 @ disable clock switching + ldr r1, [r1, #0] @ force switch to MCLK + mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa1100_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. 
+ * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa1100_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa1100_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa1100_switch_mm) + flush_1100_dcache r3, ip, r1 + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_sa1100_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa1100_set_pte) + str r1, [r0], #-2048 @ linux version + + eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r1, #L_PTE_USER @ User? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + movne r2, #0 + + str r2, [r0] @ hardware version + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __sa1100_setup, #function +__sa1100_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, sa1100_cr1_clear + bic r0, r0, r5 + ldr r5, sa1100_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __sa1100_setup, . 
- __sa1100_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 1101 + * + */ + .type sa1100_cr1_clear, #object + .type sa1100_cr1_set, #object +sa1100_cr1_clear: + .word 0x3f3f +sa1100_cr1_set: + .word 0x313d + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + +/* + * SA1100 and SA1110 share the same function calls + */ + .type sa1100_processor_functions, #object +ENTRY(sa1100_processor_functions) + .word v4_early_abort + .word cpu_sa1100_proc_init + .word cpu_sa1100_proc_fin + .word cpu_sa1100_reset + .word cpu_sa1100_do_idle + .word cpu_sa1100_dcache_clean_area + .word cpu_sa1100_switch_mm + .word cpu_sa1100_set_pte + .size sa1100_processor_functions, . - sa1100_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_sa1100_name, #object +cpu_sa1100_name: + .asciz "StrongARM-1100" + .size cpu_sa1100_name, . - cpu_sa1100_name + + .type cpu_sa1110_name, #object +cpu_sa1110_name: + .asciz "StrongARM-1110" + .size cpu_sa1110_name, . - cpu_sa1110_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __sa1100_proc_info,#object +__sa1100_proc_info: + .long 0x4401a110 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __sa1100_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long cpu_sa1100_name + .long sa1100_processor_functions + .long v4wb_tlb_fns + .long v4_mc_user_fns + .long v4wb_cache_fns + .size __sa1100_proc_info, . 
- __sa1100_proc_info + + .type __sa1110_proc_info,#object +__sa1110_proc_info: + .long 0x6901b110 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __sa1100_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long cpu_sa1110_name + .long sa1100_processor_functions + .long v4wb_tlb_fns + .long v4_mc_user_fns + .long v4wb_cache_fns + .size __sa1110_proc_info, . - __sa1110_proc_info diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c new file mode 100644 index 00000000000..6c5f0fe578a --- /dev/null +++ b/arch/arm/mm/proc-syms.c @@ -0,0 +1,40 @@ +/* + * linux/arch/arm/mm/proc-syms.c + * + * Copyright (C) 2000-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/mm.h> + +#include <asm/cacheflush.h> +#include <asm/proc-fns.h> +#include <asm/tlbflush.h> + +#ifndef MULTI_CPU +EXPORT_SYMBOL(cpu_dcache_clean_area); +EXPORT_SYMBOL(cpu_set_pte); +#else +EXPORT_SYMBOL(processor); +#endif + +#ifndef MULTI_CACHE +EXPORT_SYMBOL(__cpuc_flush_kern_all); +EXPORT_SYMBOL(__cpuc_flush_user_all); +EXPORT_SYMBOL(__cpuc_flush_user_range); +EXPORT_SYMBOL(__cpuc_coherent_kern_range); +#else +EXPORT_SYMBOL(cpu_cache); +#endif + +/* + * No module should need to touch the TLB (and currently + * no modules do. We export this for "loadkernel" support + * (booting a new kernel from within a running kernel.) + */ +#ifdef MULTI_TLB +EXPORT_SYMBOL(cpu_tlb); +#endif diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S new file mode 100644 index 00000000000..0aa73d41478 --- /dev/null +++ b/arch/arm/mm/proc-v6.S @@ -0,0 +1,272 @@ +/* + * linux/arch/arm/mm/proc-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/constants.h> +#include <asm/procinfo.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +#define D_CACHE_LINE_SIZE 32 + + .macro cpsie, flags + .ifc \flags, f + .long 0xf1080040 + .exitm + .endif + .ifc \flags, i + .long 0xf1080080 + .exitm + .endif + .ifc \flags, if + .long 0xf10800c0 + .exitm + .endif + .err + .endm + + .macro cpsid, flags + .ifc \flags, f + .long 0xf10c0040 + .exitm + .endif + .ifc \flags, i + .long 0xf10c0080 + .exitm + .endif + .ifc \flags, if + .long 0xf10c00c0 + .exitm + .endif + .err + .endm + +ENTRY(cpu_v6_proc_init) + mov pc, lr + +ENTRY(cpu_v6_proc_fin) + mov pc, lr + +/* + * cpu_v6_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + * + * It is assumed that: + */ + .align 5 +ENTRY(cpu_v6_reset) + mov pc, r0 + +/* + * cpu_v6_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. 
+ */ +ENTRY(cpu_v6_do_idle) + mov r1, #0 @ operand of the WFI operation should be zero + mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt + mov pc, lr + +ENTRY(cpu_v6_dcache_clean_area) +#ifndef TLB_CAN_READ_FROM_L1_CACHE +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #D_CACHE_LINE_SIZE + subs r1, r1, #D_CACHE_LINE_SIZE + bhi 1b +#endif + mov pc, lr + +/* + * cpu_v6_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * - tsk - second argument (r1); the context ID is loaded from it at offset MM_CONTEXT_ID + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v6_switch_mm) + mov r2, #0 + ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + mcr p15, 0, r1, c13, c0, 1 @ set context ID + mov pc, lr + +#define nG (1 << 11) +#define APX (1 << 9) +#define AP1 (1 << 5) +#define AP0 (1 << 4) +#define XN (1 << 0) + +/* + * cpu_v6_set_pte(ptep, pte) + * + * Set a level 2 translation table entry. 
+ * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at -2048 bytes) + * - pte - PTE value to store + * + * Permissions: + * YUWD APX AP1 AP0 SVC User + * 0xxx 0 0 0 no acc no acc + * 100x 1 0 1 r/o no acc + * 10x0 1 0 1 r/o no acc + * 1011 0 0 1 r/w no acc + * 110x 1 1 0 r/o r/o + * 11x0 1 1 0 r/o r/o + * 1111 0 1 1 r/w r/w + */ +ENTRY(cpu_v6_set_pte) + str r1, [r0], #-2048 @ linux version + + bic r2, r1, #0x00000ff0 @ start from pte with bits 11..4 cleared + bic r2, r2, #0x00000003 @ and bits 1..0 cleared + orr r2, r2, #AP0 | 2 @ set AP0 and bit 1 + + tst r1, #L_PTE_WRITE + tstne r1, #L_PTE_DIRTY + orreq r2, r2, #APX @ APX unless both WRITE and DIRTY are set + + tst r1, #L_PTE_USER + orrne r2, r2, #AP1 | nG @ user: add AP1 and nG + tstne r2, #APX + eorne r2, r2, #AP0 @ user with APX: toggle AP0 (set above) + + tst r1, #L_PTE_YOUNG + biceq r2, r2, #APX | AP1 | AP0 @ not young: clear all three AP bits + +@ tst r1, #L_PTE_EXEC +@ orreq r2, r2, #XN + + tst r1, #L_PTE_PRESENT + moveq r2, #0 @ not present: hardware entry is zero + + str r2, [r0] @ hardware version (r0 is now ptep - 2048) + mcr p15, 0, r0, c7, c10, 1 @ flush_pte + mov pc, lr + + + + +cpu_v6_name: + .asciz "Some Random V6 Processor" + .align + + .section ".text.init", #alloc, #execinstr + +/* + * __v6_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * We automatically detect if we have a Harvard cache, and use the + * Harvard cache control instructions instead of the unified cache + * control instructions. + * + * This should be able to cover all ARMv6 cores. 
+ * + * It is assumed that: + * - cache type register is implemented + */ +__v6_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs + mcr p15, 0, r0, c2, c0, 2 @ TTB control register + mcr p15, 0, r4, c2, c0, 1 @ load TTB1 +#ifdef CONFIG_VFP + mrc p15, 0, r0, c1, c0, 2 @ read coprocessor access control + orr r0, r0, #(0xf << 20) @ bits 23:20 = cp11 and cp10, both used by VFP + mcr p15, 0, r0, c1, c0, 2 @ Enable full access to VFP +#endif + mrc p15, 0, r0, c1, c0, 0 @ read control register + ldr r5, v6_cr1_clear @ get mask for bits to clear + bic r0, r0, r5 @ clear them + ldr r5, v6_cr1_set @ get mask for bits to set + orr r0, r0, r5 @ set them + mov pc, lr @ return to head.S:__ret + + /* + * V X F I D LR + * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM + * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 0 110 0011 1.00 .111 1101 < we want + */ + .type v6_cr1_clear, #object + .type v6_cr1_set, #object +v6_cr1_clear: + .word 0x01e0fb7f +v6_cr1_set: + .word 0x00c0387d + + .type v6_processor_functions, #object +ENTRY(v6_processor_functions) + .word v6_early_abort + .word cpu_v6_proc_init + .word cpu_v6_proc_fin + .word cpu_v6_reset + .word cpu_v6_do_idle + .word cpu_v6_dcache_clean_area + .word cpu_v6_switch_mm + .word cpu_v6_set_pte + .size v6_processor_functions, . - v6_processor_functions + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv6" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v6" + .size cpu_elf_name, . - cpu_elf_name + .align + + .section ".proc.info", #alloc, #execinstr + + /* + * Match any ARMv6 processor core. 
+ */ + .type __v6_proc_info, #object +__v6_proc_info: + .long 0x0007b000 + .long 0x0007f000 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __v6_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_VFP|HWCAP_EDSP|HWCAP_JAVA + .long cpu_v6_name + .long v6_processor_functions + .long v6wbi_tlb_fns + .long v6_user_fns + .long v6_cache_fns + .size __v6_proc_info, . - __v6_proc_info diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S new file mode 100644 index 00000000000..2d977b4eeea --- /dev/null +++ b/arch/arm/mm/proc-xscale.S @@ -0,0 +1,934 @@ +/* + * linux/arch/arm/mm/proc-xscale.S + * + * Author: Nicolas Pitre + * Created: November 2000 + * Copyright: (C) 2000, 2001 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for the Intel XScale CPUs + * + * 2001 Aug 21: + * some contributions by Brett Gaines <brett.w.gaines@intel.com> + * Copyright 2001 by Intel Corp. + * + * 2001 Sep 08: + * Completely revisited, many important fixes + * Nicolas Pitre <nico@cam.org> + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/procinfo.h> +#include <asm/hardware.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. 
If the area + * is larger than this, then we flush the whole cache + */ +#define MAX_AREA_SIZE 32768 + +/* + * the cache line size of the I and D cache + */ +#define CACHELINESIZE 32 + +/* + * the size of the data cache + */ +#define CACHESIZE 32768 + +/* + * Virtual address used to allocate the cache when flushed + * + * This must be an address range which is _never_ used. It should + * apparently have a mapping in the corresponding page table for + * compatibility with future CPUs that _could_ require it. For instance we + * don't care. + * + * This must be aligned on a 2*CACHESIZE boundary. The code selects one of + * the 2 areas in alternance each time the clean_d_cache macro is used. + * Without this the XScale core exhibits cache eviction problems and no one + * knows why. + * + * Reminder: the vector table is located at 0xffff0000-0xffff0fff. + */ +#define CLEAN_ADDR 0xfffe0000 + +/* + * This macro is used to wait for a CP15 write and is needed + * when we have to ensure that the last operation to the co-pro + * was completed before continuing with operation. + */ + .macro cpwait, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + mov \rd, \rd @ wait for completion + sub pc, pc, #4 @ flush instruction pipeline + .endm + + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans the entire dcache using line allocate. + * The main loop has been unrolled to reduce loop overhead. + * rd and rs are two scratch registers. 
+ */ + .macro clean_d_cache, rd, rs + ldr \rs, =clean_addr + ldr \rd, [\rs] + eor \rd, \rd, #CACHESIZE + str \rd, [\rs] + add \rs, \rd, #CACHESIZE +1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + teq \rd, \rs + bne 1b + .endm + + .data +clean_addr: .word CLEAN_ADDR + + .text + +/* + * cpu_xscale_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xscale_proc_init) + mov pc, lr + +/* + * cpu_xscale_proc_fin() + */ +ENTRY(cpu_xscale_proc_fin) + str lr, [sp, #-4]! + mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r0 + bl xscale_flush_kern_cache_all @ clean caches + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldr pc, [sp], #4 + +/* + * cpu_xscale_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_xscale_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x0086 @ ........B....CA. + bic r1, r1, #0x3900 @ ..VIZ..S........ + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. 
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, r0 + +/* + * cpu_xscale_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. + */ + .align 5 + +ENTRY(cpu_xscale_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xscale_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xscale_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. 
+ * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vm_flags - flags word for the address space; only VM_EXEC is tested + */ + .align 5 +ENTRY(xscale_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC @ NOTE(review): r0 is not rounded down to a cache line first + mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line + mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line + mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xscale_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. 
+ */ +ENTRY(xscale_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(xscale_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_dma_inv_range) + mrc p15, 0, r2, c0, c0, 0 @ read ID + eor r2, r2, #0x69000000 + eor r2, r2, #0x00052000 + bics r2, r2, #1 + beq xscale_dma_flush_range + + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_dma_clean_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +ENTRY(xscale_cache_fns) + .long xscale_flush_kern_cache_all + .long xscale_flush_user_cache_all + .long xscale_flush_user_cache_range + .long xscale_coherent_kern_range + .long xscale_coherent_user_range + .long xscale_flush_kern_dcache_page + .long xscale_dma_inv_range + .long xscale_dma_clean_range + .long xscale_dma_flush_range + +ENTRY(cpu_xscale_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + mov pc, lr + +/* ================================ CACHE LOCKING============================ + * + * The XScale MicroArchitecture implements support for locking entries into + * the data and instruction cache. The following functions implement the core + * low level instructions needed to accomplish the locking. The developer's + * manual states that the code that performs the locking must be in non-cached + * memory. To accomplish this, the code in xscale-cache-lock.c copies the + * following functions from the cache into a non-cached memory region that + * is allocated through consistent_alloc(). 
+ * + */ + .align 5 +/* + * xscale_icache_lock + * + * r0: starting address to lock + * r1: end address to lock + */ +ENTRY(xscale_icache_lock) + +iLockLoop: + bic r0, r0, #CACHELINESIZE - 1 + mcr p15, 0, r0, c9, c1, 0 @ lock into cache + cmp r0, r1 @ are we done? + add r0, r0, #CACHELINESIZE @ advance to next cache line + bls iLockLoop + mov pc, lr + +/* + * xscale_icache_unlock + */ +ENTRY(xscale_icache_unlock) + mcr p15, 0, r0, c9, c1, 1 @ Unlock icache + mov pc, lr + +/* + * xscale_dcache_lock + * + * r0: starting address to lock + * r1: end address to lock + */ +ENTRY(xscale_dcache_lock) + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov r2, #1 + mcr p15, 0, r2, c9, c2, 0 @ Put dcache in lock mode + cpwait ip @ Wait for completion + + mrs r2, cpsr + orr r3, r2, #PSR_F_BIT | PSR_I_BIT +dLockLoop: + msr cpsr_c, r3 + mcr p15, 0, r0, c7, c10, 1 @ Write back line if it is dirty + mcr p15, 0, r0, c7, c6, 1 @ Flush/invalidate line + msr cpsr_c, r2 + ldr ip, [r0], #CACHELINESIZE @ Preload 32 bytes into cache from + @ location [r0]. Post-increment + @ r0 to next cache line + cmp r0, r1 @ Are we done? + bls dLockLoop + + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov r2, #0 + mcr p15, 0, r2, c9, c2, 0 @ Get out of lock mode + cpwait_ret lr, ip @ wait for CP15, then return via lr + +/* + * xscale_dcache_unlock + */ +ENTRY(xscale_dcache_unlock) + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, ip, c9, c2, 1 @ Unlock cache + mov pc, lr + +/* + * Needed to determine the length of the code that needs to be copied. + */ + .align 5 +ENTRY(xscale_cache_dummy) + mov pc, lr + +/* ================================ TLB LOCKING============================== + * + * The XScale MicroArchitecture implements support for locking entries into + * the Instruction and Data TLBs. The following functions provide the + * low level support for supporting these under Linux. xscale-lock.c + * implements some higher level management code. 
Most of the following + * is taken straight out of the Developer's Manual. + */ + +/* + * Lock I-TLB entry + * + * r0: Virtual address to translate and lock + */ + .align 5 +ENTRY(xscale_itlb_lock) + mrs r2, cpsr + orr r3, r2, #PSR_F_BIT | PSR_I_BIT + msr cpsr_c, r3 @ Disable interrupts + mcr p15, 0, r0, c8, c5, 1 @ Invalidate I-TLB entry + mcr p15, 0, r0, c10, c4, 0 @ Translate and lock + msr cpsr_c, r2 @ Restore interrupts + cpwait_ret lr, ip + +/* + * Lock D-TLB entry + * + * r0: Virtual address to translate and lock + */ + .align 5 +ENTRY(xscale_dtlb_lock) + mrs r2, cpsr + orr r3, r2, #PSR_F_BIT | PSR_I_BIT + msr cpsr_c, r3 @ Disable interrupts + mcr p15, 0, r0, c8, c6, 1 @ Invalidate D-TLB entry + mcr p15, 0, r0, c10, c8, 0 @ Translate and lock + msr cpsr_c, r2 @ Restore interrupts + cpwait_ret lr, ip + +/* + * Unlock all I-TLB entries + */ + .align 5 +ENTRY(xscale_itlb_unlock) + mcr p15, 0, ip, c10, c4, 1 @ Unlock I-TLB + mcr p15, 0, ip, c8, c5, 0 @ Invalidate I-TLB + cpwait_ret lr, ip + +/* + * Unlock all D-TLB entries + */ +ENTRY(xscale_dtlb_unlock) + mcr p15, 0, ip, c10, c8, 1 @ Unlock D-TLB + mcr p15, 0, ip, c8, c6, 0 @ Invalidate D-TLB + cpwait_ret lr, ip + +/* =============================== PageTable ============================== */ + +#define PTE_CACHE_WRITE_ALLOCATE 0 + +/* + * cpu_xscale_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xscale_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + cpwait_ret lr, ip + +/* + * cpu_xscale_set_pte(ptep, pte) + * + * Set a PTE and flush it out + * + * Errata 40: must set memory to write-through for user read-only pages.
+ */ + .align 5 +ENTRY(cpu_xscale_set_pte) + str r1, [r0], #-2048 @ linux version + + bic r2, r1, #0xff0 + orr r2, r2, #PTE_TYPE_EXT @ extended page + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + + tst r3, #L_PTE_USER @ User? + orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w + + tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w + @ combined with user -> user r/w + + @ + @ Handle the X bit. We want to set this bit for the minicache + @ (U = E = B = W = 0, C = 1) or when write allocate is enabled, + @ and we have a writeable, cacheable region. If we ignore the + @ U and E bits, we can allow user space to use the minicache as + @ well. + @ + @ X = (C & ~W & ~B) | (C & W & B & write_allocate) + @ + eor ip, r1, #L_PTE_CACHEABLE + tst ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE +#if PTE_CACHE_WRITE_ALLOCATE + eorne ip, r1, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE + tstne ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE +#endif + orreq r2, r2, #PTE_EXT_TEX(1) + + @ + @ Erratum 40: The B bit must be cleared for a user read-only + @ cacheable page. + @ + @ B = B & ~(U & C & ~W) + @ + and ip, r1, #L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE + teq ip, #L_PTE_USER | L_PTE_CACHEABLE + biceq r2, r2, #PTE_BUFFERABLE + + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? 
+ movne r2, #0 @ no -> fault + + str r2, [r0] @ hardware version + mov ip, #0 + mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + + + .ltorg + + .align + + __INIT + + .type __xscale_setup, #function +__xscale_setup: + mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs +#ifdef CONFIG_IWMMXT + mov r0, #0 @ initially disallow access to CP0/CP1 +#else + mov r0, #1 @ Allow access to CP0 +#endif + orr r0, r0, #1 << 6 @ cp6 for IOP3xx and Bulverde + orr r0, r0, #1 << 13 @ Its undefined whether this + mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes + mrc p15, 0, r0, c1, c0, 0 @ get control register + ldr r5, xscale_cr1_clear + bic r0, r0, r5 + ldr r5, xscale_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __xscale_setup, . - __xscale_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 1.01 .... .101 + * + */ + .type xscale_cr1_clear, #object + .type xscale_cr1_set, #object +xscale_cr1_clear: + .word 0x3b07 +xscale_cr1_set: + .word 0x3905 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + + .type xscale_processor_functions, #object +ENTRY(xscale_processor_functions) + .word v5t_early_abort + .word cpu_xscale_proc_init + .word cpu_xscale_proc_fin + .word cpu_xscale_reset + .word cpu_xscale_do_idle + .word cpu_xscale_dcache_clean_area + .word cpu_xscale_switch_mm + .word cpu_xscale_set_pte + .size xscale_processor_functions, . - xscale_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5te" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_80200_name, #object +cpu_80200_name: + .asciz "XScale-80200" + .size cpu_80200_name, . 
- cpu_80200_name + + .type cpu_8032x_name, #object +cpu_8032x_name: + .asciz "XScale-IOP8032x Family" + .size cpu_8032x_name, . - cpu_8032x_name + + .type cpu_8033x_name, #object +cpu_8033x_name: + .asciz "XScale-IOP8033x Family" + .size cpu_8033x_name, . - cpu_8033x_name + + .type cpu_pxa250_name, #object +cpu_pxa250_name: + .asciz "XScale-PXA250" + .size cpu_pxa250_name, . - cpu_pxa250_name + + .type cpu_pxa210_name, #object +cpu_pxa210_name: + .asciz "XScale-PXA210" + .size cpu_pxa210_name, . - cpu_pxa210_name + + .type cpu_ixp42x_name, #object +cpu_ixp42x_name: + .asciz "XScale-IXP42x Family" + .size cpu_ixp42x_name, . - cpu_ixp42x_name + + .type cpu_ixp46x_name, #object +cpu_ixp46x_name: + .asciz "XScale-IXP46x Family" + .size cpu_ixp46x_name, . - cpu_ixp46x_name + + .type cpu_ixp2400_name, #object +cpu_ixp2400_name: + .asciz "XScale-IXP2400" + .size cpu_ixp2400_name, . - cpu_ixp2400_name + + .type cpu_ixp2800_name, #object +cpu_ixp2800_name: + .asciz "XScale-IXP2800" + .size cpu_ixp2800_name, . - cpu_ixp2800_name + + .type cpu_pxa255_name, #object +cpu_pxa255_name: + .asciz "XScale-PXA255" + .size cpu_pxa255_name, . - cpu_pxa255_name + + .type cpu_pxa270_name, #object +cpu_pxa270_name: + .asciz "XScale-PXA270" + .size cpu_pxa270_name, . - cpu_pxa270_name + + .align + + .section ".proc.info", #alloc, #execinstr + + .type __80200_proc_info,#object +__80200_proc_info: + .long 0x69052000 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_80200_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __80200_proc_info, . 
- __80200_proc_info + + .type __8032x_proc_info,#object +__8032x_proc_info: + .long 0x69052420 + .long 0xfffff5e0 @ mask should accommodate IOP80219 also + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_8032x_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __8032x_proc_info, . - __8032x_proc_info + + .type __8033x_proc_info,#object +__8033x_proc_info: + .long 0x69054010 + .long 0xffffff30 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_8033x_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __8033x_proc_info, . - __8033x_proc_info + + .type __pxa250_proc_info,#object +__pxa250_proc_info: + .long 0x69052100 + .long 0xfffff7f0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_pxa250_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __pxa250_proc_info, .
- __pxa250_proc_info + + .type __pxa210_proc_info,#object +__pxa210_proc_info: + .long 0x69052120 + .long 0xfffff3f0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_pxa210_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __pxa210_proc_info, . - __pxa210_proc_info + + .type __ixp2400_proc_info, #object +__ixp2400_proc_info: + .long 0x69054190 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_ixp2400_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __ixp2400_proc_info, . - __ixp2400_proc_info + + .type __ixp2800_proc_info, #object +__ixp2800_proc_info: + .long 0x690541a0 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_ixp2800_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __ixp2800_proc_info, . 
- __ixp2800_proc_info + + .type __ixp42x_proc_info, #object +__ixp42x_proc_info: + .long 0x690541c0 + .long 0xffffffc0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_ixp42x_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __ixp42x_proc_info, . - __ixp42x_proc_info + + .type __ixp46x_proc_info, #object +__ixp46x_proc_info: + .long 0x69054200 + .long 0xffffff00 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_ixp46x_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __ixp46x_proc_info, . - __ixp46x_proc_info + + .type __pxa255_proc_info,#object +__pxa255_proc_info: + .long 0x69052d00 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_pxa255_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __pxa255_proc_info, .
- __pxa255_proc_info + + .type __pxa270_proc_info,#object +__pxa270_proc_info: + .long 0x69054110 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_pxa270_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .long xscale_cache_fns + .size __pxa270_proc_info, . - __pxa270_proc_info + diff --git a/arch/arm/mm/tlb-v3.S b/arch/arm/mm/tlb-v3.S new file mode 100644 index 00000000000..44b0daeaff9 --- /dev/null +++ b/arch/arm/mm/tlb-v3.S @@ -0,0 +1,52 @@ +/* + * linux/arch/arm/mm/tlbv3.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 3 TLB handling functions. + * + * Processors: ARM610, ARM710. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v3_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v3_flush_user_tlb_range) + vma_vm_mm r2, r2 + act_mm r3 @ get current->active_mm + teq r2, r3 @ == mm ? + movne pc, lr @ no, we dont do anything +ENTRY(v3_flush_kern_tlb_range) + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c6, c0, 0 @ invalidate TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + .type v3_tlb_fns, #object +ENTRY(v3_tlb_fns) + .long v3_flush_user_tlb_range + .long v3_flush_kern_tlb_range + .long v3_tlb_flags + .size v3_tlb_fns, . 
- v3_tlb_fns diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S new file mode 100644 index 00000000000..db82ee46824 --- /dev/null +++ b/arch/arm/mm/tlb-v4.S @@ -0,0 +1,65 @@ +/* + * linux/arch/arm/mm/tlbv4.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs, and no write buffer. + * + * Processors: ARM720T + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified user address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything +.v4_flush_kern_tlb_range: + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * v4_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range. + * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +.globl v4_flush_kern_tlb_range +.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range + + __INITDATA + + .type v4_tlb_fns, #object +ENTRY(v4_tlb_fns) + .long v4_flush_user_tlb_range + .long v4_flush_kern_tlb_range + .long v4_tlb_flags + .size v4_tlb_fns, . 
- v4_tlb_fns diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S new file mode 100644 index 00000000000..7908d5f1f13 --- /dev/null +++ b/arch/arm/mm/tlb-v4wb.S @@ -0,0 +1,77 @@ +/* + * linux/arch/arm/mm/tlbv4wb.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs w/o I TLB entry, with a write buffer. + * + * Processors: SA110 SA1100 SA1110 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4wb_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - vma - vm_area_struct describing address space + */ + .align 5 +ENTRY(v4wb_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + vma_vm_flags r2, r2 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + tst r2, #VM_EXEC + mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * v4wb_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range.
+ * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +ENTRY(v4wb_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 + mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + .type v4wb_tlb_fns, #object +ENTRY(v4wb_tlb_fns) + .long v4wb_flush_user_tlb_range + .long v4wb_flush_kern_tlb_range + .long v4wb_tlb_flags + .size v4wb_tlb_fns, . - v4wb_tlb_fns diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S new file mode 100644 index 00000000000..efbe94bbe1a --- /dev/null +++ b/arch/arm/mm/tlb-v4wbi.S @@ -0,0 +1,68 @@ +/* + * linux/arch/arm/mm/tlbv4wbi.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 and version 5 TLB handling functions. + * These assume a split I/D TLBs, with a write buffer. + * + * Processors: ARM920 ARM922 ARM925 ARM926 XScale + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/constants.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +/* + * v4wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - vma - vm_area_struct describing address space + */ + .align 5 +ENTRY(v4wbi_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ?
+ movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + vma_vm_flags r2, r2 + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +ENTRY(v4wbi_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + .type v4wbi_tlb_fns, #object +ENTRY(v4wbi_tlb_fns) + .long v4wbi_flush_user_tlb_range + .long v4wbi_flush_kern_tlb_range + .long v4wbi_tlb_flags + .size v4wbi_tlb_fns, . - v4wbi_tlb_fns diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S new file mode 100644 index 00000000000..99ed26e78ad --- /dev/null +++ b/arch/arm/mm/tlb-v6.S @@ -0,0 +1,92 @@ +/* + * linux/arch/arm/mm/tlb-v6.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 6 TLB handling functions. + * These assume a split I/D TLB. + */ +#include <linux/linkage.h> +#include <asm/constants.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +#define HARVARD_TLB + +/* + * v6wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. 
+ * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_struct describing address range + * + * It is assumed that: + * - the "Invalidate single entry" instruction will invalidate + * both the I and the D TLBs on Harvard-style TLBs + */ +ENTRY(v6wbi_flush_user_tlb_range) + vma_vm_mm r3, r2 @ get vma->vm_mm + mov ip, #0 + mmid r3, r3 @ get vm_mm->context.id + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + asid r3, r3 @ mask ASID + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT + vma_vm_flags r2, r2 @ get vma->vm_flags +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) + tst r2, #VM_EXEC @ Executable area ? + mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA (was 1) +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * v6wbi_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(v6wbi_flush_kern_tlb_range) + mov r2, #0 + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA + mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + .section ".text.init", #alloc, #execinstr + + .type v6wbi_tlb_fns, #object +ENTRY(v6wbi_tlb_fns) + .long v6wbi_flush_user_tlb_range + .long v6wbi_flush_kern_tlb_range + .long v6wbi_tlb_flags + .size v6wbi_tlb_fns, . - v6wbi_tlb_fns |