diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-29 08:52:50 +1100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-29 08:52:50 +1100 |
commit | e189f3495c4e30fc84fc9241096edf3932e23439 (patch) | |
tree | 5916c89ace81537a02ae01869386ba6caafdab9c /arch/sh/lib64/copy_page.S | |
parent | f4798748dee00c807a63f5518f08b3df161e0f6d (diff) | |
parent | 6582d7b7376aa587d74b08c74457dc28abc1a9fa (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6: (197 commits)
sh: add spi header and r2d platform data V3
sh: update r7780rp interrupt code
sh: remove consistent alloc stuff from the machine vector
sh: use declared coherent memory for dreamcast pci ethernet adapter
sh: declared coherent memory support V2
sh: Add support for SDK7780 board.
sh: constify function pointer tables
sh: Kill off -traditional for linker script.
cdrom: Add support for Sega Dreamcast GD-ROM.
sh: Kill off hs7751rvoip reference from arch/sh/Kconfig.
sh: Drop r7780rp_defconfig, use r7780mp_defconfig as kbuild default.
sh: Kill off dead HS771RVoIP board support.
sh: r7785rp: Fix up DECLARE_INTC_DESC() arg mismatch.
sh: r7785rp: Hook up the rest of the HL7785 FPGA IRQ vectors.
sh: r2d - enable sm501 usb host function
sh: remove voyagergx
sh: r2d - add lcd planel timings to sm501 platform data
sh: Add OHCI and UDC platform devices for SH7720.
sh: intc - remove default interrupt priority tables
sh: Correct pte size mismatch for X2 TLB.
...
Diffstat (limited to 'arch/sh/lib64/copy_page.S')
-rw-r--r-- | arch/sh/lib64/copy_page.S | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/arch/sh/lib64/copy_page.S b/arch/sh/lib64/copy_page.S
new file mode 100644
index 00000000000..0ec6fca63b5
--- /dev/null
+++ b/arch/sh/lib64/copy_page.S
@@ -0,0 +1,89 @@
+/*
+   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+   This file is subject to the terms and conditions of the GNU General Public
+   License.  See the file "COPYING" in the main directory of this archive
+   for more details.
+
+   Tight version of memcpy for the case of just copying a page.
+   Prefetch strategy empirically optimised against RTL simulations
+   of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+   Parameters:
+   r2 : destination effective address (start of page)
+   r3 : source effective address (start of page)
+
+   Always copies 4096 bytes.
+
+   Points to review.
+   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+     It seems like the prefetch needs to be at least 4 lines ahead to get
+     the data into the cache in time, and the allocos contend with outstanding
+     prefetches for the same cache set, so it's better to have the numbers
+     different.
+   */
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 8
+	.global copy_page
+copy_page:
+
+	/* Copy 4096 bytes worth of data from r3 to r2.
+	   Do prefetches 4 lines ahead.
+	   Do alloco 2 lines ahead */
+
+	pta 1f, tr1		! tr1 -> label 1 (top of the copy loop)
+	pta 2f, tr2		! tr2 -> label 2 (alloco check)
+	pta 3f, tr3		! tr3 -> label 3 (load/store body)
+	ptabs r18, tr0		! tr0 = return target taken from r18, used by the final blink
+
+#if 0
+	/* TAKum03020 */
+	ld.q r3, 0x00, r63	! prefetch-style load of source line 0 (disabled)
+	ld.q r3, 0x20, r63	! source line 1 (disabled)
+	ld.q r3, 0x40, r63	! source line 2 (disabled)
+	ld.q r3, 0x60, r63	! source line 3 (disabled)
+#endif
+	alloco r2, 0x00		! allocate destination line 0 ahead of the loop
+	synco			! TAKum03020
+	alloco r2, 0x20		! allocate destination line 1 ahead of the loop
+	synco			! TAKum03020
+
+	movi 3968, r6		! 3968 = 4096 - 128, i.e. 4 lines of 32 bytes before the end
+	add r2, r6, r6		! r6 = dst + 3968: prefetch cut-off (last 4 lines)
+	addi r6, 64, r7		! r7 = dst + 4032: alloco cut-off (last 2 lines)
+	addi r7, 64, r8		! r8 = dst + 4096: end of the destination page
+	sub r3, r2, r60		! r60 = src - dst, so r2 + r60 addresses the source
+	addi r60, 8, r61	! r61 = r60 + 8
+	addi r61, 8, r62	! r62 = r60 + 16
+	addi r62, 8, r23	! r23 = r60 + 24
+	addi r60, 0x80, r22	! r22 = r60 + 128; only read by the disabled prefetch below
+
+/* Minimal code size.  The extra branches inside the loop don't cost much
+   because they overlap with the time spent waiting for prefetches to
+   complete. */
+1:
+#if 0
+	/* TAKum03020 */
+	bge/u r2, r6, tr2	! skip prefetch for last 4 lines
+	ldx.q r2, r22, r63	! prefetch 4 lines hence
+#endif
+2:
+	bge/u r2, r7, tr3	! skip alloco for last 2 lines
+	alloco r2, 0x40		! alloc destination line 2 lines ahead
+	synco			! TAKum03020
+3:
+	ldx.q r2, r60, r36	! r36 = source quadword at offset 0 of this line
+	ldx.q r2, r61, r37	! r37 = source quadword at offset 8
+	ldx.q r2, r62, r38	! r38 = source quadword at offset 16
+	ldx.q r2, r23, r39	! r39 = source quadword at offset 24
+	st.q r2, 0, r36		! store the four quadwords to the destination line
+	st.q r2, 8, r37
+	st.q r2, 16, r38
+	st.q r2, 24, r39
+	addi r2, 32, r2		! advance the destination pointer by one 32-byte line
+	bgt/l r8, r2, tr1	! loop while r2 is still below the end of the page (r8)
+
+	blink tr0, r63		! return