From 619b6e18fce20e4b2d0082cde989f37e1be7b3e1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 23 Oct 2007 12:43:25 +0100 Subject: [MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at= feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 60 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 13 deletions(-) (limited to 'arch/mips/lib/memcpy.S') diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index a526c62cb76..aded7b15905 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -9,6 +9,7 @@ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 2002 Broadcom, Inc. * memcpy/copy_user author: Mark Vandevoorde + * Copyright (C) 2007 Maciej W. Rozycki * * Mnemonic names for arguments to memcpy/__copy_user */ @@ -175,7 +176,11 @@ .text .set noreorder +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS .set noat +#else + .set at=v1 +#endif /* * A combined memcpy/__copy_user @@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES beqz len, done - ADD dst, dst, 4*NBYTES + .set noreorder less_than_4units: /* * rem = len % NBYTES @@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne rem, len, 1b - ADD dst, dst, NBYTES + .set noreorder /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) @@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES bne len, rem, 1b - ADD dst, dst, 4*NBYTES + .set noreorder cleanup_src_unaligned: beqz len, done @@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne len, rem, 1b - ADD dst, dst, NBYTES + .set noreorder copy_bytes_checklen: beqz len, done @@ -427,8 +440,10 @@ l_exc_copy: EXC( lb t1, 0(src), l_exc) ADD src, src, 1 sb t1, 0(dst) # can't fault -- we're copy_from_user + .set reorder /* DADDI_WAR */ + ADD dst, dst, 1 bne src, t0, 1b - ADD dst, dst, 1 + .set noreorder l_exc: LOAD t0, TI_TASK($28) nop @@ -446,20 +461,33 @@ l_exc: * Clear len bytes starting at dst. Can't call __bzero because it * might modify len. An inefficient loop for these rare times... */ + .set reorder /* DADDI_WAR */ + SUB src, len, 1 beqz len, done - SUB src, len, 1 + .set noreorder 1: sb zero, 0(dst) ADD dst, dst, 1 +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS bnez src, 1b SUB src, src, 1 +#else + .set push + .set noat + li v1, 1 + bnez src, 1b + SUB src, src, v1 + .set pop +#endif jr ra nop -#define SEXC(n) \ -s_exc_p ## n ## u: \ - jr ra; \ - ADD len, len, n*NBYTES +#define SEXC(n) \ + .set reorder; /* DADDI_WAR */ \ +s_exc_p ## n ## u: \ + ADD len, len, n*NBYTES; \ + jr ra; \ + .set noreorder SEXC(8) SEXC(7) @@ -471,8 +499,10 @@ SEXC(2) SEXC(1) s_exc_p1: + .set reorder /* DADDI_WAR */ + ADD len, len, 1 jr ra - ADD len, len, 1 + .set noreorder s_exc: jr ra nop @@ -502,8 +532,10 @@ r_end_bytes: SUB a2, a2, 0x1 sb t0, -1(a0) SUB a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + SUB a0, a0, 0x1 bnez a2, r_end_bytes - SUB a0, a0, 0x1 + .set noreorder r_out: jr ra @@ -514,8 +546,10 @@ r_end_bytes_up: SUB a2, a2, 0x1 sb t0, (a0) ADD a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + ADD a0, a0, 0x1 bnez a2, r_end_bytes_up - ADD a0, a0, 0x1 + .set noreorder jr ra move a2, zero -- cgit v1.2.3 From 930bff882296c02ca81db108672ef4ca06c37db5 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sun, 25 Nov 2007 11:47:56 +0100 Subject: [MIPS] IP28: added cache barrier to assembly routines IP28 needs special treatment to avoid speculative accesses. gcc takes care for .c code, but for assembly code we need to do it manually. This is taken from Peter Fuersts IP28 patches. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/mips/lib/memcpy.S') diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index aded7b15905..01e450b1ebc 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -199,6 +199,7 @@ FEXPORT(__copy_user) */ #define rem t8 + R10KCBARRIER(0(ra)) /* * The "issue break"s below are very approximate. * Issue delays for dcache fills will perturb the schedule, as will @@ -231,6 +232,7 @@ both_aligned: PREF( 1, 3*32(dst) ) .align 4 1: + R10KCBARRIER(0(ra)) EXC( LOAD t0, UNIT(0)(src), l_exc) EXC( LOAD t1, UNIT(1)(src), l_exc_copy) EXC( LOAD t2, UNIT(2)(src), l_exc_copy) @@ -272,6 +274,7 @@ EXC( LOAD t2, UNIT(2)(src), l_exc_copy) EXC( LOAD t3, UNIT(3)(src), l_exc_copy) SUB len, len, 4*NBYTES ADD src, src, 4*NBYTES + R10KCBARRIER(0(ra)) EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) @@ -287,6 +290,7 @@ less_than_4units: beq rem, len, copy_bytes nop 1: + R10KCBARRIER(0(ra)) EXC( LOAD t0, 0(src), l_exc) ADD src, src, NBYTES SUB len, len, NBYTES @@ -334,6 +338,7 @@ EXC( LDFIRST t3, FIRST(0)(src), l_exc) EXC( LDREST t3, REST(0)(src), l_exc_copy) SUB t2, t2, t1 # t2 = number of bytes copied xor match, t0, t1 + R10KCBARRIER(0(ra)) EXC( STFIRST t3, FIRST(0)(dst), s_exc) beq len, t2, done SUB len, len, t2 @@ -354,6 +359,7 @@ src_unaligned_dst_aligned: * It's OK to load FIRST(N+1) before REST(N) because the two addresses * are to the same unit (unless src is aligned, but it's not). */ + R10KCBARRIER(0(ra)) EXC( LDFIRST t0, FIRST(0)(src), l_exc) EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) SUB len, len, 4*NBYTES @@ -384,6 +390,7 @@ cleanup_src_unaligned: beq rem, len, copy_bytes nop 1: + R10KCBARRIER(0(ra)) EXC( LDFIRST t0, FIRST(0)(src), l_exc) EXC( LDREST t0, REST(0)(src), l_exc_copy) ADD src, src, NBYTES @@ -399,6 +406,7 @@ copy_bytes_checklen: nop copy_bytes: /* 0 < len < NBYTES */ + R10KCBARRIER(0(ra)) #define COPY_BYTE(N) \ EXC( lb t0, N(src), l_exc); \ SUB len, len, 1; \ @@ -528,6 +536,7 @@ LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ ADD a1, a2 # src = src + len r_end_bytes: + R10KCBARRIER(0(ra)) lb t0, -1(a1) SUB a2, a2, 0x1 sb t0, -1(a0) @@ -542,6 +551,7 @@ r_out: move a2, zero r_end_bytes_up: + R10KCBARRIER(0(ra)) lb t0, (a1) SUB a2, a2, 0x1 sb t0, (a0) -- cgit v1.2.3 From c5ec1983e45d25446a023e98207e30ab1bf2311a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 29 Jan 2008 10:14:59 +0000 Subject: [MIPS] Eleminate local symbols from the symbol table. These symbols appear in oprofile output, stacktraces and similar but only make the output harder to read. Many identical symbol names such as "both_aligned" were also being used in multiple source files making it impossible to see which file actually was meant. So let's get rid of them. Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 182 ++++++++++++++++++++++++------------------------- 1 file changed, 91 insertions(+), 91 deletions(-) (limited to 'arch/mips/lib/memcpy.S') diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 01e450b1ebc..c06cccf60be 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -191,7 +191,7 @@ .align 5 LEAF(memcpy) /* a0=dst a1=src a2=len */ move v0, dst /* return value */ -__memcpy: +.L__memcpy: FEXPORT(__copy_user) /* * Note: dst & src may be unaligned, len may be 0 @@ -213,45 +213,45 @@ FEXPORT(__copy_user) and t1, dst, ADDRMASK PREF( 0, 1*32(src) ) PREF( 1, 1*32(dst) ) - bnez t2, copy_bytes_checklen + bnez t2, .Lcopy_bytes_checklen and t0, src, ADDRMASK PREF( 0, 2*32(src) ) PREF( 1, 2*32(dst) ) - bnez t1, dst_unaligned + bnez t1, .Ldst_unaligned nop - bnez t0, src_unaligned_dst_aligned + bnez t0, .Lsrc_unaligned_dst_aligned /* * use delay slot for fall-through * src and dst are aligned; need to compute rem */ -both_aligned: +.Lboth_aligned: SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter - beqz t0, cleanup_both_aligned # len < 8*NBYTES + beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) PREF( 0, 3*32(src) ) PREF( 1, 3*32(dst) ) .align 4 1: R10KCBARRIER(0(ra)) -EXC( LOAD t0, UNIT(0)(src), l_exc) -EXC( LOAD t1, UNIT(1)(src), l_exc_copy) -EXC( LOAD t2, UNIT(2)(src), l_exc_copy) -EXC( LOAD t3, UNIT(3)(src), l_exc_copy) +EXC( LOAD t0, UNIT(0)(src), .Ll_exc) +EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) +EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) +EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) SUB len, len, 8*NBYTES -EXC( LOAD t4, UNIT(4)(src), l_exc_copy) -EXC( LOAD t7, UNIT(5)(src), l_exc_copy) -EXC( STORE t0, UNIT(0)(dst), s_exc_p8u) -EXC( STORE t1, UNIT(1)(dst), s_exc_p7u) -EXC( LOAD t0, UNIT(6)(src), l_exc_copy) -EXC( LOAD t1, UNIT(7)(src), l_exc_copy) +EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) +EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) +EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u) +EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u) +EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) +EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) ADD src, src, 8*NBYTES ADD dst, dst, 8*NBYTES -EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) -EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) -EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u) -EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u) -EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u) -EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) +EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u) +EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u) +EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u) +EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u) +EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u) +EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u) PREF( 0, 8*32(src) ) PREF( 1, 8*32(dst) ) bne len, rem, 1b @@ -260,41 +260,41 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) /* * len == rem == the number of bytes left to copy < 8*NBYTES */ -cleanup_both_aligned: - beqz len, done +.Lcleanup_both_aligned: + beqz len, .Ldone sltu t0, len, 4*NBYTES - bnez t0, less_than_4units + bnez t0, .Lless_than_4units and rem, len, (NBYTES-1) # rem = len % NBYTES /* * len >= 4*NBYTES */ -EXC( LOAD t0, UNIT(0)(src), l_exc) -EXC( LOAD t1, UNIT(1)(src), l_exc_copy) -EXC( LOAD t2, UNIT(2)(src), l_exc_copy) -EXC( LOAD t3, UNIT(3)(src), l_exc_copy) +EXC( LOAD t0, UNIT(0)(src), .Ll_exc) +EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) +EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) +EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) SUB len, len, 4*NBYTES ADD src, src, 4*NBYTES R10KCBARRIER(0(ra)) -EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) -EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) -EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) -EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) +EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) +EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) +EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) +EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) .set reorder /* DADDI_WAR */ ADD dst, dst, 4*NBYTES - beqz len, done + beqz len, .Ldone .set noreorder -less_than_4units: +.Lless_than_4units: /* * rem = len % NBYTES */ - beq rem, len, copy_bytes + beq rem, len, .Lcopy_bytes nop 1: R10KCBARRIER(0(ra)) -EXC( LOAD t0, 0(src), l_exc) +EXC( LOAD t0, 0(src), .Ll_exc) ADD src, src, NBYTES SUB len, len, NBYTES -EXC( STORE t0, 0(dst), s_exc_p1u) +EXC( STORE t0, 0(dst), .Ls_exc_p1u) .set reorder /* DADDI_WAR */ ADD dst, dst, NBYTES bne rem, len, 1b @@ -312,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u) * more instruction-level parallelism. */ #define bits t2 - beqz len, done + beqz len, .Ldone ADD t1, dst, len # t1 is just past last byte of dst li bits, 8*NBYTES SLL rem, len, 3 # rem = number of bits to keep -EXC( LOAD t0, 0(src), l_exc) +EXC( LOAD t0, 0(src), .Ll_exc) SUB bits, bits, rem # bits = number of bits to discard SHIFT_DISCARD t0, t0, bits -EXC( STREST t0, -1(t1), s_exc) +EXC( STREST t0, -1(t1), .Ls_exc) jr ra move len, zero -dst_unaligned: +.Ldst_unaligned: /* * dst is unaligned * t0 = src & ADDRMASK @@ -333,23 +333,23 @@ dst_unaligned: * Set match = (src and dst have same alignment) */ #define match rem -EXC( LDFIRST t3, FIRST(0)(src), l_exc) +EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) ADD t2, zero, NBYTES -EXC( LDREST t3, REST(0)(src), l_exc_copy) +EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) SUB t2, t2, t1 # t2 = number of bytes copied xor match, t0, t1 R10KCBARRIER(0(ra)) -EXC( STFIRST t3, FIRST(0)(dst), s_exc) - beq len, t2, done +EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) + beq len, t2, .Ldone SUB len, len, t2 ADD dst, dst, t2 - beqz match, both_aligned + beqz match, .Lboth_aligned ADD src, src, t2 -src_unaligned_dst_aligned: +.Lsrc_unaligned_dst_aligned: SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter PREF( 0, 3*32(src) ) - beqz t0, cleanup_src_unaligned + beqz t0, .Lcleanup_src_unaligned and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES PREF( 1, 3*32(dst) ) 1: @@ -360,58 +360,58 @@ src_unaligned_dst_aligned: * are to the same unit (unless src is aligned, but it's not). */ R10KCBARRIER(0(ra)) -EXC( LDFIRST t0, FIRST(0)(src), l_exc) -EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) +EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) +EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) SUB len, len, 4*NBYTES -EXC( LDREST t0, REST(0)(src), l_exc_copy) -EXC( LDREST t1, REST(1)(src), l_exc_copy) -EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) -EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) -EXC( LDREST t2, REST(2)(src), l_exc_copy) -EXC( LDREST t3, REST(3)(src), l_exc_copy) +EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) +EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) +EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) +EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) +EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) +EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) ADD src, src, 4*NBYTES #ifdef CONFIG_CPU_SB1 nop # improves slotting #endif -EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) -EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) -EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) -EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) +EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) +EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) +EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) +EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) .set reorder /* DADDI_WAR */ ADD dst, dst, 4*NBYTES bne len, rem, 1b .set noreorder -cleanup_src_unaligned: - beqz len, done +.Lcleanup_src_unaligned: + beqz len, .Ldone and rem, len, NBYTES-1 # rem = len % NBYTES - beq rem, len, copy_bytes + beq rem, len, .Lcopy_bytes nop 1: R10KCBARRIER(0(ra)) -EXC( LDFIRST t0, FIRST(0)(src), l_exc) -EXC( LDREST t0, REST(0)(src), l_exc_copy) +EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) +EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) ADD src, src, NBYTES SUB len, len, NBYTES -EXC( STORE t0, 0(dst), s_exc_p1u) +EXC( STORE t0, 0(dst), .Ls_exc_p1u) .set reorder /* DADDI_WAR */ ADD dst, dst, NBYTES bne len, rem, 1b .set noreorder -copy_bytes_checklen: - beqz len, done +.Lcopy_bytes_checklen: + beqz len, .Ldone nop -copy_bytes: +.Lcopy_bytes: /* 0 < len < NBYTES */ R10KCBARRIER(0(ra)) #define COPY_BYTE(N) \ -EXC( lb t0, N(src), l_exc); \ +EXC( lb t0, N(src), .Ll_exc); \ SUB len, len, 1; \ - beqz len, done; \ -EXC( sb t0, N(dst), s_exc_p1) + beqz len, .Ldone; \ +EXC( sb t0, N(dst), .Ls_exc_p1) COPY_BYTE(0) COPY_BYTE(1) @@ -421,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1) COPY_BYTE(4) COPY_BYTE(5) #endif -EXC( lb t0, NBYTES-2(src), l_exc) +EXC( lb t0, NBYTES-2(src), .Ll_exc) SUB len, len, 1 jr ra -EXC( sb t0, NBYTES-2(dst), s_exc_p1) -done: +EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1) +.Ldone: jr ra nop END(memcpy) -l_exc_copy: +.Ll_exc_copy: /* * Copy bytes from src until faulting load address (or until a * lb faults) @@ -445,14 +445,14 @@ l_exc_copy: nop LOAD t0, THREAD_BUADDR(t0) 1: -EXC( lb t1, 0(src), l_exc) +EXC( lb t1, 0(src), .Ll_exc) ADD src, src, 1 sb t1, 0(dst) # can't fault -- we're copy_from_user .set reorder /* DADDI_WAR */ ADD dst, dst, 1 bne src, t0, 1b .set noreorder -l_exc: +.Ll_exc: LOAD t0, TI_TASK($28) nop LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address @@ -471,7 +471,7 @@ l_exc: */ .set reorder /* DADDI_WAR */ SUB src, len, 1 - beqz len, done + beqz len, .Ldone .set noreorder 1: sb zero, 0(dst) ADD dst, dst, 1 @@ -492,7 +492,7 @@ l_exc: #define SEXC(n) \ .set reorder; /* DADDI_WAR */ \ -s_exc_p ## n ## u: \ +.Ls_exc_p ## n ## u: \ ADD len, len, n*NBYTES; \ jr ra; \ .set noreorder @@ -506,12 +506,12 @@ SEXC(3) SEXC(2) SEXC(1) -s_exc_p1: +.Ls_exc_p1: .set reorder /* DADDI_WAR */ ADD len, len, 1 jr ra .set noreorder -s_exc: +.Ls_exc: jr ra nop @@ -522,20 +522,20 @@ LEAF(memmove) sltu t0, a1, t0 # dst + len <= src -> memcpy sltu t1, a0, t1 # dst >= src + len -> memcpy and t0, t1 - beqz t0, __memcpy + beqz t0, .L__memcpy move v0, a0 /* return value */ - beqz a2, r_out + beqz a2, .Lr_out END(memmove) /* fall through to __rmemcpy */ LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ sltu t0, a1, a0 - beqz t0, r_end_bytes_up # src >= dst + beqz t0, .Lr_end_bytes_up # src >= dst nop ADD a0, a2 # dst = dst + len ADD a1, a2 # src = src + len -r_end_bytes: +.Lr_end_bytes: R10KCBARRIER(0(ra)) lb t0, -1(a1) SUB a2, a2, 0x1 @@ -543,14 +543,14 @@ r_end_bytes: SUB a1, a1, 0x1 .set reorder /* DADDI_WAR */ SUB a0, a0, 0x1 - bnez a2, r_end_bytes + bnez a2, .Lr_end_bytes .set noreorder -r_out: +.Lr_out: jr ra move a2, zero -r_end_bytes_up: +.Lr_end_bytes_up: R10KCBARRIER(0(ra)) lb t0, (a1) SUB a2, a2, 0x1 @@ -558,7 +558,7 @@ r_end_bytes_up: ADD a1, a1, 0x1 .set reorder /* DADDI_WAR */ ADD a0, a0, 0x1 - bnez a2, r_end_bytes_up + bnez a2, .Lr_end_bytes_up .set noreorder jr ra -- cgit v1.2.3