diff options
Diffstat (limited to 'arch/blackfin/lib')
-rw-r--r-- | arch/blackfin/lib/checksum.c | 4 | ||||
-rw-r--r-- | arch/blackfin/lib/ins.S | 272 | ||||
-rw-r--r-- | arch/blackfin/lib/muldi3.S | 68 | ||||
-rw-r--r-- | arch/blackfin/lib/muldi3.c | 99 |
4 files changed, 156 insertions, 287 deletions
diff --git a/arch/blackfin/lib/checksum.c b/arch/blackfin/lib/checksum.c index 5c87505165d..762a7f02970 100644 --- a/arch/blackfin/lib/checksum.c +++ b/arch/blackfin/lib/checksum.c @@ -29,6 +29,7 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/module.h> #include <net/checksum.h> #include <asm/checksum.h> @@ -76,6 +77,7 @@ __sum16 ip_fast_csum(unsigned char *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph, ihl * 4); } +EXPORT_SYMBOL(ip_fast_csum); /* * computes the checksum of a memory block at buff, length len, @@ -104,6 +106,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) return sum; } +EXPORT_SYMBOL(csum_partial); /* * this routine is used for miscellaneous IP-like checksums, mainly @@ -137,3 +140,4 @@ __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum) memcpy(dst, src, len); return csum_partial(dst, len, sum); } +EXPORT_SYMBOL(csum_partial_copy); diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S index d60554dce87..1863a6ba507 100644 --- a/arch/blackfin/lib/ins.S +++ b/arch/blackfin/lib/ins.S @@ -1,31 +1,9 @@ /* - * File: arch/blackfin/lib/ins.S - * Based on: - * Author: Bas Vermeulen <bas@buyways.nl> + * arch/blackfin/lib/ins.S - ins{bwl} using hardware loops * - * Created: Tue Mar 22 15:27:24 CEST 2005 - * Description: Implementation of ins{bwl} for BlackFin processors using zero overhead loops. - * - * Modified: - * Copyright 2004-2008 Analog Devices Inc. - * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl> - * - * Bugs: Enter bugs at http://blackfin.uclinux.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * Copyright 2004-2008 Analog Devices Inc. + * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl> + * Licensed under the GPL-2 or later. */ #include <linux/linkage.h> @@ -33,6 +11,46 @@ .align 2 +#ifdef CONFIG_IPIPE +# define DO_CLI \ + [--sp] = rets; \ + [--sp] = (P5:0); \ + sp += -12; \ + call ___ipipe_stall_root_raw; \ + sp += 12; \ + (P5:0) = [sp++]; +# define CLI_INNER_NOP +#else +# define DO_CLI cli R3; +# define CLI_INNER_NOP nop; nop; nop; +#endif + +#ifdef CONFIG_IPIPE +# define DO_STI \ + sp += -12; \ + call ___ipipe_unstall_root_raw; \ + sp += 12; \ +2: rets = [sp++]; +#else +# define DO_STI 2: sti R3; +#endif + +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD +# define CLI_OUTER DO_CLI; +# define STI_OUTER DO_STI; +# define CLI_INNER 1: +# if ANOMALY_05000416 +# define STI_INNER nop; 2: nop; +# else +# define STI_INNER 2: +# endif +#else +# define CLI_OUTER +# define STI_OUTER +# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP; +# define STI_INNER DO_STI; +#endif + /* * Reads on the Blackfin are speculative. In Blackfin terms, this means they * can be interrupted at any time (even after they have been issued on to the @@ -53,170 +71,48 @@ * buffers in/out of FIFOs. */ -ENTRY(_insl) -#ifdef CONFIG_BFIN_INS_LOWOVERHEAD - P0 = R0; /* P0 = port */ - cli R3; - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; -.Llong_loop_s: R0 = [P0]; - [P1++] = R0; - NOP; -.Llong_loop_e: NOP; - sti R3; - RTS; -#else - P0 = R0; /* P0 = port */ - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; -.Llong_loop_s: - CLI R3; - NOP; NOP; NOP; - R0 = [P0]; - [P1++] = R0; -.Llong_loop_e: - STI R3; +#define COMMON_INS(func, ops) \ +ENTRY(_ins##func) \ + P0 = R0; /* P0 = port */ \ + CLI_OUTER; /* 3 instructions before first read access */ \ + P1 = R1; /* P1 = address */ \ + P2 = R2; /* P2 = count */ \ + SSYNC; \ + \ + LSETUP(1f, 2f) LC0 = P2; \ + CLI_INNER; \ + ops; \ + STI_INNER; \ + \ + STI_OUTER; \ + RTS; \ +ENDPROC(_ins##func) - RTS; -#endif -ENDPROC(_insl) - -ENTRY(_insw) -#ifdef CONFIG_BFIN_INS_LOWOVERHEAD - P0 = R0; /* P0 = port */ - cli R3; - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2; -.Lword_loop_s: R0 = W[P0]; - W[P1++] = R0; - NOP; -.Lword_loop_e: NOP; - sti R3; - RTS; -#else - P0 = R0; /* P0 = port */ - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2; -.Lword_loop_s: - CLI R3; - NOP; NOP; NOP; - R0 = W[P0]; - W[P1++] = R0; -.Lword_loop_e: - STI R3; - RTS; - -#endif -ENDPROC(_insw) - -ENTRY(_insw_8) -#ifdef CONFIG_BFIN_INS_LOWOVERHEAD - P0 = R0; /* P0 = port */ - cli R3; - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2; -.Lword8_loop_s: R0 = W[P0]; - B[P1++] = R0; - R0 = R0 >> 8; - B[P1++] = R0; - NOP; -.Lword8_loop_e: NOP; - sti R3; - RTS; -#else - P0 = R0; /* P0 = port */ - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2; -.Lword8_loop_s: - CLI R3; - NOP; NOP; NOP; - R0 = W[P0]; - B[P1++] = R0; - R0 = R0 >> 8; - B[P1++] = R0; - NOP; -.Lword8_loop_e: - STI R3; +COMMON_INS(l, \ + R0 = [P0]; \ + [P1++] = R0; \ +) - RTS; -#endif -ENDPROC(_insw_8) +COMMON_INS(w, \ + R0 = W[P0]; \ + W[P1++] = R0; \ +) -ENTRY(_insb) -#ifdef CONFIG_BFIN_INS_LOWOVERHEAD - P0 = R0; /* P0 = port */ - cli R3; - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2; -.Lbyte_loop_s: R0 = B[P0]; - B[P1++] = R0; - NOP; -.Lbyte_loop_e: NOP; - sti R3; - RTS; -#else - P0 = R0; /* P0 = port */ - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2; -.Lbyte_loop_s: - CLI R3; - NOP; NOP; NOP; - R0 = B[P0]; - B[P1++] = R0; -.Lbyte_loop_e: - STI R3; +COMMON_INS(w_8, \ + R0 = W[P0]; \ + B[P1++] = R0; \ + R0 = R0 >> 8; \ + B[P1++] = R0; \ +) - RTS; -#endif -ENDPROC(_insb) +COMMON_INS(b, \ + R0 = B[P0]; \ + B[P1++] = R0; \ +) -ENTRY(_insl_16) -#ifdef CONFIG_BFIN_INS_LOWOVERHEAD - P0 = R0; /* P0 = port */ - cli R3; - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2; -.Llong16_loop_s: R0 = [P0]; - W[P1++] = R0; - R0 = R0 >> 16; - W[P1++] = R0; - NOP; -.Llong16_loop_e: NOP; - sti R3; - RTS; -#else - P0 = R0; /* P0 = port */ - P1 = R1; /* P1 = address */ - P2 = R2; /* P2 = count */ - SSYNC; - LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2; -.Llong16_loop_s: - CLI R3; - NOP; NOP; NOP; - R0 = [P0]; - W[P1++] = R0; - R0 = R0 >> 16; - W[P1++] = R0; -.Llong16_loop_e: - STI R3; - RTS; -#endif -ENDPROC(_insl_16) +COMMON_INS(l_16, \ + R0 = [P0]; \ + W[P1++] = R0; \ + R0 = R0 >> 16; \ + W[P1++] = R0; \ +) diff --git a/arch/blackfin/lib/muldi3.S b/arch/blackfin/lib/muldi3.S new file mode 100644 index 00000000000..abde120ee23 --- /dev/null +++ b/arch/blackfin/lib/muldi3.S @@ -0,0 +1,68 @@ +.align 2 +.global ___muldi3; +.type ___muldi3, STT_FUNC; + +#ifdef CONFIG_ARITHMETIC_OPS_L1 +.section .l1.text +#else +.text +#endif + +/* + R1:R0 * R3:R2 + = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l +[X] = (R1.h * R3.h) * 2^96 +[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 +[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 +[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 +[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 +[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 +[T4] + (R0.l * R2.l) + + We can discard the first three lines marked "X" since we produce + only a 64 bit result. So, we need ten 16-bit multiplies. + + Individual mul-acc results: +[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h +[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h +[E3] = R0.l * R2.h + R2.l * R0.h +[E4] = R0.l * R2.l + + We also need to add high parts from lower-level results to higher ones: + E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 + + One interesting property is that all parts of the result that depend + on the sign of the multiplication are discarded. Those would be the + multiplications involving R1.h and R3.h, but only the top 16 bit of + the 32 bit result depend on the sign, and since R1.h and R3.h only + occur in E1, the top half of these results is cut off. + So, we can just use FU mode for all of the 16-bit multiplies, and + ignore questions of when to use mixed mode. */ + +___muldi3: + /* [SP] technically is part of the caller's frame, but we can + use it as scratch space. */ + A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ + A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ + A0 += A1; /* E1 */ + R4 = A0.w; + A0 = R0.l * R3.l (FU); /* E2 */ + A0 += R2.l * R1.l (FU); /* E2 */ + + A1 = R2.L * R0.L (FU); /* E4 */ + R3 = A1.w; + A1 = A1 >> 16; /* E3c */ + A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ + A1 += R0.L * R2.H (FU); /* E3c */ + R0 = A1.w; + A1 = A1 >> 16; /* E2c */ + A0 += A1; /* E2c */ + R1 = A0.w; + + /* low(result) = low(E3c):low(E4) */ + R0 = PACK (R0.l, R3.l); + /* high(result) = E2c + (E1 << 16) */ + R1.h = R1.h + R4.l (NS) || R4 = [SP]; + RTS; + +.size ___muldi3, .-___muldi3 diff --git a/arch/blackfin/lib/muldi3.c b/arch/blackfin/lib/muldi3.c deleted file mode 100644 index 303d0c6a6db..00000000000 --- a/arch/blackfin/lib/muldi3.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * File: arch/blackfin/lib/muldi3.c - * Based on: - * Author: - * - * Created: - * Description: - * - * Modified: - * Copyright 2004-2006 Analog Devices Inc. - * - * Bugs: Enter bugs at http://blackfin.uclinux.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SI_TYPE_SIZE -#define SI_TYPE_SIZE 32 -#endif -#define __ll_b (1L << (SI_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((usitype) (t) % __ll_b) -#define __ll_highpart(t) ((usitype) (t) / __ll_b) -#define BITS_PER_UNIT 8 - -#if !defined(umul_ppmm) -#define umul_ppmm(w1, w0, u, v) \ - do { \ - usitype __x0, __x1, __x2, __x3; \ - usitype __ul, __vl, __uh, __vh; \ - \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ - \ - __x0 = (usitype) __ul * __vl; \ - __x1 = (usitype) __ul * __vh; \ - __x2 = (usitype) __uh * __vl; \ - __x3 = (usitype) __uh * __vh; \ - \ - __x1 += __ll_highpart (__x0);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += __ll_b; /* yes, add it in the proper pos. */ \ - \ - (w1) = __x3 + __ll_highpart (__x1); \ - (w0) = __ll_lowpart (__x1) * __ll_b + __ll_lowpart (__x0); \ - } while (0) -#endif - -#if !defined(__umulsidi3) -#define __umulsidi3(u, v) \ - ({diunion __w; \ - umul_ppmm (__w.s.high, __w.s.low, u, v); \ - __w.ll; }) -#endif - -typedef unsigned int usitype __attribute__ ((mode(SI))); -typedef int sitype __attribute__ ((mode(SI))); -typedef int ditype __attribute__ ((mode(DI))); -typedef int word_type __attribute__ ((mode(__word__))); - -struct distruct { - sitype low, high; -}; -typedef union { - struct distruct s; - ditype ll; -} diunion; - -#ifdef CONFIG_ARITHMETIC_OPS_L1 -ditype __muldi3(ditype u, ditype v)__attribute__((l1_text)); -#endif - -ditype __muldi3(ditype u, ditype v) -{ - diunion w; - diunion uu, vv; - - uu.ll = u, vv.ll = v; - w.ll = __umulsidi3(uu.s.low, vv.s.low); - w.s.high += ((usitype) uu.s.low * (usitype) vv.s.high - + (usitype) uu.s.high * (usitype) vv.s.low); - - return w.ll; -} |