diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sh/Makefile | 19 | ||||
-rw-r--r-- | arch/sh/kernel/sh_ksyms_32.c | 36 | ||||
-rw-r--r-- | arch/sh/lib/Makefile | 14 | ||||
-rw-r--r-- | arch/sh/lib/ashiftrt.S | 149 | ||||
-rw-r--r-- | arch/sh/lib/ashldi3.c | 29 | ||||
-rw-r--r-- | arch/sh/lib/ashlsi3.S | 193 | ||||
-rw-r--r-- | arch/sh/lib/ashrdi3.c | 31 | ||||
-rw-r--r-- | arch/sh/lib/ashrsi3.S | 185 | ||||
-rw-r--r-- | arch/sh/lib/libgcc.h | 26 | ||||
-rw-r--r-- | arch/sh/lib/lshrdi3.c | 29 | ||||
-rw-r--r-- | arch/sh/lib/lshrsi3.S | 193 | ||||
-rw-r--r-- | arch/sh/lib/movmem.S | 238 | ||||
-rw-r--r-- | arch/sh/lib/udiv_qrnnd.S | 81 | ||||
-rw-r--r-- | arch/sh/lib/udivsi3-Os.S | 147 | ||||
-rw-r--r-- | arch/sh/lib/udivsi3.S | 664 |
15 files changed, 1984 insertions, 50 deletions
diff --git a/arch/sh/Makefile b/arch/sh/Makefile index c43eb0d7fa3..22a1794287a 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -34,22 +34,6 @@ cflags-$(CONFIG_CPU_SH4A) += $(call cc-option,-m4a,) \ $(call cc-option,-m4a-nofpu,) cflags-$(CONFIG_CPU_SH5) := $(call cc-option,-m5-32media-nofpu,) -ifeq ($(cflags-y),) -# -# In the case where we are stuck with a compiler that has been uselessly -# restricted to a particular ISA, a favourite default of newer GCCs when -# extensive multilib targets are not provided, ensure we get the best fit -# regarding FP generation. This is necessary to avoid references to FP -# variants in libgcc where integer variants exist, which otherwise result -# in link errors. This is intentionally stupid (albeit many orders of -# magnitude less than GCC's default behaviour), as anything with a large -# number of multilib targets better have been built correctly for -# the target in mind. -# -cflags-y += $(shell $(CC) $(KBUILD_CFLAGS) -print-multi-lib | \ - grep nofpu | sed q | sed -e 's/^/-/;s/;.*$$//') -endif - cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mb cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -ml @@ -176,8 +160,7 @@ KBUILD_AFLAGS += $(cflags-y) LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) libs-$(CONFIG_SUPERH32) := arch/sh/lib/ $(libs-y) -libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y) -libs-y += $(LIBGCC) +libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y) $(LIBGCC) PHONY += maketools FORCE diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c index 92ae5e6c099..9c5ae7b9cd9 100644 --- a/arch/sh/kernel/sh_ksyms_32.c +++ b/arch/sh/kernel/sh_ksyms_32.c @@ -52,16 +52,10 @@ EXPORT_SYMBOL(__const_udelay); #define DECLARE_EXPORT(name) \ extern void name(void);EXPORT_SYMBOL(name) -#define MAYBE_DECLARE_EXPORT(name) \ - extern void name(void) __weak;EXPORT_SYMBOL(name) -/* These symbols are generated by the compiler itself */ -DECLARE_EXPORT(__udivsi3); -DECLARE_EXPORT(__sdivsi3); 
+DECLARE_EXPORT(__lshrsi3); DECLARE_EXPORT(__ashrsi3); DECLARE_EXPORT(__ashlsi3); -DECLARE_EXPORT(__ashrdi3); -DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__ashiftrt_r4_6); DECLARE_EXPORT(__ashiftrt_r4_7); DECLARE_EXPORT(__ashiftrt_r4_8); @@ -79,8 +73,7 @@ DECLARE_EXPORT(__ashiftrt_r4_23); DECLARE_EXPORT(__ashiftrt_r4_24); DECLARE_EXPORT(__ashiftrt_r4_27); DECLARE_EXPORT(__ashiftrt_r4_30); -DECLARE_EXPORT(__lshrsi3); -DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__movstr); DECLARE_EXPORT(__movstrSI8); DECLARE_EXPORT(__movstrSI12); DECLARE_EXPORT(__movstrSI16); @@ -95,31 +88,12 @@ DECLARE_EXPORT(__movstrSI48); DECLARE_EXPORT(__movstrSI52); DECLARE_EXPORT(__movstrSI56); DECLARE_EXPORT(__movstrSI60); -#if __GNUC__ == 4 -DECLARE_EXPORT(__movmem); -#else -DECLARE_EXPORT(__movstr); -#endif - -#if __GNUC__ == 4 -DECLARE_EXPORT(__movmem_i4_even); -DECLARE_EXPORT(__movmem_i4_odd); -DECLARE_EXPORT(__movmemSI12_i4); - -#if (__GNUC_MINOR__ >= 2 || defined(__GNUC_STM_RELEASE__)) -/* - * GCC >= 4.2 emits these for division, as do GCC 4.1.x versions of the ST - * compiler which include backported patches. 
- */ -DECLARE_EXPORT(__udiv_qrnnd_16); -MAYBE_DECLARE_EXPORT(__sdivsi3_i4i); -MAYBE_DECLARE_EXPORT(__udivsi3_i4i); -#endif -#else /* GCC 3.x */ DECLARE_EXPORT(__movstr_i4_even); DECLARE_EXPORT(__movstr_i4_odd); DECLARE_EXPORT(__movstrSI12_i4); -#endif /* __GNUC__ == 4 */ +DECLARE_EXPORT(__udiv_qrnnd_16); +DECLARE_EXPORT(__sdivsi3_i4i); +DECLARE_EXPORT(__udivsi3_i4i); #if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \ defined(CONFIG_SH7705_CACHE_32KB)) diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index 596421821d0..a30acb8342d 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -5,6 +5,18 @@ lib-y = delay.o memset.o memmove.o memchr.o \ checksum.o strlen.o div64.o div64-generic.o +# Extracted from libgcc +lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ + ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \ + udiv_qrnnd.o + +udivsi3-y := udivsi3-Os.o + +ifneq ($(CONFIG_CC_OPTIMIZE_FOR_SIZE),y) +udivsi3-$(CONFIG_CPU_SH3) := udivsi3.o +udivsi3-$(CONFIG_CPU_SH4) := udivsi3.o +endif + obj-y += io.o memcpy-y := memcpy.o @@ -12,6 +24,6 @@ memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o lib-$(CONFIG_MMU) += copy_page.o clear_page.o lib-$(CONFIG_FUNCTION_TRACER) += mcount.o -lib-y += $(memcpy-y) +lib-y += $(memcpy-y) $(udivsi3-y) EXTRA_CFLAGS += -Werror diff --git a/arch/sh/lib/ashiftrt.S b/arch/sh/lib/ashiftrt.S new file mode 100644 index 00000000000..45ce86558f4 --- /dev/null +++ b/arch/sh/lib/ashiftrt.S @@ -0,0 +1,149 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! 
tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .global __ashiftrt_r4_0 + .global __ashiftrt_r4_1 + .global __ashiftrt_r4_2 + .global __ashiftrt_r4_3 + .global __ashiftrt_r4_4 + .global __ashiftrt_r4_5 + .global __ashiftrt_r4_6 + .global __ashiftrt_r4_7 + .global __ashiftrt_r4_8 + .global __ashiftrt_r4_9 + .global __ashiftrt_r4_10 + .global __ashiftrt_r4_11 + .global __ashiftrt_r4_12 + .global __ashiftrt_r4_13 + .global __ashiftrt_r4_14 + .global __ashiftrt_r4_15 + .global __ashiftrt_r4_16 + .global __ashiftrt_r4_17 + .global __ashiftrt_r4_18 + .global __ashiftrt_r4_19 + .global __ashiftrt_r4_20 + .global __ashiftrt_r4_21 + .global __ashiftrt_r4_22 + .global __ashiftrt_r4_23 + .global __ashiftrt_r4_24 + .global __ashiftrt_r4_25 + .global __ashiftrt_r4_26 + .global __ashiftrt_r4_27 + .global __ashiftrt_r4_28 + .global __ashiftrt_r4_29 + .global __ashiftrt_r4_30 + .global __ashiftrt_r4_31 + .global __ashiftrt_r4_32 + + .align 1 +__ashiftrt_r4_32: +__ashiftrt_r4_31: + rotcl r4 + rts + subc r4,r4 +__ashiftrt_r4_30: + shar r4 +__ashiftrt_r4_29: + shar r4 +__ashiftrt_r4_28: + shar r4 +__ashiftrt_r4_27: + shar r4 +__ashiftrt_r4_26: + shar r4 +__ashiftrt_r4_25: + shar r4 +__ashiftrt_r4_24: + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 +__ashiftrt_r4_23: + shar r4 +__ashiftrt_r4_22: + shar r4 +__ashiftrt_r4_21: + shar r4 +__ashiftrt_r4_20: + shar r4 +__ashiftrt_r4_19: + shar r4 +__ashiftrt_r4_18: + shar r4 +__ashiftrt_r4_17: + shar r4 +__ashiftrt_r4_16: + shlr16 r4 + rts + exts.w r4,r4 +__ashiftrt_r4_15: + shar r4 +__ashiftrt_r4_14: + shar r4 +__ashiftrt_r4_13: + shar r4 +__ashiftrt_r4_12: + shar r4 +__ashiftrt_r4_11: + shar r4 +__ashiftrt_r4_10: + shar r4 +__ashiftrt_r4_9: + shar r4 +__ashiftrt_r4_8: + shar r4 +__ashiftrt_r4_7: + shar r4 +__ashiftrt_r4_6: + shar r4 +__ashiftrt_r4_5: + shar r4 +__ashiftrt_r4_4: + shar r4 +__ashiftrt_r4_3: + shar r4 +__ashiftrt_r4_2: + 
shar r4 +__ashiftrt_r4_1: + rts + shar r4 +__ashiftrt_r4_0: + rts + nop diff --git a/arch/sh/lib/ashldi3.c b/arch/sh/lib/ashldi3.c new file mode 100644 index 00000000000..beb80f31609 --- /dev/null +++ b/arch/sh/lib/ashldi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashldi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.low = 0; + w.s.high = (unsigned int) uu.s.low << -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.low >> bm; + + w.s.low = (unsigned int) uu.s.low << b; + w.s.high = ((unsigned int) uu.s.high << b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashldi3); diff --git a/arch/sh/lib/ashlsi3.S b/arch/sh/lib/ashlsi3.S new file mode 100644 index 00000000000..bd47e9b403a --- /dev/null +++ b/arch/sh/lib/ashlsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashlsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + .global __ashlsi3 + + .align 2 +__ashlsi3: + mov #31,r0 + and r0,r5 + mova ashlsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashlsi3_table: + .byte ashlsi3_0-ashlsi3_table + .byte ashlsi3_1-ashlsi3_table + .byte ashlsi3_2-ashlsi3_table + .byte ashlsi3_3-ashlsi3_table + .byte ashlsi3_4-ashlsi3_table + .byte ashlsi3_5-ashlsi3_table + .byte ashlsi3_6-ashlsi3_table + .byte ashlsi3_7-ashlsi3_table + .byte ashlsi3_8-ashlsi3_table + .byte ashlsi3_9-ashlsi3_table + .byte ashlsi3_10-ashlsi3_table + .byte ashlsi3_11-ashlsi3_table + .byte ashlsi3_12-ashlsi3_table + .byte ashlsi3_13-ashlsi3_table + .byte ashlsi3_14-ashlsi3_table + .byte ashlsi3_15-ashlsi3_table + .byte ashlsi3_16-ashlsi3_table + .byte ashlsi3_17-ashlsi3_table + .byte ashlsi3_18-ashlsi3_table + .byte ashlsi3_19-ashlsi3_table + .byte ashlsi3_20-ashlsi3_table + .byte ashlsi3_21-ashlsi3_table + .byte ashlsi3_22-ashlsi3_table + .byte ashlsi3_23-ashlsi3_table + .byte ashlsi3_24-ashlsi3_table + .byte ashlsi3_25-ashlsi3_table + .byte ashlsi3_26-ashlsi3_table + .byte ashlsi3_27-ashlsi3_table + .byte ashlsi3_28-ashlsi3_table + .byte ashlsi3_29-ashlsi3_table + .byte ashlsi3_30-ashlsi3_table + .byte 
ashlsi3_31-ashlsi3_table + +ashlsi3_6: + shll2 r0 +ashlsi3_4: + shll2 r0 +ashlsi3_2: + rts + shll2 r0 + +ashlsi3_7: + shll2 r0 +ashlsi3_5: + shll2 r0 +ashlsi3_3: + shll2 r0 +ashlsi3_1: + rts + shll r0 + +ashlsi3_14: + shll2 r0 +ashlsi3_12: + shll2 r0 +ashlsi3_10: + shll2 r0 +ashlsi3_8: + rts + shll8 r0 + +ashlsi3_15: + shll2 r0 +ashlsi3_13: + shll2 r0 +ashlsi3_11: + shll2 r0 +ashlsi3_9: + shll8 r0 + rts + shll r0 + +ashlsi3_22: + shll2 r0 +ashlsi3_20: + shll2 r0 +ashlsi3_18: + shll2 r0 +ashlsi3_16: + rts + shll16 r0 + +ashlsi3_23: + shll2 r0 +ashlsi3_21: + shll2 r0 +ashlsi3_19: + shll2 r0 +ashlsi3_17: + shll16 r0 + rts + shll r0 + +ashlsi3_30: + shll2 r0 +ashlsi3_28: + shll2 r0 +ashlsi3_26: + shll2 r0 +ashlsi3_24: + shll16 r0 + rts + shll8 r0 + +ashlsi3_31: + shll2 r0 +ashlsi3_29: + shll2 r0 +ashlsi3_27: + shll2 r0 +ashlsi3_25: + shll16 r0 + shll8 r0 + rts + shll r0 + +ashlsi3_0: + rts + nop diff --git a/arch/sh/lib/ashrdi3.c b/arch/sh/lib/ashrdi3.c new file mode 100644 index 00000000000..c884a912b66 --- /dev/null +++ b/arch/sh/lib/ashrdi3.c @@ -0,0 +1,31 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + /* w.s.high = 1..1 or 0..0 */ + w.s.high = + uu.s.high >> 31; + w.s.low = uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashrdi3); diff --git a/arch/sh/lib/ashrsi3.S b/arch/sh/lib/ashrsi3.S new file mode 100644 index 00000000000..6f3cf46b77c --- /dev/null +++ b/arch/sh/lib/ashrsi3.S @@ -0,0 +1,185 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. 
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ + .global __ashrsi3 + + .align 2 +__ashrsi3: + mov #31,r0 + and r0,r5 + mova ashrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashrsi3_table: + .byte ashrsi3_0-ashrsi3_table + .byte ashrsi3_1-ashrsi3_table + .byte ashrsi3_2-ashrsi3_table + .byte ashrsi3_3-ashrsi3_table + .byte ashrsi3_4-ashrsi3_table + .byte ashrsi3_5-ashrsi3_table + .byte ashrsi3_6-ashrsi3_table + .byte ashrsi3_7-ashrsi3_table + .byte ashrsi3_8-ashrsi3_table + .byte ashrsi3_9-ashrsi3_table + .byte ashrsi3_10-ashrsi3_table + .byte ashrsi3_11-ashrsi3_table + .byte ashrsi3_12-ashrsi3_table + .byte ashrsi3_13-ashrsi3_table + .byte ashrsi3_14-ashrsi3_table + .byte ashrsi3_15-ashrsi3_table + .byte ashrsi3_16-ashrsi3_table + .byte ashrsi3_17-ashrsi3_table + .byte ashrsi3_18-ashrsi3_table + .byte ashrsi3_19-ashrsi3_table + .byte ashrsi3_20-ashrsi3_table + .byte ashrsi3_21-ashrsi3_table + .byte ashrsi3_22-ashrsi3_table + .byte ashrsi3_23-ashrsi3_table + .byte ashrsi3_24-ashrsi3_table + .byte ashrsi3_25-ashrsi3_table + .byte ashrsi3_26-ashrsi3_table + .byte ashrsi3_27-ashrsi3_table + .byte ashrsi3_28-ashrsi3_table + .byte ashrsi3_29-ashrsi3_table + .byte ashrsi3_30-ashrsi3_table + .byte ashrsi3_31-ashrsi3_table + +ashrsi3_31: + rotcl r0 + rts + subc r0,r0 + +ashrsi3_30: + shar r0 +ashrsi3_29: + shar r0 +ashrsi3_28: + shar r0 +ashrsi3_27: + shar r0 +ashrsi3_26: + shar r0 +ashrsi3_25: + shar r0 +ashrsi3_24: + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +ashrsi3_23: + shar r0 +ashrsi3_22: + shar r0 +ashrsi3_21: + shar r0 +ashrsi3_20: + shar r0 +ashrsi3_19: + shar r0 +ashrsi3_18: + shar r0 +ashrsi3_17: + shar r0 +ashrsi3_16: + shlr16 r0 + rts + exts.w r0,r0 + +ashrsi3_15: + shar r0 +ashrsi3_14: + shar r0 +ashrsi3_13: + shar r0 +ashrsi3_12: + shar r0 +ashrsi3_11: + shar r0 +ashrsi3_10: + shar r0 +ashrsi3_9: + shar r0 +ashrsi3_8: + shar r0 +ashrsi3_7: + shar r0 +ashrsi3_6: + shar r0 +ashrsi3_5: + shar r0 +ashrsi3_4: + shar r0 
+ashrsi3_3: + shar r0 +ashrsi3_2: + shar r0 +ashrsi3_1: + rts + shar r0 + +ashrsi3_0: + rts + nop diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h new file mode 100644 index 00000000000..3f19d1c5d94 --- /dev/null +++ b/arch/sh/lib/libgcc.h @@ -0,0 +1,26 @@ +#ifndef __ASM_LIBGCC_H +#define __ASM_LIBGCC_H + +#include <asm/byteorder.h> + +typedef int word_type __attribute__ ((mode (__word__))); + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union +{ + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/arch/sh/lib/lshrdi3.c b/arch/sh/lib/lshrdi3.c new file mode 100644 index 00000000000..dcf8d6810b7 --- /dev/null +++ b/arch/sh/lib/lshrdi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __lshrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.high = 0; + w.s.low = (unsigned int) uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = (unsigned int) uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__lshrdi3); diff --git a/arch/sh/lib/lshrsi3.S b/arch/sh/lib/lshrsi3.S new file mode 100644 index 00000000000..1e7aaa55713 --- /dev/null +++ b/arch/sh/lib/lshrsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __lshrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global __lshrsi3 + + .align 2 +__lshrsi3: + mov #31,r0 + and r0,r5 + mova lshrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +lshrsi3_table: + .byte lshrsi3_0-lshrsi3_table + .byte lshrsi3_1-lshrsi3_table + .byte lshrsi3_2-lshrsi3_table + .byte lshrsi3_3-lshrsi3_table + .byte lshrsi3_4-lshrsi3_table + .byte lshrsi3_5-lshrsi3_table + .byte lshrsi3_6-lshrsi3_table + .byte lshrsi3_7-lshrsi3_table + .byte lshrsi3_8-lshrsi3_table + .byte lshrsi3_9-lshrsi3_table + .byte lshrsi3_10-lshrsi3_table + .byte lshrsi3_11-lshrsi3_table + .byte lshrsi3_12-lshrsi3_table + .byte lshrsi3_13-lshrsi3_table + .byte lshrsi3_14-lshrsi3_table + .byte lshrsi3_15-lshrsi3_table + .byte lshrsi3_16-lshrsi3_table + .byte lshrsi3_17-lshrsi3_table + .byte lshrsi3_18-lshrsi3_table + .byte lshrsi3_19-lshrsi3_table + .byte lshrsi3_20-lshrsi3_table + .byte lshrsi3_21-lshrsi3_table + .byte lshrsi3_22-lshrsi3_table + .byte lshrsi3_23-lshrsi3_table + .byte lshrsi3_24-lshrsi3_table + .byte lshrsi3_25-lshrsi3_table + .byte lshrsi3_26-lshrsi3_table + .byte lshrsi3_27-lshrsi3_table + .byte lshrsi3_28-lshrsi3_table + .byte lshrsi3_29-lshrsi3_table + .byte lshrsi3_30-lshrsi3_table + .byte lshrsi3_31-lshrsi3_table + +lshrsi3_6: + shlr2 r0 +lshrsi3_4: + shlr2 r0 +lshrsi3_2: + rts + shlr2 r0 + +lshrsi3_7: + shlr2 r0 +lshrsi3_5: + shlr2 r0 +lshrsi3_3: + shlr2 r0 +lshrsi3_1: + rts + shlr r0 + +lshrsi3_14: + shlr2 r0 +lshrsi3_12: + shlr2 r0 +lshrsi3_10: + shlr2 r0 +lshrsi3_8: + rts + shlr8 r0 + +lshrsi3_15: + shlr2 r0 +lshrsi3_13: + shlr2 r0 +lshrsi3_11: + shlr2 r0 +lshrsi3_9: + shlr8 r0 + rts + shlr r0 + +lshrsi3_22: + shlr2 r0 +lshrsi3_20: + shlr2 r0 +lshrsi3_18: + shlr2 r0 +lshrsi3_16: + rts + shlr16 r0 + +lshrsi3_23: + shlr2 r0 +lshrsi3_21: + shlr2 r0 +lshrsi3_19: + shlr2 r0 +lshrsi3_17: + shlr16 r0 + rts + shlr r0 + +lshrsi3_30: + shlr2 r0 +lshrsi3_28: + shlr2 r0 +lshrsi3_26: + shlr2 r0 +lshrsi3_24: + shlr16 r0 + rts + shlr8 r0 + 
+lshrsi3_31: + shlr2 r0 +lshrsi3_29: + shlr2 r0 +lshrsi3_27: + shlr2 r0 +lshrsi3_25: + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +lshrsi3_0: + rts + nop diff --git a/arch/sh/lib/movmem.S b/arch/sh/lib/movmem.S new file mode 100644 index 00000000000..62075f6bc67 --- /dev/null +++ b/arch/sh/lib/movmem.S @@ -0,0 +1,238 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! 
tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .text + .balign 4 + .global __movmem + .global __movstr + .set __movstr, __movmem + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +__movmem: + sts.l pr,@-r15 + shll2 r6 + bsr __movmemSI52+2 + mov.l @(48,r5),r0 + .balign 4 +movmem_loop: /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt movmem_done + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt __movmemSI52 +! done all the large groups, do the remainder +! jump to movmem+ + mova __movmemSI4+4,r0 + add r6,r0 + jmp @r0 +movmem_done: ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 + + .global __movmemSI64 + .global __movstrSI64 + .set __movstrSI64, __movmemSI64 +__movmemSI64: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global __movmemSI60 + .global __movstrSI60 + .set __movstrSI60, __movmemSI60 +__movmemSI60: + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global __movmemSI56 + .global __movstrSI56 + .set __movstrSI56, __movmemSI56 +__movmemSI56: + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global __movmemSI52 + .global __movstrSI52 + .set __movstrSI52, __movmemSI52 +__movmemSI52: + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global __movmemSI48 + .global __movstrSI48 + .set __movstrSI48, __movmemSI48 +__movmemSI48: + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global __movmemSI44 + .global __movstrSI44 + .set __movstrSI44, __movmemSI44 +__movmemSI44: + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global __movmemSI40 + .global __movstrSI40 + .set __movstrSI40, __movmemSI40 +__movmemSI40: + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global __movmemSI36 + .global __movstrSI36 + .set __movstrSI36, 
__movmemSI36 +__movmemSI36: + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global __movmemSI32 + .global __movstrSI32 + .set __movstrSI32, __movmemSI32 +__movmemSI32: + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global __movmemSI28 + .global __movstrSI28 + .set __movstrSI28, __movmemSI28 +__movmemSI28: + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global __movmemSI24 + .global __movstrSI24 + .set __movstrSI24, __movmemSI24 +__movmemSI24: + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global __movmemSI20 + .global __movstrSI20 + .set __movstrSI20, __movmemSI20 +__movmemSI20: + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global __movmemSI16 + .global __movstrSI16 + .set __movstrSI16, __movmemSI16 +__movmemSI16: + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global __movmemSI12 + .global __movstrSI12 + .set __movstrSI12, __movmemSI12 +__movmemSI12: + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global __movmemSI8 + .global __movstrSI8 + .set __movstrSI8, __movmemSI8 +__movmemSI8: + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global __movmemSI4 + .global __movstrSI4 + .set __movstrSI4, __movmemSI4 +__movmemSI4: + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + .global __movmem_i4_even + .global __movstr_i4_even + .set __movstr_i4_even, __movmem_i4_even + + .global __movmem_i4_odd + .global __movstr_i4_odd + .set __movstr_i4_odd, __movmem_i4_odd + + .global __movmemSI12_i4 + .global __movstrSI12_i4 + .set __movstrSI12_i4, __movmemSI12_i4 + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +__movmem_i4_even: + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +__movmem_i4_odd: + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + 
+ .p2align 4 +__movmemSI12_i4: + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) diff --git a/arch/sh/lib/udiv_qrnnd.S b/arch/sh/lib/udiv_qrnnd.S new file mode 100644 index 00000000000..32b9a36de94 --- /dev/null +++ b/arch/sh/lib/udiv_qrnnd.S @@ -0,0 +1,81 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! 
tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. */ + .global __udiv_qrnnd_16 + .balign 8 +__udiv_qrnnd_16: + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + bf/s 1b + shlr16 r1 +0: rts + nop diff --git a/arch/sh/lib/udivsi3-Os.S b/arch/sh/lib/udivsi3-Os.S new file mode 100644 index 00000000000..2bed76587f1 --- /dev/null +++ b/arch/sh/lib/udivsi3-Os.S @@ -0,0 +1,147 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* Moderately Space-optimized libgcc routines for the Renesas SH /
+   STMicroelectronics ST40 CPUs.
+   Contributed by J"orn Rennecke joern.rennecke@st.com. */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+   sh4-200 run times:
+   udiv small divisor: 55 cycles
+   udiv large divisor: 52 cycles
+   sdiv small divisor, positive result: 59 cycles
+   sdiv large divisor, positive result: 56 cycles
+   sdiv small divisor, negative result: 65 cycles (*)
+   sdiv large divisor, negative result: 62 cycles (*)
+   (*): r2 is restored in the rts delay slot and has a lingering latency
+   of two more cycles. */
+	.balign 4
+	.global __udivsi3_i4i
+	.global __udivsi3
+	.set __udivsi3, __udivsi3_i4i
+	.type __udivsi3_i4i, @function
+	.type __sdivsi3_i4i, @function
+__udivsi3_i4i:	/* unsigned divide: dividend in r4, divisor in r5, quotient built in r0; r4/r5 are pushed and restored, r1 is overwritten */
+	sts pr,r1	/* keep the return address in r1: the bsr calls below overwrite PR */
+	mov.l r4,@-r15	/* push r4 */
+	extu.w r5,r0
+	cmp/eq r5,r0	/* T = divisor fits in 16 bits */
+	swap.w r4,r0
+	shlr16 r4
+	bf/s large_divisor
+	div0u	/* delay slot: clears M, Q and T for unsigned div1 stepping */
+	mov.l r5,@-r15	/* push r5 */
+	shll16 r5
+sdiv_small_divisor:	/* shared with __sdivsi3_i4i; returns through r1 */
+	div1 r5,r4
+	bsr div6
+	div1 r5,r4	/* bsr delay slot */
+	div1 r5,r4
+	bsr div6
+	div1 r5,r4	/* bsr delay slot */
+	xtrct r4,r0
+	xtrct r0,r4
+	bsr div7
+	swap.w r4,r4	/* bsr delay slot */
+	div1 r5,r4
+	bsr div7
+	div1 r5,r4	/* bsr delay slot */
+	xtrct r4,r0
+	mov.l @r15+,r5	/* pop r5 */
+	swap.w r0,r0
+	mov.l @r15+,r4	/* pop r4 */
+	jmp @r1	/* return (or continue at negate_result when entered from the signed routine) */
+	rotcl r0	/* delay slot: shift the last T bit into the quotient */
+div7:	/* helper: seven div1 steps, falling through into div6 */
+	div1 r5,r4
+div6:	/* helper: six div1 steps, the last one in the rts delay slot */
+	div1 r5,r4; div1 r5,r4; div1 r5,r4
+	div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+divx3:	/* helper: three rotcl/div1 pairs, the last div1 in the rts delay slot */
+	rotcl r0
+	div1 r5,r4
+	rotcl r0
+	div1 r5,r4
+	rotcl r0
+	rts
+	div1 r5,r4
+
+large_divisor:
+	mov.l r5,@-r15	/* push r5 */
+sdiv_large_divisor:	/* shared with __sdivsi3_i4i; returns through r1 */
+	xor r4,r0
+	.rept 4
+	rotcl r0
+	bsr divx3
+	div1 r5,r4
+	.endr
+	mov.l @r15+,r5	/* pop r5 */
+	mov.l @r15+,r4	/* pop r4 */
+	jmp @r1
+	rotcl r0	/* delay slot */
+
+	.global __sdivsi3_i4i
+	.global __sdivsi3
+	.set __sdivsi3, __sdivsi3_i4i
+__sdivsi3_i4i:	/* signed divide: operands are made non-negative, then the unsigned paths above are reused */
+	mov.l r4,@-r15	/* push r4 */
+	cmp/pz r5	/* T = (divisor >= 0) */
+	mov.l r5,@-r15	/* push r5 */
+	bt/s pos_divisor
+	cmp/pz r4	/* delay slot: T = (dividend >= 0) */
+	neg r5,r5
+	extu.w r5,r0
+	bt/s neg_result	/* negative divisor, non-negative dividend */
+	cmp/eq r5,r0	/* delay slot: T = |divisor| fits in 16 bits; tested at sdiv_check_divisor */
+	neg r4,r4
+pos_result:
+	swap.w r4,r0
+	bra sdiv_check_divisor
+	sts pr,r1	/* delay slot: return straight to the caller */
+pos_divisor:
+	extu.w r5,r0
+	bt/s pos_result	/* both operands non-negative */
+	cmp/eq r5,r0	/* delay slot: T = divisor fits in 16 bits */
+	neg r4,r4
+neg_result:
+	mova negate_result,r0
+	;	/* NOTE(review): empty statement as found in this source; its purpose is not evident from here */
+	mov r0,r1	/* the final "jmp @r1" will land on negate_result */
+	swap.w r4,r0
+	lds r2,macl	/* stash the caller's r2 in MACL ... */
+	sts pr,r2	/* ... so r2 can carry the real return address */
+sdiv_check_divisor:
+	shlr16 r4
+	bf/s sdiv_large_divisor
+	div0u	/* delay slot */
+	bra sdiv_small_divisor
+	shll16 r5	/* delay slot */
+	.balign 4
+negate_result:
+	neg r0,r0	/* negate the quotient */
+	jmp @r2	/* return to the caller */
+	sts macl,r2	/* delay slot: restore the caller's r2 */
diff --git a/arch/sh/lib/udivsi3.S b/arch/sh/lib/udivsi3.S
new file mode 100644
index 00000000000..a810fc6f21d
--- /dev/null
+++ b/arch/sh/lib/udivsi3.S
@@ -0,0 +1,664 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+!! libgcc routines for the Renesas / SuperH SH CPUs.
+!! 
Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+   ELF local label prefixes by J"orn Rennecke
+   amylaar@cygnus.com */
+
+/* This code uses shld, thus is not suitable for SH1 / SH2. */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4.
+   Uses a lookup table for divisors in the range -128 .. +128, and
+   div1 with case distinction for larger divisors in three more ranges.
+   The code is lumped together with the table to allow the use of mova. */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN	/* byte offsets used with mov.b into a longword on the stack */
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+	.balign 4
+	.global __udivsi3_i4i
+	.global __udivsi3
+	.set __udivsi3, __udivsi3_i4i
+	.type __udivsi3_i4i, @function
+__udivsi3_i4i:	/* unsigned divide: dividend in r4, divisor in r5, quotient returned in r0; r4/r5 are pushed and restored */
+	mov.w c128_w, r1	/* r1 = 128 */
+	div0u
+	mov r4,r0
+	shlr8 r0
+	cmp/hi r1,r5	/* T = (divisor > 128), unsigned */
+	extu.w r5,r1
+	bf udiv_le128	/* divisor <= 128: table-lookup path */
+	cmp/eq r5,r1	/* T = divisor fits in 16 bits */
+	bf udiv_ge64k
+	shlr r0
+	mov r5,r1
+	shll16 r5
+	mov.l r4,@-r15	/* push r4 */
+	div1 r5,r0
+	mov.l r1,@-r15	/* push the original divisor */
+	div1 r5,r0
+	div1 r5,r0
+	bra udiv_25	/* continue in the code shared with __sdivsi3_i4i */
+	div1 r5,r0	/* delay slot */
+
+div_le128:	/* signed entry to the table path; r4/r5 are already on the stack */
+	mova div_table_ix,r0
+	bra div_le128_2
+	mov.b @(r0,r5),r1	/* delay slot: r1 = div_table_ix[divisor] */
+udiv_le128:
+	mov.l r4,@-r15	/* push r4 */
+	mova div_table_ix,r0
+	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor] */
+	mov.l r5,@-r15	/* push r5 */
+div_le128_2:
+	mova div_table_inv,r0
+	mov.l @(r0,r1),r1	/* r1 = inverse-table entry at byte offset r1 */
+	mov r5,r0
+	tst #0xfe,r0	/* T = 1 when the divisor is 0 or 1 */
+	mova div_table_clz,r0
+	dmulu.l r1,r4	/* MACH:MACL = r1 * r4, unsigned 64-bit product */
+	mov.b @(r0,r5),r1	/* r1 = div_table_clz[divisor], used as the shld count */
+	bt/s div_by_1
+	mov r4,r0	/* delay slot: quotient = dividend */
+	mov.l @r15+,r5	/* pop r5 */
+	sts mach,r0	/* r0 = high word of the product */
+	/* clrt */
+	addc r4,r0
+	mov.l @r15+,r4	/* pop r4 */
+	rotcr r0
+	rts
+	shld r1,r0	/* delay slot: a negative count in r1 shifts right */
+
+div_by_1_neg:
+	neg r4,r0
+div_by_1:
+	mov.l @r15+,r5	/* pop r5 */
+	rts
+	mov.l @r15+,r4	/* delay slot: pop r4 */
+
+div_ge64k:	/* signed entry; T was set by "cmp/hi r0,r5" in the branch delay slot */
+	bt/s div_r8
+	div0u	/* delay slot */
+	shll8 r5
+	bra div_ge64k_2
+	div1 r5,r0	/* delay slot */
+udiv_ge64k:
+	cmp/hi r0,r5	/* T = (divisor > dividend >> 8), unsigned */
+	mov r5,r1
+	bt udiv_r8
+	shll8 r5
+	mov.l r4,@-r15	/* push r4 */
+	div1 r5,r0
+	mov.l r1,@-r15	/* push the original divisor */
+div_ge64k_2:
+	div1 r5,r0
+	mov.l zero_l,r1	/* r1 = 0 */
+	.rept 4
+	div1 r5,r0
+	.endr
+	mov.l r1,@-r15	/* push a zeroed scratch longword */
+	div1 r5,r0
+	mov.w m256_w,r1	/* mov.w sign-extends 0xff00, so r1 = 0xffffff00 */
+	div1 r5,r0
+	mov.b r0,@(L_LSWMSB,r15)	/* store one byte into the scratch longword */
+	xor r4,r0
+	and r1,r0
+	bra div_ge64k_end
+	xor r4,r0	/* delay slot */
+
+div_r8:	/* signed entry; r4/r5 are already on the stack */
+	shll16 r4
+	bra div_r8_2
+	shll8 r4	/* delay slot */
+udiv_r8:
+	mov.l r4,@-r15	/* push r4 */
+	shll16 r4
+	clrt
+	shll8 r4
+	mov.l r5,@-r15	/* push r5 */
+div_r8_2:
+	rotcl r4
+	mov r0,r1
+	div1 r5,r1
+	mov r4,r0
+	rotcl r0
+	mov r5,r4
+	div1 r5,r1
+	.rept 5
+	rotcl r0; div1 r5,r1
+	.endr
+	rotcl r0
+	mov.l @r15+,r5	/* pop r5; r4 holds a copy of the divisor for the last step */
+	div1 r4,r1
+	mov.l @r15+,r4	/* pop r4 */
+	rts
+	rotcl r0	/* delay slot */
+
+	.global __sdivsi3_i4i
+	.global __sdivsi3
+	.set __sdivsi3, __sdivsi3_i4i
+	.type __sdivsi3_i4i, @function
+	/* This is link-compatible with a __sdivsi3 call,
+	   but we effectively clobber only r1. */
+__sdivsi3_i4i:
+	mov.l r4,@-r15	/* push r4 */
+	cmp/pz r5	/* T = (divisor >= 0) */
+	mov.w c128_w, r1	/* r1 = 128 */
+	bt/s pos_divisor
+	cmp/pz r4	/* delay slot: T = (dividend >= 0) */
+	mov.l r5,@-r15	/* push r5 before it is negated */
+	neg r5,r5
+	bt/s neg_result	/* negative divisor, non-negative dividend */
+	cmp/hi r1,r5	/* delay slot: T = (|divisor| > 128) */
+	neg r4,r4
+pos_result:
+	extu.w r5,r0
+	bf div_le128
+	cmp/eq r5,r0	/* T = divisor fits in 16 bits */
+	mov r4,r0
+	shlr8 r0
+	bf/s div_ge64k
+	cmp/hi r0,r5	/* delay slot: T is tested at div_ge64k */
+	div0u
+	shll16 r5
+	div1 r5,r0
+	div1 r5,r0
+	div1 r5,r0
+udiv_25:	/* the unsigned routine joins here */
+	mov.l zero_l,r1	/* r1 = 0 */
+	div1 r5,r0
+	div1 r5,r0
+	mov.l r1,@-r15	/* push a zeroed scratch longword */
+	.rept 3
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_MSWLSB,r15)
+	xtrct r4,r0
+	swap.w r0,r0
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_LSWMSB,r15)
+div_ge64k_end:
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+	extu.b r0,r0
+	mov.l @r15+,r5	/* pop r5 */
+	or r4,r0
+	mov.l @r15+,r4	/* pop r4 */
+	rts
+	rotcl r0	/* delay slot */
+
+div_le128_neg:	/* table path for a negative result; r0 = zero-extended low 16 bits of r5 on entry */
+	tst #0xfe,r0	/* T = 1 when the divisor is 0 or 1 */
+	mova div_table_ix,r0
+	mov.b @(r0,r5),r1
+	mova div_table_inv,r0
+	bt/s div_by_1_neg
+	mov.l @(r0,r1),r1	/* delay slot */
+	mova div_table_clz,r0
+	dmulu.l r1,r4
+	mov.b @(r0,r5),r1
+	mov.l @r15+,r5	/* pop r5 */
+	sts mach,r0
+	/* clrt */
+	addc r4,r0
+	mov.l @r15+,r4	/* pop r4 */
+	rotcr r0
+	shld r1,r0
+	rts
+	neg r0,r0	/* delay slot: negate the quotient */
+
+pos_divisor:
+	mov.l r5,@-r15	/* push r5 */
+	bt/s pos_result	/* both operands non-negative */
+	cmp/hi r1,r5	/* delay slot: T = (divisor > 128) */
+	neg r4,r4
+neg_result:
+	extu.w r5,r0
+	bf div_le128_neg
+	cmp/eq r5,r0	/* T = divisor fits in 16 bits */
+	mov r4,r0
+	shlr8 r0
+	bf/s div_ge64k_neg
+	cmp/hi r0,r5	/* delay slot: T is tested at div_ge64k_neg */
+	div0u
+	mov.l zero_l,r1	/* r1 = 0 */
+	shll16 r5
+	div1 r5,r0
+	mov.l r1,@-r15	/* push a zeroed scratch longword */
+	.rept 7
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_MSWLSB,r15)
+	xtrct r4,r0
+	swap.w r0,r0
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_LSWMSB,r15)
+div_ge64k_neg_end:
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+	extu.b r0,r1
+	mov.l @r15+,r5	/* pop r5 */
+	or r4,r1
+div_r8_neg_end:
+	mov.l @r15+,r4	/* pop r4 */
+	rotcl r1
+	rts
+	neg r1,r0	/* delay slot: negate the quotient into r0 */
+
+div_ge64k_neg:
+	bt/s div_r8_neg
+	div0u	/* delay slot */
+	shll8 r5
+	mov.l zero_l,r1	/* r1 = 0 */
+	.rept 6
+	div1 r5,r0
+	.endr
+	mov.l r1,@-r15	/* push a zeroed scratch longword */
+	div1 r5,r0
+	mov.w m256_w,r1	/* mov.w sign-extends 0xff00, so r1 = 0xffffff00 */
+	div1 r5,r0
+	mov.b r0,@(L_LSWMSB,r15)
+	xor r4,r0
+	and r1,r0
+	bra div_ge64k_neg_end
+	xor r4,r0	/* delay slot */
+
+c128_w:	/* constant compared against the divisor to select the table-lookup path */
+	.word 128
+
+div_r8_neg:
+	clrt
+	shll16 r4
+	mov r4,r1
+	shll8 r1
+	mov r5,r4
+	.rept 7
+	rotcl r1; div1 r5,r0
+	.endr
+	mov.l @r15+,r5	/* pop r5; r4 holds a copy of the divisor for the last step */
+	rotcl r1
+	bra div_r8_neg_end
+	div1 r4,r0	/* delay slot */
+
+m256_w:	/* loaded with mov.w, which sign-extends it to 0xffffff00 */
+	.word 0xff00
+/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 +div_table_clz: + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. 
*/ +div_table_ix: + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
*/ + .balign 4 +zero_l: + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +div_table_inv: + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ |