From da957e111bb0c189a4a3bf8a00caaecb59ed94ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:31 +0200 Subject: i386: move math-emu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/Makefile | 2 +- arch/i386/math-emu/Makefile | 30 - arch/i386/math-emu/README | 427 --------- arch/i386/math-emu/control_w.h | 45 - arch/i386/math-emu/div_Xsig.S | 365 -------- arch/i386/math-emu/div_small.S | 47 - arch/i386/math-emu/errors.c | 739 --------------- arch/i386/math-emu/exception.h | 53 -- arch/i386/math-emu/fpu_arith.c | 174 ---- arch/i386/math-emu/fpu_asm.h | 32 - arch/i386/math-emu/fpu_aux.c | 204 ---- arch/i386/math-emu/fpu_emu.h | 218 ----- arch/i386/math-emu/fpu_entry.c | 761 --------------- arch/i386/math-emu/fpu_etc.c | 143 --- arch/i386/math-emu/fpu_proto.h | 140 --- arch/i386/math-emu/fpu_system.h | 90 -- arch/i386/math-emu/fpu_tags.c | 127 --- arch/i386/math-emu/fpu_trig.c | 1845 ------------------------------------- arch/i386/math-emu/get_address.c | 438 --------- arch/i386/math-emu/load_store.c | 272 ------ arch/i386/math-emu/mul_Xsig.S | 176 ---- arch/i386/math-emu/poly.h | 121 --- arch/i386/math-emu/poly_2xm1.c | 156 ---- arch/i386/math-emu/poly_atan.c | 229 ----- arch/i386/math-emu/poly_l2.c | 272 ------ arch/i386/math-emu/poly_sin.c | 397 -------- arch/i386/math-emu/poly_tan.c | 222 ----- arch/i386/math-emu/polynom_Xsig.S | 135 --- arch/i386/math-emu/reg_add_sub.c | 374 -------- arch/i386/math-emu/reg_compare.c | 381 -------- arch/i386/math-emu/reg_constant.c | 120 --- arch/i386/math-emu/reg_constant.h | 25 - arch/i386/math-emu/reg_convert.c | 53 -- arch/i386/math-emu/reg_divide.c | 207 ----- arch/i386/math-emu/reg_ld_str.c | 1375 --------------------------- arch/i386/math-emu/reg_mul.c | 132 --- arch/i386/math-emu/reg_norm.S | 147 --- arch/i386/math-emu/reg_round.S | 708 -------------- arch/i386/math-emu/reg_u_add.S | 167 ---- arch/i386/math-emu/reg_u_div.S | 471 ---------- arch/i386/math-emu/reg_u_mul.S | 148 --- arch/i386/math-emu/reg_u_sub.S | 272 ------ arch/i386/math-emu/round_Xsig.S | 141 --- arch/i386/math-emu/shr_Xsig.S | 87 -- arch/i386/math-emu/status_w.h | 67 -- arch/i386/math-emu/version.h | 12 - arch/i386/math-emu/wm_shrx.S | 204 ---- arch/i386/math-emu/wm_sqrt.S | 470 ---------- arch/x86/math-emu/Makefile | 30 + arch/x86/math-emu/README | 427 +++++++++ arch/x86/math-emu/control_w.h | 45 + arch/x86/math-emu/div_Xsig.S | 365 ++++++++ arch/x86/math-emu/div_small.S | 47 + arch/x86/math-emu/errors.c | 739 +++++++++++++++ arch/x86/math-emu/exception.h | 53 ++ arch/x86/math-emu/fpu_arith.c | 174 ++++ arch/x86/math-emu/fpu_asm.h | 32 + arch/x86/math-emu/fpu_aux.c | 204 ++++ arch/x86/math-emu/fpu_emu.h | 218 +++++ arch/x86/math-emu/fpu_entry.c | 761 +++++++++++++++ arch/x86/math-emu/fpu_etc.c | 143 +++ arch/x86/math-emu/fpu_proto.h | 140 +++ arch/x86/math-emu/fpu_system.h | 90 ++ arch/x86/math-emu/fpu_tags.c | 127 +++ arch/x86/math-emu/fpu_trig.c | 1845 +++++++++++++++++++++++++++++++++++++ arch/x86/math-emu/get_address.c | 438 +++++++++ arch/x86/math-emu/load_store.c | 272 ++++++ arch/x86/math-emu/mul_Xsig.S | 176 ++++ arch/x86/math-emu/poly.h | 121 +++ arch/x86/math-emu/poly_2xm1.c | 156 ++++ arch/x86/math-emu/poly_atan.c | 229 +++++ arch/x86/math-emu/poly_l2.c | 272 ++++++ arch/x86/math-emu/poly_sin.c | 397 ++++++++ arch/x86/math-emu/poly_tan.c | 222 +++++ arch/x86/math-emu/polynom_Xsig.S | 135 +++ arch/x86/math-emu/reg_add_sub.c | 374 ++++++++ arch/x86/math-emu/reg_compare.c | 381 ++++++++ arch/x86/math-emu/reg_constant.c | 120 +++ arch/x86/math-emu/reg_constant.h | 25 + arch/x86/math-emu/reg_convert.c | 53 ++ arch/x86/math-emu/reg_divide.c | 207 +++++ arch/x86/math-emu/reg_ld_str.c | 1375 +++++++++++++++++++++++++++ arch/x86/math-emu/reg_mul.c | 132 +++ arch/x86/math-emu/reg_norm.S | 147 +++ arch/x86/math-emu/reg_round.S | 708 ++++++++++++++ arch/x86/math-emu/reg_u_add.S | 167 ++++ arch/x86/math-emu/reg_u_div.S | 471 ++++++++++ arch/x86/math-emu/reg_u_mul.S | 148 +++ arch/x86/math-emu/reg_u_sub.S | 272 ++++++ arch/x86/math-emu/round_Xsig.S | 141 +++ arch/x86/math-emu/shr_Xsig.S | 87 ++ arch/x86/math-emu/status_w.h | 67 ++ arch/x86/math-emu/version.h | 12 + arch/x86/math-emu/wm_shrx.S | 204 ++++ arch/x86/math-emu/wm_sqrt.S | 470 ++++++++++ 96 files changed, 13421 insertions(+), 13421 deletions(-) delete mode 100644 arch/i386/math-emu/Makefile delete mode 100644 arch/i386/math-emu/README delete mode 100644 arch/i386/math-emu/control_w.h delete mode 100644 arch/i386/math-emu/div_Xsig.S delete mode 100644 arch/i386/math-emu/div_small.S delete mode 100644 arch/i386/math-emu/errors.c delete mode 100644 arch/i386/math-emu/exception.h delete mode 100644 arch/i386/math-emu/fpu_arith.c delete mode 100644 arch/i386/math-emu/fpu_asm.h delete mode 100644 arch/i386/math-emu/fpu_aux.c delete mode 100644 arch/i386/math-emu/fpu_emu.h delete mode 100644 arch/i386/math-emu/fpu_entry.c delete mode 100644 arch/i386/math-emu/fpu_etc.c delete mode 100644 arch/i386/math-emu/fpu_proto.h delete mode 100644 arch/i386/math-emu/fpu_system.h delete mode 100644 arch/i386/math-emu/fpu_tags.c delete mode 100644 arch/i386/math-emu/fpu_trig.c delete mode 100644 arch/i386/math-emu/get_address.c delete mode 100644 arch/i386/math-emu/load_store.c delete mode 100644 arch/i386/math-emu/mul_Xsig.S delete mode 100644 arch/i386/math-emu/poly.h delete mode 100644 arch/i386/math-emu/poly_2xm1.c delete mode 100644 arch/i386/math-emu/poly_atan.c delete mode 100644 arch/i386/math-emu/poly_l2.c delete mode 100644 arch/i386/math-emu/poly_sin.c delete mode 100644 arch/i386/math-emu/poly_tan.c delete mode 100644 arch/i386/math-emu/polynom_Xsig.S delete mode 100644 arch/i386/math-emu/reg_add_sub.c delete mode 100644 arch/i386/math-emu/reg_compare.c delete mode 100644 arch/i386/math-emu/reg_constant.c delete mode 100644 arch/i386/math-emu/reg_constant.h delete mode 100644 arch/i386/math-emu/reg_convert.c delete mode 100644 arch/i386/math-emu/reg_divide.c delete mode 100644 arch/i386/math-emu/reg_ld_str.c delete mode 100644 arch/i386/math-emu/reg_mul.c delete mode 100644 arch/i386/math-emu/reg_norm.S delete mode 100644 arch/i386/math-emu/reg_round.S delete mode 100644 arch/i386/math-emu/reg_u_add.S delete mode 100644 arch/i386/math-emu/reg_u_div.S delete mode 100644 arch/i386/math-emu/reg_u_mul.S delete mode 100644 arch/i386/math-emu/reg_u_sub.S delete mode 100644 arch/i386/math-emu/round_Xsig.S delete mode 100644 arch/i386/math-emu/shr_Xsig.S delete mode 100644 arch/i386/math-emu/status_w.h delete mode 100644 arch/i386/math-emu/version.h delete mode 100644 arch/i386/math-emu/wm_shrx.S delete mode 100644 arch/i386/math-emu/wm_sqrt.S create mode 100644 arch/x86/math-emu/Makefile create mode 100644 arch/x86/math-emu/README create mode 100644 arch/x86/math-emu/control_w.h create mode 100644 arch/x86/math-emu/div_Xsig.S create mode 100644 arch/x86/math-emu/div_small.S create mode 100644 arch/x86/math-emu/errors.c create mode 100644 arch/x86/math-emu/exception.h create mode 100644 arch/x86/math-emu/fpu_arith.c create mode 100644 arch/x86/math-emu/fpu_asm.h create mode 100644 arch/x86/math-emu/fpu_aux.c create mode 100644 arch/x86/math-emu/fpu_emu.h create mode 100644 arch/x86/math-emu/fpu_entry.c create mode 100644 arch/x86/math-emu/fpu_etc.c create mode 100644 arch/x86/math-emu/fpu_proto.h create mode 100644 arch/x86/math-emu/fpu_system.h create mode 100644 arch/x86/math-emu/fpu_tags.c create mode 100644 arch/x86/math-emu/fpu_trig.c create mode 100644 arch/x86/math-emu/get_address.c create mode 100644 arch/x86/math-emu/load_store.c create mode 100644 arch/x86/math-emu/mul_Xsig.S create mode 100644 arch/x86/math-emu/poly.h create mode 100644 arch/x86/math-emu/poly_2xm1.c create mode 100644 arch/x86/math-emu/poly_atan.c create mode 100644 arch/x86/math-emu/poly_l2.c create mode 100644 arch/x86/math-emu/poly_sin.c create mode 100644 arch/x86/math-emu/poly_tan.c create mode 100644 arch/x86/math-emu/polynom_Xsig.S create mode 100644 arch/x86/math-emu/reg_add_sub.c create mode 100644 arch/x86/math-emu/reg_compare.c create mode 100644 arch/x86/math-emu/reg_constant.c create mode 100644 arch/x86/math-emu/reg_constant.h create mode 100644 arch/x86/math-emu/reg_convert.c create mode 100644 arch/x86/math-emu/reg_divide.c create mode 100644 arch/x86/math-emu/reg_ld_str.c create mode 100644 arch/x86/math-emu/reg_mul.c create mode 100644 arch/x86/math-emu/reg_norm.S create mode 100644 arch/x86/math-emu/reg_round.S create mode 100644 arch/x86/math-emu/reg_u_add.S create mode 100644 arch/x86/math-emu/reg_u_div.S create mode 100644 arch/x86/math-emu/reg_u_mul.S create mode 100644 arch/x86/math-emu/reg_u_sub.S create mode 100644 arch/x86/math-emu/round_Xsig.S create mode 100644 arch/x86/math-emu/shr_Xsig.S create mode 100644 arch/x86/math-emu/status_w.h create mode 100644 arch/x86/math-emu/version.h create mode 100644 arch/x86/math-emu/wm_shrx.S create mode 100644 arch/x86/math-emu/wm_sqrt.S diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index fc86d41d252..561cc21914b 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -707,7 +707,7 @@ config MATH_EMULATION intend to use this kernel on different machines. More information about the internals of the Linux math coprocessor - emulation can be found in . + emulation can be found in . If you are not sure, say Y; apart from resulting in a 66 KB bigger kernel, it won't hurt. diff --git a/arch/i386/Makefile b/arch/i386/Makefile index dca07ae933d..fe374b65430 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -106,7 +106,7 @@ core-y += arch/i386/kernel/ \ arch/i386/mm/ \ $(mcore-y)/ \ arch/x86/crypto/ -drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ +drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile deleted file mode 100644 index 9c943fa6ce6..00000000000 --- a/arch/i386/math-emu/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Makefile for wm-FPU-emu -# - -#DEBUG = -DDEBUGGING -DEBUG = -PARANOID = -DPARANOID -CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) - -EXTRA_AFLAGS := $(PARANOID) - -# From 'C' language sources: -C_OBJS =fpu_entry.o errors.o \ - fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ - load_store.o get_address.o \ - poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ - reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ - reg_ld_str.o reg_divide.o reg_mul.o - -# From 80x86 assembler sources: -A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ - div_small.o reg_norm.o reg_round.o \ - wm_shrx.o wm_sqrt.o \ - div_Xsig.o polynom_Xsig.o round_Xsig.o \ - shr_Xsig.o mul_Xsig.o - -obj-y =$(C_OBJS) $(A_OBJS) - -proto: - cproto -e -DMAKING_PROTO *.c >fpu_proto.h diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README deleted file mode 100644 index e6235491d6e..00000000000 --- a/arch/i386/math-emu/README +++ /dev/null @@ -1,427 +0,0 @@ - +---------------------------------------------------------------------------+ - | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | - | | - | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | This program is free software; you can redistribute it and/or modify | - | it under the terms of the GNU General Public License version 2 as | - | published by the Free Software Foundation. | - | | - | This program is distributed in the hope that it will be useful, | - | but WITHOUT ANY WARRANTY; without even the implied warranty of | - | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | - | GNU General Public License for more details. | - | | - | You should have received a copy of the GNU General Public License | - | along with this program; if not, write to the Free Software | - | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | - | | - +---------------------------------------------------------------------------+ - - - -wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 -which was my 80387 emulator for early versions of djgpp (gcc under -msdos); wm-emu387 was in turn based upon emu387 which was written by -DJ Delorie for djgpp. The interface to the Linux kernel is based upon -the original Linux math emulator by Linus Torvalds. - -My target FPU for wm-FPU-emu is that described in the Intel486 -Programmer's Reference Manual (1992 edition). Unfortunately, numerous -facets of the functioning of the FPU are not well covered in the -Reference Manual. The information in the manual has been supplemented -with measurements on real 80486's. Unfortunately, it is simply not -possible to be sure that all of the peculiarities of the 80486 have -been discovered, so there is always likely to be obscure differences -in the detailed behaviour of the emulator and a real 80486. - -wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, -but is very close. See "Limitations" later in this file for a list of -some differences. - -Please report bugs, etc to me at: - billm@melbpc.org.au -or b.metzenthen@medoto.unimelb.edu.au - -For more information on the emulator and on floating point topics, see -my web pages, currently at http://www.suburbia.net/~billm/ - - ---Bill Metzenthen - December 1999 - - ------------------------ Internals of wm-FPU-emu ----------------------- - -Numeric algorithms: -(1) Add, subtract, and multiply. Nothing remarkable in these. -(2) Divide has been tuned to get reasonable performance. The algorithm - is not the obvious one which most people seem to use, but is designed - to take advantage of the characteristics of the 80386. I expect that - it has been invented many times before I discovered it, but I have not - seen it. It is based upon one of those ideas which one carries around - for years without ever bothering to check it out. -(3) The sqrt function has been tuned to get good performance. It is based - upon Newton's classic method. Performance was improved by capitalizing - upon the properties of Newton's method, and the code is once again - structured taking account of the 80386 characteristics. -(4) The trig, log, and exp functions are based in each case upon quasi- - "optimal" polynomial approximations. My definition of "optimal" was - based upon getting good accuracy with reasonable speed. -(5) The argument reducing code for the trig function effectively uses - a value of pi which is accurate to more than 128 bits. As a consequence, - the reduced argument is accurate to more than 64 bits for arguments up - to a few pi, and accurate to more than 64 bits for most arguments, - even for arguments approaching 2^63. This is far superior to an - 80486, which uses a value of pi which is accurate to 66 bits. - -The code of the emulator is complicated slightly by the need to -account for a limited form of re-entrancy. Normally, the emulator will -emulate each FPU instruction to completion without interruption. -However, it may happen that when the emulator is accessing the user -memory space, swapping may be needed. In this case the emulator may be -temporarily suspended while disk i/o takes place. During this time -another process may use the emulator, thereby perhaps changing static -variables. The code which accesses user memory is confined to five -files: - fpu_entry.c - reg_ld_str.c - load_store.c - get_address.c - errors.c -As from version 1.12 of the emulator, no static variables are used -(apart from those in the kernel's per-process tables). The emulator is -therefore now fully re-entrant, rather than having just the restricted -form of re-entrancy which is required by the Linux kernel. - ------------------------ Limitations of wm-FPU-emu ----------------------- - -There are a number of differences between the current wm-FPU-emu -(version 2.01) and the 80486 FPU (apart from bugs). The differences -are fewer than those which applied to the 1.xx series of the emulator. -Some of the more important differences are listed below: - -The Roundup flag does not have much meaning for the transcendental -functions and its 80486 value with these functions is likely to differ -from its emulator value. - -In a few rare cases the Underflow flag obtained with the emulator will -be different from that obtained with an 80486. This occurs when the -following conditions apply simultaneously: -(a) the operands have a higher precision than the current setting of the - precision control (PC) flags. -(b) the underflow exception is masked. -(c) the magnitude of the exact result (before rounding) is less than 2^-16382. -(d) the magnitude of the final result (after rounding) is exactly 2^-16382. -(e) the magnitude of the exact result would be exactly 2^-16382 if the - operands were rounded to the current precision before the arithmetic - operation was performed. -If all of these apply, the emulator will set the Underflow flag but a real -80486 will not. - -NOTE: Certain formats of Extended Real are UNSUPPORTED. They are -unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, -and Unnormals. None of these will be generated by an 80486 or by the -emulator. Do not use them. The emulator treats them differently in -detail from the way an 80486 does. - -Self modifying code can cause the emulator to fail. An example of such -code is: - movl %esp,[%ebx] - fld1 -The FPU instruction may be (usually will be) loaded into the pre-fetch -queue of the CPU before the mov instruction is executed. If the -destination of the 'movl' overlaps the FPU instruction then the bytes -in the prefetch queue and memory will be inconsistent when the FPU -instruction is executed. The emulator will be invoked but will not be -able to find the instruction which caused the device-not-present -exception. For this case, the emulator cannot emulate the behaviour of -an 80486DX. - -Handling of the address size override prefix byte (0x67) has not been -extensively tested yet. A major problem exists because using it in -vm86 mode can cause a general protection fault. Address offsets -greater than 0xffff appear to be illegal in vm86 mode but are quite -acceptable (and work) in real mode. A small test program developed to -check the addressing, and which runs successfully in real mode, -crashes dosemu under Linux and also brings Windows down with a general -protection fault message when run under the MS-DOS prompt of Windows -3.1. (The program simply reads data from a valid address). - -The emulator supports 16-bit protected mode, with one difference from -an 80486DX. A 80486DX will allow some floating point instructions to -write a few bytes below the lowest address of the stack. The emulator -will not allow this in 16-bit protected mode: no instructions are -allowed to write outside the bounds set by the protection. - ------------------------ Performance of wm-FPU-emu ----------------------- - -Speed. ------ - -The speed of floating point computation with the emulator will depend -upon instruction mix. Relative performance is best for the instructions -which require most computation. The simple instructions are adversely -affected by the FPU instruction trap overhead. - - -Timing: Some simple timing tests have been made on the emulator functions. -The times include load/store instructions. All times are in microseconds -measured on a 33MHz 386 with 64k cache. The Turbo C tests were under -ms-dos, the next two columns are for emulators running with the djgpp -ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, -using libm4.0 (hard). - -function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu - - + 60.5 154.8 76.5 139.4 - - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 - * 71.0 190.8 79.6 146.6 - / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 - - sin() 310.8 4692.0 319.0 398.5 - cos() 284.4 4855.2 308.0 388.7 - tan() 495.0 8807.1 394.9 504.7 - atan() 328.9 4866.4 601.1 419.5-491.9 - - sqrt() 128.7 crashed 145.2 227.0 - log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 - exp() 479.1 6619.2 469.1 850.8 - - -The performance under Linux is improved by the use of look-ahead code. -The following results show the improvement which is obtained under -Linux due to the look-ahead code. Also given are the times for the -original Linux emulator with the 4.1 'soft' lib. - - [ Linus' note: I changed look-ahead to be the default under linux, as - there was no reason not to use it after I had edited it to be - disabled during tracing ] - - wm-FPU-emu w original w - look-ahead 'soft' lib - + 106.4 190.2 - - 108.6-111.6 192.4-216.2 - * 113.4 193.1 - / 108.8-124.4 700.1-706.2 - - sin() 390.5 2642.0 - cos() 381.5 2767.4 - tan() 496.5 3153.3 - atan() 367.2-435.5 2439.4-3396.8 - - sqrt() 195.1 4732.5 - log() 358.0-387.5 3359.2-3390.3 - exp() 619.3 4046.4 - - -These figures are now somewhat out-of-date. The emulator has become -progressively slower for most functions as more of the 80486 features -have been implemented. - - ------------------------ Accuracy of wm-FPU-emu ----------------------- - - -The accuracy of the emulator is in almost all cases equal to or better -than that of an Intel 80486 FPU. - -The results of the basic arithmetic functions (+,-,*,/), and fsqrt -match those of an 80486 FPU. They are the best possible; the error for -these never exceeds 1/2 an lsb. The fprem and fprem1 instructions -return exact results; they have no error. - - -The following table compares the emulator accuracy for the sqrt(), -trig and log functions against the Turbo C "emulator". For this table, -each function was tested at about 400 points. Ideal worst-case results -would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for -arguments greater than pi/4 can be thought of as being related to the -precision of the argument x; e.g. an argument of pi/2-(1e-10) which is -accurate to 64 bits can result in a relative accuracy in cos() of -about 64 + log2(cos(x)) = 31 bits. - - -Function Tested x range Worst result Turbo C - (relative bits) - -sqrt(x) 1 .. 2 64.1 63.2 -atan(x) 1e-10 .. 200 64.2 62.8 -cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 - 64.1 (x = pi/2-(1e-10)) 31.9 -sin(x) 1e-10 .. pi/2 64.0 62.8 -tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 - 64.1 (x = pi/2-(1e-10)) 31.9 -exp(x) 0 .. 1 63.1 ** 62.9 -log(x) 1+1e-6 .. 2 63.8 ** 62.1 - -** The accuracy for exp() and log() is low because the FPU (emulator) -does not compute them directly; two operations are required. - - -The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or -later) for 'float' variables (24 bit precision numbers) when precision -control is set to 24, 53 or 64 bits, and for 'double' variables (53 -bit precision numbers) when precision control is set to 53 bits (a -properly performing FPU cannot pass the 'paranoia' tests for 'double' -variables when precision control is set to 64 bits). - -The code for reducing the argument for the trig functions (fsin, fcos, -fptan and fsincos) has been improved and now effectively uses a value -for pi which is accurate to more than 128 bits precision. As a -consequence, the accuracy of these functions for large arguments has -been dramatically improved (and is now very much better than an 80486 -FPU). There is also now no degradation of accuracy for fcos and fptan -for operands close to pi/2. Measured results are (note that the -definition of accuracy has changed slightly from that used for the -above table): - -Function Tested x range Worst result - (absolute bits) - -cos(x) 0 .. 9.22e+18 62.0 -sin(x) 1e-16 .. 9.22e+18 62.1 -tan(x) 1e-16 .. 9.22e+18 61.8 - -It is possible with some effort to find very large arguments which -give much degraded precision. For example, the integer number - 8227740058411162616.0 -is within about 10e-7 of a multiple of pi. To find the tan (for -example) of this number to 64 bits precision it would be necessary to -have a value of pi which had about 150 bits precision. The FPU -emulator computes the result to about 42.6 bits precision (the correct -result is about -9.739715e-8). On the other hand, an 80486 FPU returns -0.01059, which in relative terms is hopelessly inaccurate. - -For arguments close to critical angles (which occur at multiples of -pi/2) the emulator is more accurate than an 80486 FPU. For very large -arguments, the emulator is far more accurate. - - -Prior to version 1.20 of the emulator, the accuracy of the results for -the transcendental functions (in their principal range) was not as -good as the results from an 80486 FPU. From version 1.20, the accuracy -has been considerably improved and these functions now give measured -worst-case results which are better than the worst-case results given -by an 80486 FPU. - -The following table gives the measured results for the emulator. The -number of randomly selected arguments in each case is about half a -million. The group of three columns gives the frequency of the given -accuracy in number of times per million, thus the second of these -columns shows that an accuracy of between 63.80 and 63.89 bits was -found at a rate of 133 times per one million measurements for fsin. -The results show that the fsin, fcos and fptan instructions return -results which are in error (i.e. less accurate than the best possible -result (which is 64 bits)) for about one per cent of all arguments -between -pi/2 and +pi/2. The other instructions have a lower -frequency of results which are in error. The last two columns give -the worst accuracy which was found (in bits) and the approximate value -of the argument which produced it. - - frequency (per M) - ------------------- --------------- -instr arg range # tests 63.7 63.8 63.9 worst at arg - bits bits bits bits ------ ------------ ------- ---- ---- ----- ----- -------- -fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 -fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 -fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 -fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) -fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) -fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) -f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 - - -Tests performed on an 80486 FPU showed results of lower accuracy. The -following table gives the results which were obtained with an AMD -486DX2/66 (other tests indicate that an Intel 486DX produces -identical results). The tests were basically the same as those used -to measure the emulator (the values, being random, were in general not -the same). The total number of tests for each instruction are given -at the end of the table, in case each about 100k tests were performed. -Another line of figures at the end of the table shows that most of the -instructions return results which are in error for more than 10 -percent of the arguments tested. - -The numbers in the body of the table give the approx number of times a -result of the given accuracy in bits (given in the left-most column) -was obtained per one million arguments. For three of the instructions, -two columns of results are given: * The second column for f2xm1 gives -the number cases where the results of the first column were for a -positive argument, this shows that this instruction gives better -results for positive arguments than it does for negative. * In the -cases of fcos and fptan, the first column gives the results when all -cases where arguments greater than 1.5 were removed from the results -given in the second column. Unlike the emulator, an 80486 FPU returns -results of relatively poor accuracy for these instructions when the -argument approaches pi/2. The table does not show those cases when the -accuracy of the results were less than 62 bits, which occurs quite -often for fsin and fptan when the argument approaches pi/2. This poor -accuracy is discussed above in relation to the Turbo C "emulator", and -the accuracy of the value of pi. - - -bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan -62.0 0 0 0 0 437 0 0 0 0 925 -62.1 0 0 10 0 894 0 0 0 0 1023 -62.2 14 0 0 0 1033 0 0 0 0 945 -62.3 57 0 0 0 1202 0 0 0 0 1023 -62.4 385 0 0 10 1292 0 23 0 0 1178 -62.5 1140 0 0 119 1649 0 39 0 0 1149 -62.6 2037 0 0 189 1620 0 16 0 0 1169 -62.7 5086 14 0 646 2315 10 101 35 39 1402 -62.8 8818 86 0 984 3050 59 287 131 224 2036 -62.9 11340 1355 0 2126 4153 79 605 357 321 1948 -63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 -63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 -63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 -63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 -63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 -63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 -63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 -63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 -63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 -63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 - -Per cent with error: - 30.9 3.2 18.5 9.8 13.1 11.6 17.4 -Total arguments tested: - 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 - - -------------------------- Contributors ------------------------------- - -A number of people have contributed to the development of the -emulator, often by just reporting bugs, sometimes with suggested -fixes, and a few kind people have provided me with access in one way -or another to an 80486 machine. Contributors include (to those people -who I may have forgotten, please forgive me): - -Linus Torvalds -Tommy.Thorn@daimi.aau.dk -Andrew.Tridgell@anu.edu.au -Nick Holloway, alfie@dcs.warwick.ac.uk -Hermano Moura, moura@dcs.gla.ac.uk -Jon Jagger, J.Jagger@scp.ac.uk -Lennart Benschop -Brian Gallew, geek+@CMU.EDU -Thomas Staniszewski, ts3v+@andrew.cmu.edu -Martin Howell, mph@plasma.apana.org.au -M Saggaf, alsaggaf@athena.mit.edu -Peter Barker, PETER@socpsy.sci.fau.edu -tom@vlsivie.tuwien.ac.at -Dan Russel, russed@rpi.edu -Daniel Carosone, danielce@ee.mu.oz.au -cae@jpmorgan.com -Hamish Coleman, t933093@minyos.xx.rmit.oz.au -Bruce Evans, bde@kralizec.zeta.org.au -Timo Korvola, Timo.Korvola@hut.fi -Rick Lyons, rick@razorback.brisnet.org.au -Rick, jrs@world.std.com - -...and numerous others who responded to my request for help with -a real 80486. - diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h deleted file mode 100644 index ae2274dbd30..00000000000 --- a/arch/i386/math-emu/control_w.h +++ /dev/null @@ -1,45 +0,0 @@ -/*---------------------------------------------------------------------------+ - | control_w.h | - | | - | Copyright (C) 1992,1993 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _CONTROLW_H_ -#define _CONTROLW_H_ - -#ifdef __ASSEMBLY__ -#define _Const_(x) $##x -#else -#define _Const_(x) x -#endif - -#define CW_RC _Const_(0x0C00) /* rounding control */ -#define CW_PC _Const_(0x0300) /* precision control */ - -#define CW_Precision Const_(0x0020) /* loss of precision mask */ -#define CW_Underflow Const_(0x0010) /* underflow mask */ -#define CW_Overflow Const_(0x0008) /* overflow mask */ -#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ -#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ -#define CW_Invalid Const_(0x0001) /* invalid operation mask */ - -#define CW_Exceptions _Const_(0x003f) /* all masks */ - -#define RC_RND _Const_(0x0000) -#define RC_DOWN _Const_(0x0400) -#define RC_UP _Const_(0x0800) -#define RC_CHOP _Const_(0x0C00) - -/* p 15-5: Precision control bits affect only the following: - ADD, SUB(R), MUL, DIV(R), and SQRT */ -#define PR_24_BITS _Const_(0x000) -#define PR_53_BITS _Const_(0x200) -#define PR_64_BITS _Const_(0x300) -#define PR_RESERVED_BITS _Const_(0x100) -/* FULL_PRECISION simulates all exceptions masked */ -#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) - -#endif /* _CONTROLW_H_ */ diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S deleted file mode 100644 index f77ba3058b3..00000000000 --- a/arch/i386/math-emu/div_Xsig.S +++ /dev/null @@ -1,365 +0,0 @@ - .file "div_Xsig.S" -/*---------------------------------------------------------------------------+ - | div_Xsig.S | - | | - | Division subroutine for 96 bit quantities | - | | - | Copyright (C) 1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | - | put the 96 bit result at the location d. | - | | - | The result may not be accurate to 96 bits. It is intended for use where | - | a result better than 64 bits is required. The result should usually be | - | good to at least 94 bits. | - | The returned result is actually divided by one half. This is done to | - | prevent overflow. | - | | - | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | - | | - | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -#define XsigLL(x) (x) -#define XsigL(x) 4(x) -#define XsigH(x) 8(x) - - -#ifndef NON_REENTRANT_FPU -/* - Local storage on the stack: - Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - */ -#define FPU_accum_3 -4(%ebp) -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) -#define FPU_result_3 -20(%ebp) -#define FPU_result_2 -24(%ebp) -#define FPU_result_1 -28(%ebp) - -#else -.data -/* - Local storage in a static area: - Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - */ - .align 4,0 -FPU_accum_3: - .long 0 -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 -FPU_result_3: - .long 0 -FPU_result_2: - .long 0 -FPU_result_1: - .long 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(div_Xsig) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* pointer to num */ - movl PARAM2,%ebx /* pointer to denom */ - -#ifdef PARANOID - testl $0x80000000, XsigH(%ebx) /* Divisor */ - je L_bugged -#endif /* PARANOID */ - - -/*---------------------------------------------------------------------------+ - | Divide: Return arg1/arg2 to arg3. | - | | - | The maximum returned value is (ignoring exponents) | - | .ffffffff ffffffff | - | ------------------ = 1.ffffffff fffffffe | - | .80000000 00000000 | - | and the minimum is | - | .80000000 00000000 | - | ------------------ = .80000000 00000001 (rounded) | - | .ffffffff ffffffff | - | | - +---------------------------------------------------------------------------*/ - - /* Save extended dividend in local register */ - - /* Divide by 2 to prevent overflow */ - clc - movl XsigH(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_3 - movl XsigL(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_2 - movl XsigLL(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_1 - movl $0,%eax - rcrl %eax - movl %eax,FPU_accum_0 - - movl FPU_accum_2,%eax /* Get the current num */ - movl FPU_accum_3,%edx - -/*----------------------------------------------------------------------*/ -/* Initialization done. - Do the first 32 bits. */ - - /* We will divide by a number which is too large */ - movl XsigH(%ebx),%ecx - addl $1,%ecx - jnc LFirst_div_not_1 - - /* here we need to divide by 100000000h, - i.e., no division at all.. */ - mov %edx,%eax - jmp LFirst_div_done - -LFirst_div_not_1: - divl %ecx /* Divide the numerator by the augmented - denom ms dw */ - -LFirst_div_done: - movl %eax,FPU_result_3 /* Put the result in the answer */ - - mull XsigH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_2 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_3 - - movl FPU_result_3,%eax /* Get the result back */ - mull XsigL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - sbbl $0,FPU_accum_3 - je LDo_2nd_32_bits /* Must check for non-zero result here */ - -#ifdef PARANOID - jb L_bugged_1 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - incl FPU_result_3 /* Correct the answer */ - - movl XsigL(%ebx),%eax - movl XsigH(%ebx),%edx - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - sbbl $0,FPU_accum_3 - jne L_bugged_1 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* Half of the main problem is done, there is just a reduced numerator - to handle now. - Work with the second 32 bits, FPU_accum_0 not used from now on */ -LDo_2nd_32_bits: - movl FPU_accum_2,%edx /* get the reduced num */ - movl FPU_accum_1,%eax - - /* need to check for possible subsequent overflow */ - cmpl XsigH(%ebx),%edx - jb LDo_2nd_div - ja LPrevent_2nd_overflow - - cmpl XsigL(%ebx),%eax - jb LDo_2nd_div - -LPrevent_2nd_overflow: -/* The numerator is greater or equal, would cause overflow */ - /* prevent overflow */ - subl XsigL(%ebx),%eax - sbbl XsigH(%ebx),%edx - movl %edx,FPU_accum_2 - movl %eax,FPU_accum_1 - - incl FPU_result_3 /* Reflect the subtraction in the answer */ - -#ifdef PARANOID - je L_bugged_2 /* Can't bump the result to 1.0 */ -#endif /* PARANOID */ - -LDo_2nd_div: - cmpl $0,%ecx /* augmented denom msw */ - jnz LSecond_div_not_1 - - /* %ecx == 0, we are dividing by 1.0 */ - mov %edx,%eax - jmp LSecond_div_done - -LSecond_div_not_1: - divl %ecx /* Divide the numerator by the denom ms dw */ - -LSecond_div_done: - movl %eax,FPU_result_2 /* Put the result in the answer */ - - mull XsigH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - movl FPU_result_2,%eax /* Get the result back */ - mull XsigL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - jz LDo_3rd_32_bits - -#ifdef PARANOID - cmpl $1,FPU_accum_2 - jne L_bugged_2 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - movl XsigL(%ebx),%eax - movl XsigH(%ebx),%edx - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 - jne L_bugged_2 -#endif /* PARANOID */ - - addl $1,FPU_result_2 /* Correct the answer */ - adcl $0,FPU_result_3 - -#ifdef PARANOID - jc L_bugged_2 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* The division is essentially finished here, we just need to perform - tidying operations. - Deal with the 3rd 32 bits */ -LDo_3rd_32_bits: - /* We use an approximation for the third 32 bits. - To take account of the 3rd 32 bits of the divisor - (call them del), we subtract del * (a/b) */ - - movl FPU_result_3,%eax /* a/b */ - mull XsigLL(%ebx) /* del */ - - subl %edx,FPU_accum_1 - - /* A borrow indicates that the result is negative */ - jnb LTest_over - - movl XsigH(%ebx),%edx - addl %edx,FPU_accum_1 - - subl $1,FPU_result_2 /* Adjust the answer */ - sbbl $0,FPU_result_3 - - /* The above addition might not have been enough, check again. */ - movl FPU_accum_1,%edx /* get the reduced num */ - cmpl XsigH(%ebx),%edx /* denom */ - jb LDo_3rd_div - - movl XsigH(%ebx),%edx - addl %edx,FPU_accum_1 - - subl $1,FPU_result_2 /* Adjust the answer */ - sbbl $0,FPU_result_3 - jmp LDo_3rd_div - -LTest_over: - movl FPU_accum_1,%edx /* get the reduced num */ - - /* need to check for possible subsequent overflow */ - cmpl XsigH(%ebx),%edx /* denom */ - jb LDo_3rd_div - - /* prevent overflow */ - subl XsigH(%ebx),%edx - movl %edx,FPU_accum_1 - - addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ - adcl $0,FPU_result_3 - -LDo_3rd_div: - movl FPU_accum_0,%eax - movl FPU_accum_1,%edx - divl XsigH(%ebx) - - movl %eax,FPU_result_1 /* Rough estimate of third word */ - - movl PARAM3,%esi /* pointer to answer */ - - movl FPU_result_1,%eax - movl %eax,XsigLL(%esi) - movl FPU_result_2,%eax - movl %eax,XsigL(%esi) - movl FPU_result_3,%eax - movl %eax,XsigH(%esi) - -L_exit: - popl %ebx - popl %edi - popl %esi - - leave - ret - - -#ifdef PARANOID -/* The logic is wrong if we got here */ -L_bugged: - pushl EX_INTERNAL|0x240 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_1: - pushl EX_INTERNAL|0x241 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_2: - pushl EX_INTERNAL|0x242 - call EXCEPTION - pop %ebx - jmp L_exit -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S deleted file mode 100644 index 47099628fa4..00000000000 --- a/arch/i386/math-emu/div_small.S +++ /dev/null @@ -1,47 +0,0 @@ - .file "div_small.S" -/*---------------------------------------------------------------------------+ - | div_small.S | - | | - | Divide a 64 bit integer by a 32 bit integer & return remainder. | - | | - | Copyright (C) 1992,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -ENTRY(FPU_div_small) - pushl %ebp - movl %esp,%ebp - - pushl %esi - - movl PARAM1,%esi /* pointer to num */ - movl PARAM2,%ecx /* The denominator */ - - movl 4(%esi),%eax /* Get the current num msw */ - xorl %edx,%edx - divl %ecx - - movl %eax,4(%esi) - - movl (%esi),%eax /* Get the num lsw */ - divl %ecx - - movl %eax,(%esi) - - movl %edx,%eax /* Return the remainder in eax */ - - popl %esi - - leave - ret - diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c deleted file mode 100644 index a1b0d22f697..00000000000 --- a/arch/i386/math-emu/errors.c +++ /dev/null @@ -1,739 +0,0 @@ -/*---------------------------------------------------------------------------+ - | errors.c | - | | - | The error handling functions for wm-FPU-emu | - | | - | Copyright (C) 1992,1993,1994,1996 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include - -#include - -#include "fpu_emu.h" -#include "fpu_system.h" -#include "exception.h" -#include "status_w.h" -#include "control_w.h" -#include "reg_constant.h" -#include "version.h" - -/* */ -#undef PRINT_MESSAGES -/* */ - - -#if 0 -void Un_impl(void) -{ - u_char byte1, FPU_modrm; - unsigned long address = FPU_ORIG_EIP; - - RE_ENTRANT_CHECK_OFF; - /* No need to check access_ok(), we have previously fetched these bytes. */ - printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address); - if ( FPU_CS == __USER_CS ) - { - while ( 1 ) - { - FPU_get_user(byte1, (u_char __user *) address); - if ( (byte1 & 0xf8) == 0xd8 ) break; - printk("[%02x]", byte1); - address++; - } - printk("%02x ", byte1); - FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); - - if (FPU_modrm >= 0300) - printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); - else - printk("/%d\n", (FPU_modrm >> 3) & 7); - } - else - { - printk("cs selector = %04x\n", FPU_CS); - } - - RE_ENTRANT_CHECK_ON; - - EXCEPTION(EX_Invalid); - -} -#endif /* 0 */ - - -/* - Called for opcodes which are illegal and which are known to result in a - SIGILL with a real 80486. - */ -void FPU_illegal(void) -{ - math_abort(FPU_info,SIGILL); -} - - - -void FPU_printall(void) -{ - int i; - static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", - "DeNorm", "Inf", "NaN" }; - u_char byte1, FPU_modrm; - unsigned long address = FPU_ORIG_EIP; - - RE_ENTRANT_CHECK_OFF; - /* No need to check access_ok(), we have previously fetched these bytes. */ - printk("At %p:", (void *) address); - if ( FPU_CS == __USER_CS ) - { -#define MAX_PRINTED_BYTES 20 - for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) - { - FPU_get_user(byte1, (u_char __user *) address); - if ( (byte1 & 0xf8) == 0xd8 ) - { - printk(" %02x", byte1); - break; - } - printk(" [%02x]", byte1); - address++; - } - if ( i == MAX_PRINTED_BYTES ) - printk(" [more..]\n"); - else - { - FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); - - if (FPU_modrm >= 0300) - printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); - else - printk(" /%d, mod=%d rm=%d\n", - (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); - } - } - else - { - printk("%04x\n", FPU_CS); - } - - partial_status = status_word(); - -#ifdef DEBUGGING -if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); -if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); -if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); -if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); -if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); -if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); -if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); -if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); -if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); -if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); -if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); -if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); -if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); -#endif /* DEBUGGING */ - - printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", - partial_status & 0x8000 ? 1 : 0, /* busy */ - (partial_status & 0x3800) >> 11, /* stack top pointer */ - partial_status & 0x80 ? 1 : 0, /* Error summary status */ - partial_status & 0x40 ? 1 : 0, /* Stack flag */ - partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ - partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ - partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, - partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, - partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); - -printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", - control_word & 0x1000 ? 1 : 0, - (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, - (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, - control_word & 0x80 ? 1 : 0, - control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, - control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, - control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); - - for ( i = 0; i < 8; i++ ) - { - FPU_REG *r = &st(i); - u_char tagi = FPU_gettagi(i); - switch (tagi) - { - case TAG_Empty: - continue; - break; - case TAG_Zero: - case TAG_Special: - tagi = FPU_Special(r); - case TAG_Valid: - printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, - getsign(r) ? '-' : '+', - (long)(r->sigh >> 16), - (long)(r->sigh & 0xFFFF), - (long)(r->sigl >> 16), - (long)(r->sigl & 0xFFFF), - exponent(r) - EXP_BIAS + 1); - break; - default: - printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); - continue; - break; - } - printk("%s\n", tag_desc[(int) (unsigned) tagi]); - } - - RE_ENTRANT_CHECK_ON; - -} - -static struct { - int type; - const char *name; -} exception_names[] = { - { EX_StackOver, "stack overflow" }, - { EX_StackUnder, "stack underflow" }, - { EX_Precision, "loss of precision" }, - { EX_Underflow, "underflow" }, - { EX_Overflow, "overflow" }, - { EX_ZeroDiv, "divide by zero" }, - { EX_Denormal, "denormalized operand" }, - { EX_Invalid, "invalid operation" }, - { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, - { 0, NULL } -}; - -/* - EX_INTERNAL is always given with a code which indicates where the - error was detected. - - Internal error types: - 0x14 in fpu_etc.c - 0x1nn in a *.c file: - 0x101 in reg_add_sub.c - 0x102 in reg_mul.c - 0x104 in poly_atan.c - 0x105 in reg_mul.c - 0x107 in fpu_trig.c - 0x108 in reg_compare.c - 0x109 in reg_compare.c - 0x110 in reg_add_sub.c - 0x111 in fpe_entry.c - 0x112 in fpu_trig.c - 0x113 in errors.c - 0x115 in fpu_trig.c - 0x116 in fpu_trig.c - 0x117 in fpu_trig.c - 0x118 in fpu_trig.c - 0x119 in fpu_trig.c - 0x120 in poly_atan.c - 0x121 in reg_compare.c - 0x122 in reg_compare.c - 0x123 in reg_compare.c - 0x125 in fpu_trig.c - 0x126 in fpu_entry.c - 0x127 in poly_2xm1.c - 0x128 in fpu_entry.c - 0x129 in fpu_entry.c - 0x130 in get_address.c - 0x131 in get_address.c - 0x132 in get_address.c - 0x133 in get_address.c - 0x140 in load_store.c - 0x141 in load_store.c - 0x150 in poly_sin.c - 0x151 in poly_sin.c - 0x160 in reg_ld_str.c - 0x161 in reg_ld_str.c - 0x162 in reg_ld_str.c - 0x163 in reg_ld_str.c - 0x164 in reg_ld_str.c - 0x170 in fpu_tags.c - 0x171 in fpu_tags.c - 0x172 in fpu_tags.c - 0x180 in reg_convert.c - 0x2nn in an *.S file: - 0x201 in reg_u_add.S - 0x202 in reg_u_div.S - 0x203 in reg_u_div.S - 0x204 in reg_u_div.S - 0x205 in reg_u_mul.S - 0x206 in reg_u_sub.S - 0x207 in wm_sqrt.S - 0x208 in reg_div.S - 0x209 in reg_u_sub.S - 0x210 in reg_u_sub.S - 0x211 in reg_u_sub.S - 0x212 in reg_u_sub.S - 0x213 in wm_sqrt.S - 0x214 in wm_sqrt.S - 0x215 in wm_sqrt.S - 0x220 in reg_norm.S - 0x221 in reg_norm.S - 0x230 in reg_round.S - 0x231 in reg_round.S - 0x232 in reg_round.S - 0x233 in reg_round.S - 0x234 in reg_round.S - 0x235 in reg_round.S - 0x236 in reg_round.S - 0x240 in div_Xsig.S - 0x241 in div_Xsig.S - 0x242 in div_Xsig.S - */ - -asmlinkage void FPU_exception(int n) -{ - int i, int_type; - - int_type = 0; /* Needed only to stop compiler warnings */ - if ( n & EX_INTERNAL ) - { - int_type = n - EX_INTERNAL; - n = EX_INTERNAL; - /* Set lots of exception bits! */ - partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); - } - else - { - /* Extract only the bits which we use to set the status word */ - n &= (SW_Exc_Mask); - /* Set the corresponding exception bit */ - partial_status |= n; - /* Set summary bits iff exception isn't masked */ - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - if ( n & (SW_Stack_Fault | EX_Precision) ) - { - if ( !(n & SW_C1) ) - /* This bit distinguishes over- from underflow for a stack fault, - and roundup from round-down for precision loss. */ - partial_status &= ~SW_C1; - } - } - - RE_ENTRANT_CHECK_OFF; - if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) - { -#ifdef PRINT_MESSAGES - /* My message from the sponsor */ - printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); -#endif /* PRINT_MESSAGES */ - - /* Get a name string for error reporting */ - for (i=0; exception_names[i].type; i++) - if ( (exception_names[i].type & n) == exception_names[i].type ) - break; - - if (exception_names[i].type) - { -#ifdef PRINT_MESSAGES - printk("FP Exception: %s!\n", exception_names[i].name); -#endif /* PRINT_MESSAGES */ - } - else - printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); - - if ( n == EX_INTERNAL ) - { - printk("FPU emulator: Internal error type 0x%04x\n", int_type); - FPU_printall(); - } -#ifdef PRINT_MESSAGES - else - FPU_printall(); -#endif /* PRINT_MESSAGES */ - - /* - * The 80486 generates an interrupt on the next non-control FPU - * instruction. So we need some means of flagging it. - * We use the ES (Error Summary) bit for this. - */ - } - RE_ENTRANT_CHECK_ON; - -#ifdef __DEBUG__ - math_abort(FPU_info,SIGFPE); -#endif /* __DEBUG__ */ - -} - - -/* Real operation attempted on a NaN. */ -/* Returns < 0 if the exception is unmasked */ -int real_1op_NaN(FPU_REG *a) -{ - int signalling, isNaN; - - isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); - - /* The default result for the case of two "equal" NaNs (signs may - differ) is chosen to reproduce 80486 behaviour */ - signalling = isNaN && !(a->sigh & 0x40000000); - - if ( !signalling ) - { - if ( !isNaN ) /* pseudo-NaN, or other unsupported? */ - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - reg_copy(&CONST_QNaN, a); - } - EXCEPTION(EX_Invalid); - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; - } - return TAG_Special; - } - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */ - { - reg_copy(&CONST_QNaN, a); - } - /* ensure a Quiet NaN */ - a->sigh |= 0x40000000; - } - - EXCEPTION(EX_Invalid); - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; -} - - -/* Real operation attempted on two operands, one a NaN. */ -/* Returns < 0 if the exception is unmasked */ -int real_2op_NaN(FPU_REG const *b, u_char tagb, - int deststnr, - FPU_REG const *defaultNaN) -{ - FPU_REG *dest = &st(deststnr); - FPU_REG const *a = dest; - u_char taga = FPU_gettagi(deststnr); - FPU_REG const *x; - int signalling, unsupported; - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - /* TW_NaN is also used for unsupported data types. */ - unsupported = ((taga == TW_NaN) - && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000))) - || ((tagb == TW_NaN) - && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); - if ( unsupported ) - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); - } - EXCEPTION(EX_Invalid); - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; - } - - if (taga == TW_NaN) - { - x = a; - if (tagb == TW_NaN) - { - signalling = !(a->sigh & b->sigh & 0x40000000); - if ( significand(b) > significand(a) ) - x = b; - else if ( significand(b) == significand(a) ) - { - /* The default result for the case of two "equal" NaNs (signs may - differ) is chosen to reproduce 80486 behaviour */ - x = defaultNaN; - } - } - else - { - /* return the quiet version of the NaN in a */ - signalling = !(a->sigh & 0x40000000); - } - } - else -#ifdef PARANOID - if (tagb == TW_NaN) -#endif /* PARANOID */ - { - signalling = !(b->sigh & 0x40000000); - x = b; - } -#ifdef PARANOID - else - { - signalling = 0; - EXCEPTION(EX_INTERNAL|0x113); - x = &CONST_QNaN; - } -#endif /* PARANOID */ - - if ( (!signalling) || (control_word & CW_Invalid) ) - { - if ( ! x ) - x = b; - - if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ - x = &CONST_QNaN; - - FPU_copy_to_regi(x, TAG_Special, deststnr); - - if ( !signalling ) - return TAG_Special; - - /* ensure a Quiet NaN */ - dest->sigh |= 0x40000000; - } - - EXCEPTION(EX_Invalid); - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; -} - - -/* Invalid arith operation on Valid registers */ -/* Returns < 0 if the exception is unmasked */ -asmlinkage int arith_invalid(int deststnr) -{ - - EXCEPTION(EX_Invalid); - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); - } - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; - -} - - -/* Divide a finite number by zero */ -asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) -{ - FPU_REG *dest = &st(deststnr); - int tag = TAG_Valid; - - if ( control_word & CW_ZeroDiv ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); - setsign(dest, sign); - tag = TAG_Special; - } - - EXCEPTION(EX_ZeroDiv); - - return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; - -} - - -/* This may be called often, so keep it lean */ -int set_precision_flag(int flags) -{ - if ( control_word & CW_Precision ) - { - partial_status &= ~(SW_C1 & flags); - partial_status |= flags; /* The masked response */ - return 0; - } - else - { - EXCEPTION(flags); - return 1; - } -} - - -/* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_up(void) -{ - if ( control_word & CW_Precision ) - partial_status |= (SW_Precision | SW_C1); /* The masked response */ - else - EXCEPTION(EX_Precision | SW_C1); -} - - -/* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_down(void) -{ - if ( control_word & CW_Precision ) - { /* The masked response */ - partial_status &= ~SW_C1; - partial_status |= SW_Precision; - } - else - EXCEPTION(EX_Precision); -} - - -asmlinkage int denormal_operand(void) -{ - if ( control_word & CW_Denormal ) - { /* The masked response */ - partial_status |= SW_Denorm_Op; - return TAG_Special; - } - else - { - EXCEPTION(EX_Denormal); - return TAG_Special | FPU_Exception; - } -} - - -asmlinkage int arith_overflow(FPU_REG *dest) -{ - int tag = TAG_Valid; - - if ( control_word & CW_Overflow ) - { - /* The masked response */ -/* ###### The response here depends upon the rounding mode */ - reg_copy(&CONST_INF, dest); - tag = TAG_Special; - } - else - { - /* Subtract the magic number from the exponent */ - addexponent(dest, (-3 * (1 << 13))); - } - - EXCEPTION(EX_Overflow); - if ( control_word & CW_Overflow ) - { - /* The overflow exception is masked. */ - /* By definition, precision is lost. - The roundup bit (C1) is also set because we have - "rounded" upwards to Infinity. */ - EXCEPTION(EX_Precision | SW_C1); - return tag; - } - - return tag; - -} - - -asmlinkage int arith_underflow(FPU_REG *dest) -{ - int tag = TAG_Valid; - - if ( control_word & CW_Underflow ) - { - /* The masked response */ - if ( exponent16(dest) <= EXP_UNDER - 63 ) - { - reg_copy(&CONST_Z, dest); - partial_status &= ~SW_C1; /* Round down. */ - tag = TAG_Zero; - } - else - { - stdexp(dest); - } - } - else - { - /* Add the magic number to the exponent. */ - addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); - } - - EXCEPTION(EX_Underflow); - if ( control_word & CW_Underflow ) - { - /* The underflow exception is masked. */ - EXCEPTION(EX_Precision); - return tag; - } - - return tag; - -} - - -void FPU_stack_overflow(void) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - top--; - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - - EXCEPTION(EX_StackOver); - - return; - -} - - -void FPU_stack_underflow(void) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - - -void FPU_stack_underflow_i(int i) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - - -void FPU_stack_underflow_pop(int i) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); - FPU_pop(); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h deleted file mode 100644 index b463f21a811..00000000000 --- a/arch/i386/math-emu/exception.h +++ /dev/null @@ -1,53 +0,0 @@ -/*---------------------------------------------------------------------------+ - | exception.h | - | | - | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _EXCEPTION_H_ -#define _EXCEPTION_H_ - - -#ifdef __ASSEMBLY__ -#define Const_(x) $##x -#else -#define Const_(x) x -#endif - -#ifndef SW_C1 -#include "fpu_emu.h" -#endif /* SW_C1 */ - -#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ -#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ -/* Special exceptions: */ -#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ -#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ -#define EX_StackUnder Const_(0x0041) /* stack underflow */ -/* Exception flags: */ -#define EX_Precision Const_(0x0020) /* loss of precision */ -#define EX_Underflow Const_(0x0010) /* underflow */ -#define EX_Overflow Const_(0x0008) /* overflow */ -#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ -#define EX_Denormal Const_(0x0002) /* denormalized operand */ -#define EX_Invalid Const_(0x0001) /* invalid operation */ - - -#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) -#define PRECISION_LOST_DOWN Const_(EX_Precision) - - -#ifndef __ASSEMBLY__ - -#ifdef DEBUG -#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ - __FILE__, __LINE__); FPU_exception(x); } -#else -#define EXCEPTION(x) FPU_exception(x) -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* _EXCEPTION_H_ */ diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c deleted file mode 100644 index 6972dec01af..00000000000 --- a/arch/i386/math-emu/fpu_arith.c +++ /dev/null @@ -1,174 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_arith.c | - | | - | Code to implement the FPU register/register arithmetic instructions | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "status_w.h" - - -void fadd__(void) -{ - /* fadd st,st(i) */ - int i = FPU_rm; - clear_C1(); - FPU_add(&st(i), FPU_gettagi(i), 0, control_word); -} - - -void fmul__(void) -{ - /* fmul st,st(i) */ - int i = FPU_rm; - clear_C1(); - FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); -} - - - -void fsub__(void) -{ - /* fsub st,st(i) */ - clear_C1(); - FPU_sub(0, FPU_rm, control_word); -} - - -void fsubr_(void) -{ - /* fsubr st,st(i) */ - clear_C1(); - FPU_sub(REV, FPU_rm, control_word); -} - - -void fdiv__(void) -{ - /* fdiv st,st(i) */ - clear_C1(); - FPU_div(0, FPU_rm, control_word); -} - - -void fdivr_(void) -{ - /* fdivr st,st(i) */ - clear_C1(); - FPU_div(REV, FPU_rm, control_word); -} - - - -void fadd_i(void) -{ - /* fadd st(i),st */ - int i = FPU_rm; - clear_C1(); - FPU_add(&st(i), FPU_gettagi(i), i, control_word); -} - - -void fmul_i(void) -{ - /* fmul st(i),st */ - clear_C1(); - FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); -} - - -void fsubri(void) -{ - /* fsubr st(i),st */ - clear_C1(); - FPU_sub(DEST_RM, FPU_rm, control_word); -} - - -void fsub_i(void) -{ - /* fsub st(i),st */ - clear_C1(); - FPU_sub(REV|DEST_RM, FPU_rm, control_word); -} - - -void fdivri(void) -{ - /* fdivr st(i),st */ - clear_C1(); - FPU_div(DEST_RM, FPU_rm, control_word); -} - - -void fdiv_i(void) -{ - /* fdiv st(i),st */ - clear_C1(); - FPU_div(REV|DEST_RM, FPU_rm, control_word); -} - - - -void faddp_(void) -{ - /* faddp st(i),st */ - int i = FPU_rm; - clear_C1(); - if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 ) - FPU_pop(); -} - - -void fmulp_(void) -{ - /* fmulp st(i),st */ - clear_C1(); - if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - - -void fsubrp(void) -{ - /* fsubrp st(i),st */ - clear_C1(); - if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fsubp_(void) -{ - /* fsubp st(i),st */ - clear_C1(); - if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fdivrp(void) -{ - /* fdivrp st(i),st */ - clear_C1(); - if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fdivp_(void) -{ - /* fdivp st(i),st */ - clear_C1(); - if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h deleted file mode 100644 index 9ba12416df1..00000000000 --- a/arch/i386/math-emu/fpu_asm.h +++ /dev/null @@ -1,32 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_asm.h | - | | - | Copyright (C) 1992,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _FPU_ASM_H_ -#define _FPU_ASM_H_ - -#include - -#define EXCEPTION FPU_exception - - -#define PARAM1 8(%ebp) -#define PARAM2 12(%ebp) -#define PARAM3 16(%ebp) -#define PARAM4 20(%ebp) -#define PARAM5 24(%ebp) -#define PARAM6 28(%ebp) -#define PARAM7 32(%ebp) - -#define SIGL_OFFSET 0 -#define EXP(x) 8(x) -#define SIG(x) SIGL_OFFSET##(x) -#define SIGL(x) SIGL_OFFSET##(x) -#define SIGH(x) 4(x) - -#endif /* _FPU_ASM_H_ */ diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c deleted file mode 100644 index 20886cfb9f7..00000000000 --- a/arch/i386/math-emu/fpu_aux.c +++ /dev/null @@ -1,204 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_aux.c | - | | - | Code to implement some of the FPU auxiliary instructions. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" - - -static void fnop(void) -{ -} - -static void fclex(void) -{ - partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| - SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| - SW_Invalid); - no_ip_update = 1; -} - -/* Needs to be externally visible */ -void finit(void) -{ - control_word = 0x037f; - partial_status = 0; - top = 0; /* We don't keep top in the status word internally. */ - fpu_tag_word = 0xffff; - /* The behaviour is different from that detailed in - Section 15.1.6 of the Intel manual */ - operand_address.offset = 0; - operand_address.selector = 0; - instruction_address.offset = 0; - instruction_address.selector = 0; - instruction_address.opcode = 0; - no_ip_update = 1; -} - -/* - * These are nops on the i387.. - */ -#define feni fnop -#define fdisi fnop -#define fsetpm fnop - -static FUNC const finit_table[] = { - feni, fdisi, fclex, finit, - fsetpm, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void finit_(void) -{ - (finit_table[FPU_rm])(); -} - - -static void fstsw_ax(void) -{ - *(short *) &FPU_EAX = status_word(); - no_ip_update = 1; -} - -static FUNC const fstsw_table[] = { - fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, - FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void fstsw_(void) -{ - (fstsw_table[FPU_rm])(); -} - - -static FUNC const fp_nop_table[] = { - fnop, FPU_illegal, FPU_illegal, FPU_illegal, - FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void fp_nop(void) -{ - (fp_nop_table[FPU_rm])(); -} - - -void fld_i_(void) -{ - FPU_REG *st_new_ptr; - int i; - u_char tag; - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - /* fld st(i) */ - i = FPU_rm; - if ( NOT_EMPTY(i) ) - { - reg_copy(&st(i), st_new_ptr); - tag = FPU_gettagi(i); - push(); - FPU_settag0(tag); - } - else - { - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_stack_underflow(); - } - else - EXCEPTION(EX_StackUnder); - } - -} - - -void fxch_i(void) -{ - /* fxch st(i) */ - FPU_REG t; - int i = FPU_rm; - FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); - long tag_word = fpu_tag_word; - int regnr = top & 7, regnri = ((regnr + i) & 7); - u_char st0_tag = (tag_word >> (regnr*2)) & 3; - u_char sti_tag = (tag_word >> (regnri*2)) & 3; - - if ( st0_tag == TAG_Empty ) - { - if ( sti_tag == TAG_Empty ) - { - FPU_stack_underflow(); - FPU_stack_underflow_i(i); - return; - } - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_reg0(sti_ptr, sti_tag); - } - FPU_stack_underflow_i(i); - return; - } - if ( sti_tag == TAG_Empty ) - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_regi(st0_ptr, st0_tag, i); - } - FPU_stack_underflow(); - return; - } - clear_C1(); - - reg_copy(st0_ptr, &t); - reg_copy(sti_ptr, st0_ptr); - reg_copy(&t, sti_ptr); - - tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2)); - tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2)); - fpu_tag_word = tag_word; -} - - -void ffree_(void) -{ - /* ffree st(i) */ - FPU_settagi(FPU_rm, TAG_Empty); -} - - -void ffreep(void) -{ - /* ffree st(i) + pop - unofficial code */ - FPU_settagi(FPU_rm, TAG_Empty); - FPU_pop(); -} - - -void fst_i_(void) -{ - /* fst st(i) */ - FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); -} - - -void fstp_i(void) -{ - /* fstp st(i) */ - FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); - FPU_pop(); -} - diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h deleted file mode 100644 index 65120f52385..00000000000 --- a/arch/i386/math-emu/fpu_emu.h +++ /dev/null @@ -1,218 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_emu.h | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - - -#ifndef _FPU_EMU_H_ -#define _FPU_EMU_H_ - -/* - * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, - * rather than behaviour which appears to be cleaner. - * This is a matter of opinion: for all I know, the 80486 may simply - * be complying with the IEEE spec. Maybe one day I'll get to see the - * spec... - */ -#define PECULIAR_486 - -#ifdef __ASSEMBLY__ -#include "fpu_asm.h" -#define Const(x) $##x -#else -#define Const(x) x -#endif - -#define EXP_BIAS Const(0) -#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ -#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ -#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but - still a 16 bit nr. */ -#define EXP_Infinity EXP_OVER -#define EXP_NaN EXP_OVER - -#define EXTENDED_Ebias Const(0x3fff) -#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ - -#define SIGN_POS Const(0) -#define SIGN_NEG Const(0x80) - -#define SIGN_Positive Const(0) -#define SIGN_Negative Const(0x8000) - - -/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ -/* The following fold to 2 (Special) in the Tag Word */ -#define TW_Denormal Const(4) /* De-normal */ -#define TW_Infinity Const(5) /* + or - infinity */ -#define TW_NaN Const(6) /* Not a Number */ -#define TW_Unsupported Const(7) /* Not supported by an 80486 */ - -#define TAG_Valid Const(0) /* valid */ -#define TAG_Zero Const(1) /* zero */ -#define TAG_Special Const(2) /* De-normal, + or - infinity, - or Not a Number */ -#define TAG_Empty Const(3) /* empty */ -#define TAG_Error Const(0x80) /* probably need to abort */ - -#define LOADED_DATA Const(10101) /* Special st() number to identify - loaded data (not on stack). */ - -/* A few flags (must be >= 0x10). */ -#define REV 0x10 -#define DEST_RM 0x20 -#define LOADED 0x40 - -#define FPU_Exception Const(0x80000000) /* Added to tag returns. */ - - -#ifndef __ASSEMBLY__ - -#include "fpu_system.h" - -#include /* for struct _fpstate */ -#include -#include - -/* -#define RE_ENTRANT_CHECKING - */ - -#ifdef RE_ENTRANT_CHECKING -extern u_char emulating; -# define RE_ENTRANT_CHECK_OFF emulating = 0 -# define RE_ENTRANT_CHECK_ON emulating = 1 -#else -# define RE_ENTRANT_CHECK_OFF -# define RE_ENTRANT_CHECK_ON -#endif /* RE_ENTRANT_CHECKING */ - -#define FWAIT_OPCODE 0x9b -#define OP_SIZE_PREFIX 0x66 -#define ADDR_SIZE_PREFIX 0x67 -#define PREFIX_CS 0x2e -#define PREFIX_DS 0x3e -#define PREFIX_ES 0x26 -#define PREFIX_SS 0x36 -#define PREFIX_FS 0x64 -#define PREFIX_GS 0x65 -#define PREFIX_REPE 0xf3 -#define PREFIX_REPNE 0xf2 -#define PREFIX_LOCK 0xf0 -#define PREFIX_CS_ 1 -#define PREFIX_DS_ 2 -#define PREFIX_ES_ 3 -#define PREFIX_FS_ 4 -#define PREFIX_GS_ 5 -#define PREFIX_SS_ 6 -#define PREFIX_DEFAULT 7 - -struct address { - unsigned int offset; - unsigned int selector:16; - unsigned int opcode:11; - unsigned int empty:5; -}; -struct fpu__reg { - unsigned sigl; - unsigned sigh; - short exp; -}; - -typedef void (*FUNC)(void); -typedef struct fpu__reg FPU_REG; -typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag); -typedef struct { u_char address_size, operand_size, segment; } - overrides; -/* This structure is 32 bits: */ -typedef struct { overrides override; - u_char default_mode; } fpu_addr_modes; -/* PROTECTED has a restricted meaning in the emulator; it is used - to signal that the emulator needs to do special things to ensure - that protection is respected in a segmented model. */ -#define PROTECTED 4 -#define SIXTEEN 1 /* We rely upon this being 1 (true) */ -#define VM86 SIXTEEN -#define PM16 (SIXTEEN | PROTECTED) -#define SEG32 PROTECTED -extern u_char const data_sizes_16[32]; - -#define register_base ((u_char *) registers ) -#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) ) -#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) ) - -#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) -#define NOT_EMPTY(i) (!FPU_empty_i(i)) - -#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) - -#define poppop() { FPU_pop(); FPU_pop(); } - -/* push() does not affect the tags */ -#define push() { top--; } - -#define signbyte(a) (((u_char *)(a))[9]) -#define getsign(a) (signbyte(a) & 0x80) -#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } -#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ - else signbyte(b) &= 0x7f; } -#define changesign(a) { signbyte(a) ^= 0x80; } -#define setpositive(a) { signbyte(a) &= 0x7f; } -#define setnegative(a) { signbyte(a) |= 0x80; } -#define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) -#define signnegative(a) (signbyte(a) & 0x80) - -static inline void reg_copy(FPU_REG const *x, FPU_REG *y) -{ - *(short *)&(y->exp) = *(const short *)&(x->exp); - *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); -} - -#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) -#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ - ((y) + EXTENDED_Ebias) & 0x7fff; } -#define exponent16(x) (*(short *)&((x)->exp)) -#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } -#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } -#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } - -#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) - -#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) - - -/*----- Prototypes for functions written in assembler -----*/ -/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ - -asmlinkage int FPU_normalize(FPU_REG *x); -asmlinkage int FPU_normalize_nuo(FPU_REG *x); -asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expa, int expb); -asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expon); -asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign); -asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expa, int expb); -asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, - unsigned int control_w, u_char sign); -asmlinkage unsigned FPU_shrx(void *l, unsigned x); -asmlinkage unsigned FPU_shrxs(void *v, unsigned x); -asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); -asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, - unsigned int control_w, u_char sign); - -#ifndef MAKING_PROTO -#include "fpu_proto.h" -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* _FPU_EMU_H_ */ diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c deleted file mode 100644 index 1853524c8b5..00000000000 --- a/arch/i386/math-emu/fpu_entry.c +++ /dev/null @@ -1,761 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_entry.c | - | | - | The entry functions for wm-FPU-emu | - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | See the files "README" and "COPYING" for further copyright and warranty | - | information. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | - | entry points for wm-FPU-emu. | - +---------------------------------------------------------------------------*/ - -#include -#include - -#include -#include - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "exception.h" -#include "control_w.h" -#include "status_w.h" - -#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ - -#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ - -/* WARNING: These codes are not documented by Intel in their 80486 manual - and may not work on FPU clones or later Intel FPUs. */ - -/* Changes to support the un-doc codes provided by Linus Torvalds. */ - -#define _d9_d8_ fstp_i /* unofficial code (19) */ -#define _dc_d0_ fcom_st /* unofficial code (14) */ -#define _dc_d8_ fcompst /* unofficial code (1c) */ -#define _dd_c8_ fxch_i /* unofficial code (0d) */ -#define _de_d0_ fcompst /* unofficial code (16) */ -#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ -#define _df_c8_ fxch_i /* unofficial code (0f) */ -#define _df_d0_ fstp_i /* unofficial code (17) */ -#define _df_d8_ fstp_i /* unofficial code (1f) */ - -static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, - fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, - fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, -}; - -#else /* Support only documented FPU op-codes */ - -static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, - fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, - fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, -}; - -#endif /* NO_UNDOC_CODE */ - - -#define _NONE_ 0 /* Take no special action */ -#define _REG0_ 1 /* Need to check for not empty st(0) */ -#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ -#define _REGi_ 0 /* Uses st(rm) */ -#define _PUSH_ 3 /* Need to check for space to push onto stack */ -#define _null_ 4 /* Function illegal or not implemented */ -#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ -#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ -#define _REGIc 0 /* Compare st(0) and st(rm) */ -#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ - -#ifndef NO_UNDOC_CODE - -/* Un-documented FPU op-codes supported by default. (see above) */ - -static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, - _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ -}; - -#else /* Support only documented FPU op-codes */ - -static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, - _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ -}; - -#endif /* NO_UNDOC_CODE */ - - -#ifdef RE_ENTRANT_CHECKING -u_char emulating=0; -#endif /* RE_ENTRANT_CHECKING */ - -static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, - overrides *override); - -asmlinkage void math_emulate(long arg) -{ - u_char FPU_modrm, byte1; - unsigned short code; - fpu_addr_modes addr_modes; - int unmasked; - FPU_REG loaded_data; - FPU_REG *st0_ptr; - u_char loaded_tag, st0_tag; - void __user *data_address; - struct address data_sel_off; - struct address entry_sel_off; - unsigned long code_base = 0; - unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ - struct desc_struct code_descriptor; - -#ifdef RE_ENTRANT_CHECKING - if ( emulating ) - { - printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); - } - RE_ENTRANT_CHECK_ON; -#endif /* RE_ENTRANT_CHECKING */ - - if (!used_math()) - { - finit(); - set_used_math(); - } - - SETUP_DATA_AREA(arg); - - FPU_ORIG_EIP = FPU_EIP; - - if ( (FPU_EFLAGS & 0x00020000) != 0 ) - { - /* Virtual 8086 mode */ - addr_modes.default_mode = VM86; - FPU_EIP += code_base = FPU_CS << 4; - code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ - } - else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS ) - { - addr_modes.default_mode = 0; - } - else if ( FPU_CS == __KERNEL_CS ) - { - printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); - panic("Math emulation needed in kernel"); - } - else - { - - if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ - { - /* Can only handle segmented addressing via the LDT - for now, and it must be 16 bit */ - printk("FPU emulator: Unsupported addressing mode\n"); - math_abort(FPU_info, SIGILL); - } - - code_descriptor = LDT_DESCRIPTOR(FPU_CS); - if ( SEG_D_SIZE(code_descriptor) ) - { - /* The above test may be wrong, the book is not clear */ - /* Segmented 32 bit protected mode */ - addr_modes.default_mode = SEG32; - } - else - { - /* 16 bit protected mode */ - addr_modes.default_mode = PM16; - } - FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); - code_limit = code_base - + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) - - 1; - if ( code_limit < code_base ) code_limit = 0xffffffff; - } - - FPU_lookahead = 1; - if (current->ptrace & PT_PTRACED) - FPU_lookahead = 0; - - if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP, - &addr_modes.override) ) - { - RE_ENTRANT_CHECK_OFF; - printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" - "FPU emulator: self-modifying code! (emulation impossible)\n", - byte1); - RE_ENTRANT_CHECK_ON; - EXCEPTION(EX_INTERNAL|0x126); - math_abort(FPU_info,SIGILL); - } - -do_another_FPU_instruction: - - no_ip_update = 0; - - FPU_EIP++; /* We have fetched the prefix and first code bytes. */ - - if ( addr_modes.default_mode ) - { - /* This checks for the minimum instruction bytes. - We also need to check any extra (address mode) code access. */ - if ( FPU_EIP > code_limit ) - math_abort(FPU_info,SIGSEGV); - } - - if ( (byte1 & 0xf8) != 0xd8 ) - { - if ( byte1 == FWAIT_OPCODE ) - { - if (partial_status & SW_Summary) - goto do_the_FPU_interrupt; - else - goto FPU_fwait_done; - } -#ifdef PARANOID - EXCEPTION(EX_INTERNAL|0x128); - math_abort(FPU_info,SIGILL); -#endif /* PARANOID */ - } - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); - RE_ENTRANT_CHECK_ON; - FPU_EIP++; - - if (partial_status & SW_Summary) - { - /* Ignore the error for now if the current instruction is a no-wait - control instruction */ - /* The 80486 manual contradicts itself on this topic, - but a real 80486 uses the following instructions: - fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. - */ - code = (FPU_modrm << 8) | byte1; - if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ - (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, - fnstsw */ - ((code & 0xc000) != 0xc000))) ) ) - { - /* - * We need to simulate the action of the kernel to FPU - * interrupts here. - */ - do_the_FPU_interrupt: - - FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ - - RE_ENTRANT_CHECK_OFF; - current->thread.trap_no = 16; - current->thread.error_code = 0; - send_sig(SIGFPE, current, 1); - return; - } - } - - entry_sel_off.offset = FPU_ORIG_EIP; - entry_sel_off.selector = FPU_CS; - entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; - - FPU_rm = FPU_modrm & 7; - - if ( FPU_modrm < 0300 ) - { - /* All of these instructions use the mod/rm byte to get a data address */ - - if ( (addr_modes.default_mode & SIXTEEN) - ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) - data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, - addr_modes); - else - data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, - addr_modes); - - if ( addr_modes.default_mode ) - { - if ( FPU_EIP-1 > code_limit ) - math_abort(FPU_info,SIGSEGV); - } - - if ( !(byte1 & 1) ) - { - unsigned short status1 = partial_status; - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - - /* Stack underflow has priority */ - if ( NOT_EMPTY_ST0 ) - { - if ( addr_modes.default_mode & PROTECTED ) - { - /* This table works for 16 and 32 bit protected mode */ - if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) - math_abort(FPU_info,SIGSEGV); - } - - unmasked = 0; /* Do this here to stop compiler warnings. */ - switch ( (byte1 >> 1) & 3 ) - { - case 0: - unmasked = FPU_load_single((float __user *)data_address, - &loaded_data); - loaded_tag = unmasked & 0xff; - unmasked &= ~0xff; - break; - case 1: - loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); - break; - case 2: - unmasked = FPU_load_double((double __user *)data_address, - &loaded_data); - loaded_tag = unmasked & 0xff; - unmasked &= ~0xff; - break; - case 3: - default: /* Used here to suppress gcc warnings. */ - loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); - break; - } - - /* No more access to user memory, it is safe - to use static data now */ - - /* NaN operands have the next priority. */ - /* We have to delay looking at st(0) until after - loading the data, because that data might contain an SNaN */ - if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) || - ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) ) - { - /* Restore the status word; we might have loaded a - denormal. */ - partial_status = status1; - if ( (FPU_modrm & 0x30) == 0x10 ) - { - /* fcom or fcomp */ - EXCEPTION(EX_Invalid); - setcc(SW_C3 | SW_C2 | SW_C0); - if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) - FPU_pop(); /* fcomp, masked, so we pop. */ - } - else - { - if ( loaded_tag == TAG_Special ) - loaded_tag = FPU_Special(&loaded_data); -#ifdef PECULIAR_486 - /* This is not really needed, but gives behaviour - identical to an 80486 */ - if ( (FPU_modrm & 0x28) == 0x20 ) - /* fdiv or fsub */ - real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data); - else -#endif /* PECULIAR_486 */ - /* fadd, fdivr, fmul, or fsubr */ - real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr); - } - goto reg_mem_instr_done; - } - - if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) - { - /* Is not a comparison instruction. */ - if ( (FPU_modrm & 0x38) == 0x38 ) - { - /* fdivr */ - if ( (st0_tag == TAG_Zero) && - ((loaded_tag == TAG_Valid) - || (loaded_tag == TAG_Special - && isdenormal(&loaded_data))) ) - { - if ( FPU_divide_by_zero(0, getsign(&loaded_data)) - < 0 ) - { - /* We use the fact here that the unmasked - exception in the loaded data was for a - denormal operand */ - /* Restore the state of the denormal op bit */ - partial_status &= ~SW_Denorm_Op; - partial_status |= status1 & SW_Denorm_Op; - } - else - setsign(st0_ptr, getsign(&loaded_data)); - } - } - goto reg_mem_instr_done; - } - - switch ( (FPU_modrm >> 3) & 7 ) - { - case 0: /* fadd */ - clear_C1(); - FPU_add(&loaded_data, loaded_tag, 0, control_word); - break; - case 1: /* fmul */ - clear_C1(); - FPU_mul(&loaded_data, loaded_tag, 0, control_word); - break; - case 2: /* fcom */ - FPU_compare_st_data(&loaded_data, loaded_tag); - break; - case 3: /* fcomp */ - if ( !FPU_compare_st_data(&loaded_data, loaded_tag) - && !unmasked ) - FPU_pop(); - break; - case 4: /* fsub */ - clear_C1(); - FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 5: /* fsubr */ - clear_C1(); - FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 6: /* fdiv */ - clear_C1(); - FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 7: /* fdivr */ - clear_C1(); - if ( st0_tag == TAG_Zero ) - partial_status = status1; /* Undo any denorm tag, - zero-divide has priority. */ - FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - } - } - else - { - if ( (FPU_modrm & 0x30) == 0x10 ) - { - /* The instruction is fcom or fcomp */ - EXCEPTION(EX_StackUnder); - setcc(SW_C3 | SW_C2 | SW_C0); - if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) - FPU_pop(); /* fcomp */ - } - else - FPU_stack_underflow(); - } - reg_mem_instr_done: - operand_address = data_sel_off; - } - else - { - if ( !(no_ip_update = - FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, - addr_modes, data_address)) ) - { - operand_address = data_sel_off; - } - } - - } - else - { - /* None of these instructions access user memory */ - u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); - -#ifdef PECULIAR_486 - /* This is supposed to be undefined, but a real 80486 seems - to do this: */ - operand_address.offset = 0; - operand_address.selector = FPU_DS; -#endif /* PECULIAR_486 */ - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - switch ( type_table[(int) instr_index] ) - { - case _NONE_: /* also _REGIc: _REGIn */ - break; - case _REG0_: - if ( !NOT_EMPTY_ST0 ) - { - FPU_stack_underflow(); - goto FPU_instruction_done; - } - break; - case _REGIi: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow_i(FPU_rm); - goto FPU_instruction_done; - } - break; - case _REGIp: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow_pop(FPU_rm); - goto FPU_instruction_done; - } - break; - case _REGI_: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow(); - goto FPU_instruction_done; - } - break; - case _PUSH_: /* Only used by the fld st(i) instruction */ - break; - case _null_: - FPU_illegal(); - goto FPU_instruction_done; - default: - EXCEPTION(EX_INTERNAL|0x111); - goto FPU_instruction_done; - } - (*st_instr_table[(int) instr_index])(); - -FPU_instruction_done: - ; - } - - if ( ! no_ip_update ) - instruction_address = entry_sel_off; - -FPU_fwait_done: - -#ifdef DEBUG - RE_ENTRANT_CHECK_OFF; - FPU_printall(); - RE_ENTRANT_CHECK_ON; -#endif /* DEBUG */ - - if (FPU_lookahead && !need_resched()) - { - FPU_ORIG_EIP = FPU_EIP - code_base; - if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP, - &addr_modes.override) ) - goto do_another_FPU_instruction; - } - - if ( addr_modes.default_mode ) - FPU_EIP -= code_base; - - RE_ENTRANT_CHECK_OFF; -} - - -/* Support for prefix bytes is not yet complete. To properly handle - all prefix bytes, further changes are needed in the emulator code - which accesses user address space. Access to separate segments is - important for msdos emulation. */ -static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, - overrides *override) -{ - u_char byte; - u_char __user *ip = *fpu_eip; - - *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(byte, ip); - RE_ENTRANT_CHECK_ON; - - while ( 1 ) - { - switch ( byte ) - { - case ADDR_SIZE_PREFIX: - override->address_size = ADDR_SIZE_PREFIX; - goto do_next_byte; - - case OP_SIZE_PREFIX: - override->operand_size = OP_SIZE_PREFIX; - goto do_next_byte; - - case PREFIX_CS: - override->segment = PREFIX_CS_; - goto do_next_byte; - case PREFIX_ES: - override->segment = PREFIX_ES_; - goto do_next_byte; - case PREFIX_SS: - override->segment = PREFIX_SS_; - goto do_next_byte; - case PREFIX_FS: - override->segment = PREFIX_FS_; - goto do_next_byte; - case PREFIX_GS: - override->segment = PREFIX_GS_; - goto do_next_byte; - case PREFIX_DS: - override->segment = PREFIX_DS_; - goto do_next_byte; - -/* lock is not a valid prefix for FPU instructions, - let the cpu handle it to generate a SIGILL. */ -/* case PREFIX_LOCK: */ - - /* rep.. prefixes have no meaning for FPU instructions */ - case PREFIX_REPE: - case PREFIX_REPNE: - - do_next_byte: - ip++; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(byte, ip); - RE_ENTRANT_CHECK_ON; - break; - case FWAIT_OPCODE: - *Byte = byte; - return 1; - default: - if ( (byte & 0xf8) == 0xd8 ) - { - *Byte = byte; - *fpu_eip = ip; - return 1; - } - else - { - /* Not a valid sequence of prefix bytes followed by - an FPU instruction. */ - *Byte = byte; /* Needed for error message. */ - return 0; - } - } - } -} - - -void math_abort(struct info * info, unsigned int signal) -{ - FPU_EIP = FPU_ORIG_EIP; - current->thread.trap_no = 16; - current->thread.error_code = 0; - send_sig(signal,current,1); - RE_ENTRANT_CHECK_OFF; - __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); -#ifdef PARANOID - printk("ERROR: wm-FPU-emu math_abort failed!\n"); -#endif /* PARANOID */ -} - - - -#define S387 ((struct i387_soft_struct *)s387) -#define sstatus_word() \ - ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) - -int restore_i387_soft(void *s387, struct _fpstate __user *buf) -{ - u_char __user *d = (u_char __user *)buf; - int offset, other, i, tags, regnr, tag, newtop; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10); - if (__copy_from_user(&S387->cwd, d, 7*4)) - return -1; - RE_ENTRANT_CHECK_ON; - - d += 7*4; - - S387->ftop = (S387->swd >> SW_Top_Shift) & 7; - offset = (S387->ftop & 7) * 10; - other = 80 - offset; - - RE_ENTRANT_CHECK_OFF; - /* Copy all registers in stack order. */ - if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other)) - return -1; - if ( offset ) - if (__copy_from_user((u_char *)&S387->st_space, d+other, offset)) - return -1; - RE_ENTRANT_CHECK_ON; - - /* The tags may need to be corrected now. */ - tags = S387->twd; - newtop = S387->ftop; - for ( i = 0; i < 8; i++ ) - { - regnr = (i+newtop) & 7; - if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty ) - { - /* The loaded data over-rides all other cases. */ - tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr)); - tags &= ~(3 << (regnr*2)); - tags |= (tag & 3) << (regnr*2); - } - } - S387->twd = tags; - - return 0; -} - - -int save_i387_soft(void *s387, struct _fpstate __user * buf) -{ - u_char __user *d = (u_char __user *)buf; - int offset = (S387->ftop & 7) * 10, other = 80 - offset; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10); -#ifdef PECULIAR_486 - S387->cwd &= ~0xe080; - /* An 80486 sets nearly all of the reserved bits to 1. */ - S387->cwd |= 0xffff0040; - S387->swd = sstatus_word() | 0xffff0000; - S387->twd |= 0xffff0000; - S387->fcs &= ~0xf8000000; - S387->fos |= 0xffff0000; -#endif /* PECULIAR_486 */ - if (__copy_to_user(d, &S387->cwd, 7*4)) - return -1; - RE_ENTRANT_CHECK_ON; - - d += 7*4; - - RE_ENTRANT_CHECK_OFF; - /* Copy all registers in stack order. */ - if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other)) - return -1; - if ( offset ) - if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset)) - return -1; - RE_ENTRANT_CHECK_ON; - - return 1; -} diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c deleted file mode 100644 index e3b5d465587..00000000000 --- a/arch/i386/math-emu/fpu_etc.c +++ /dev/null @@ -1,143 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_etc.c | - | | - | Implement a few FPU instructions. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "reg_constant.h" - - -static void fchs(FPU_REG *st0_ptr, u_char st0tag) -{ - if ( st0tag ^ TAG_Empty ) - { - signbyte(st0_ptr) ^= SIGN_NEG; - clear_C1(); - } - else - FPU_stack_underflow(); -} - - -static void fabs(FPU_REG *st0_ptr, u_char st0tag) -{ - if ( st0tag ^ TAG_Empty ) - { - setpositive(st0_ptr); - clear_C1(); - } - else - FPU_stack_underflow(); -} - - -static void ftst_(FPU_REG *st0_ptr, u_char st0tag) -{ - switch (st0tag) - { - case TAG_Zero: - setcc(SW_C3); - break; - case TAG_Valid: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - break; - case TAG_Special: - switch ( FPU_Special(st0_ptr) ) - { - case TW_Denormal: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - if ( denormal_operand() < 0 ) - { -#ifdef PECULIAR_486 - /* This is weird! */ - if (getsign(st0_ptr) == SIGN_POS) - setcc(SW_C3); -#endif /* PECULIAR_486 */ - return; - } - break; - case TW_NaN: - setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ - EXCEPTION(EX_Invalid); - break; - case TW_Infinity: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - break; - default: - setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ - EXCEPTION(EX_INTERNAL|0x14); - break; - } - break; - case TAG_Empty: - setcc(SW_C0|SW_C2|SW_C3); - EXCEPTION(EX_StackUnder); - break; - } -} - - -static void fxam(FPU_REG *st0_ptr, u_char st0tag) -{ - int c = 0; - switch (st0tag) - { - case TAG_Empty: - c = SW_C3|SW_C0; - break; - case TAG_Zero: - c = SW_C3; - break; - case TAG_Valid: - c = SW_C2; - break; - case TAG_Special: - switch ( FPU_Special(st0_ptr) ) - { - case TW_Denormal: - c = SW_C2|SW_C3; /* Denormal */ - break; - case TW_NaN: - /* We also use NaN for unsupported types. */ - if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) ) - c = SW_C0; - break; - case TW_Infinity: - c = SW_C2|SW_C0; - break; - } - } - if ( getsign(st0_ptr) == SIGN_NEG ) - c |= SW_C1; - setcc(c); -} - - -static FUNC_ST0 const fp_etc_table[] = { - fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, - ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal -}; - -void FPU_etc(void) -{ - (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0()); -} diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h deleted file mode 100644 index 37a8a7fe7e2..00000000000 --- a/arch/i386/math-emu/fpu_proto.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _FPU_PROTO_H -#define _FPU_PROTO_H - -/* errors.c */ -extern void FPU_illegal(void); -extern void FPU_printall(void); -asmlinkage void FPU_exception(int n); -extern int real_1op_NaN(FPU_REG *a); -extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, - FPU_REG const *defaultNaN); -asmlinkage int arith_invalid(int deststnr); -asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); -extern int set_precision_flag(int flags); -asmlinkage void set_precision_flag_up(void); -asmlinkage void set_precision_flag_down(void); -asmlinkage int denormal_operand(void); -asmlinkage int arith_overflow(FPU_REG *dest); -asmlinkage int arith_underflow(FPU_REG *dest); -extern void FPU_stack_overflow(void); -extern void FPU_stack_underflow(void); -extern void FPU_stack_underflow_i(int i); -extern void FPU_stack_underflow_pop(int i); -/* fpu_arith.c */ -extern void fadd__(void); -extern void fmul__(void); -extern void fsub__(void); -extern void fsubr_(void); -extern void fdiv__(void); -extern void fdivr_(void); -extern void fadd_i(void); -extern void fmul_i(void); -extern void fsubri(void); -extern void fsub_i(void); -extern void fdivri(void); -extern void fdiv_i(void); -extern void faddp_(void); -extern void fmulp_(void); -extern void fsubrp(void); -extern void fsubp_(void); -extern void fdivrp(void); -extern void fdivp_(void); -/* fpu_aux.c */ -extern void finit(void); -extern void finit_(void); -extern void fstsw_(void); -extern void fp_nop(void); -extern void fld_i_(void); -extern void fxch_i(void); -extern void ffree_(void); -extern void ffreep(void); -extern void fst_i_(void); -extern void fstp_i(void); -/* fpu_entry.c */ -asmlinkage extern void math_emulate(long arg); -extern void math_abort(struct info *info, unsigned int signal); -/* fpu_etc.c */ -extern void FPU_etc(void); -/* fpu_tags.c */ -extern int FPU_gettag0(void); -extern int FPU_gettagi(int stnr); -extern int FPU_gettag(int regnr); -extern void FPU_settag0(int tag); -extern void FPU_settagi(int stnr, int tag); -extern void FPU_settag(int regnr, int tag); -extern int FPU_Special(FPU_REG const *ptr); -extern int isNaN(FPU_REG const *ptr); -extern void FPU_pop(void); -extern int FPU_empty_i(int stnr); -extern int FPU_stackoverflow(FPU_REG **st_new_ptr); -extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); -extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); -extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); -/* fpu_trig.c */ -extern void FPU_triga(void); -extern void FPU_trigb(void); -/* get_address.c */ -extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, fpu_addr_modes addr_modes); -extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, fpu_addr_modes addr_modes); -/* load_store.c */ -extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, - void __user *data_address); -/* poly_2xm1.c */ -extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result); -/* poly_atan.c */ -extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, - u_char st1_tag); -/* poly_l2.c */ -extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); -extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, - FPU_REG *d); -/* poly_sin.c */ -extern void poly_sine(FPU_REG *st0_ptr); -extern void poly_cos(FPU_REG *st0_ptr); -/* poly_tan.c */ -extern void poly_tan(FPU_REG *st0_ptr); -/* reg_add_sub.c */ -extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); -extern int FPU_sub(int flags, int rm, int control_w); -/* reg_compare.c */ -extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); -extern void fcom_st(void); -extern void fcompst(void); -extern void fcompp(void); -extern void fucom_(void); -extern void fucomp(void); -extern void fucompp(void); -/* reg_constant.c */ -extern void fconst(void); -/* reg_ld_str.c */ -extern int FPU_load_extended(long double __user *s, int stnr); -extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); -extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); -extern int FPU_load_int64(long long __user *_s); -extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); -extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); -extern int FPU_load_bcd(u_char __user *s); -extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, - long double __user *d); -extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat); -extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single); -extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d); -extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); -extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); -extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); -extern int FPU_round_to_int(FPU_REG *r, u_char tag); -extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); -extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); -extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); -extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); -extern int FPU_tagof(FPU_REG *ptr); -/* reg_mul.c */ -extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); - -extern int FPU_div(int flags, int regrm, int control_w); -/* reg_convert.c */ -extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); -#endif /* _FPU_PROTO_H */ - diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h deleted file mode 100644 index a3ae28c49dd..00000000000 --- a/arch/i386/math-emu/fpu_system.h +++ /dev/null @@ -1,90 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_system.h | - | | - | Copyright (C) 1992,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _FPU_SYSTEM_H -#define _FPU_SYSTEM_H - -/* system dependent definitions */ - -#include -#include -#include - -/* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg - -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) -#define SEG_D_SIZE(x) ((x).b & (3 << 21)) -#define SEG_G_BIT(x) ((x).b & (1 << 23)) -#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) -#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) -#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ - | (((s).b & 0xff) << 16) | ((s).a >> 16)) -#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) -#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) -#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) -#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ - == (1 << 10)) - -#define I387 (current->thread.i387) -#define FPU_info (I387.soft.info) - -#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) -#define FPU_EAX (FPU_info->___eax) -#define FPU_EFLAGS (FPU_info->___eflags) -#define FPU_EIP (FPU_info->___eip) -#define FPU_ORIG_EIP (FPU_info->___orig_eip) - -#define FPU_lookahead (I387.soft.lookahead) - -/* nz if ip_offset and cs_selector are not to be set for the current - instruction. */ -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) -#define FPU_rm (*(u_char *)&(I387.soft.rm)) - -/* Number of bytes of data which can be legally accessed by the current - instruction. This only needs to hold a number <= 108, so a byte will do. */ -#define access_limit (*(u_char *)&(I387.soft.alimit)) - -#define partial_status (I387.soft.swd) -#define control_word (I387.soft.cwd) -#define fpu_tag_word (I387.soft.twd) -#define registers (I387.soft.st_space) -#define top (I387.soft.ftop) - -#define instruction_address (*(struct address *)&I387.soft.fip) -#define operand_address (*(struct address *)&I387.soft.foo) - -#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ - math_abort(FPU_info,SIGSEGV) -#define FPU_abort math_abort(FPU_info, SIGSEGV) - -#undef FPU_IGNORE_CODE_SEGV -#ifdef FPU_IGNORE_CODE_SEGV -/* access_ok() is very expensive, and causes the emulator to run - about 20% slower if applied to the code. Anyway, errors due to bad - code addresses should be much rarer than errors due to bad data - addresses. */ -#define FPU_code_access_ok(z) -#else -/* A simpler test than access_ok() can probably be done for - FPU_code_access_ok() because the only possible error is to step - past the upper boundary of a legal code area. */ -#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) -#endif - -#define FPU_get_user(x,y) get_user((x),(y)) -#define FPU_put_user(x,y) put_user((x),(y)) - -#endif diff --git a/arch/i386/math-emu/fpu_tags.c b/arch/i386/math-emu/fpu_tags.c deleted file mode 100644 index cb436fe20e4..00000000000 --- a/arch/i386/math-emu/fpu_tags.c +++ /dev/null @@ -1,127 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_tags.c | - | | - | Set FPU register tags. | - | | - | Copyright (C) 1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" -#include "fpu_system.h" -#include "exception.h" - - -void FPU_pop(void) -{ - fpu_tag_word |= 3 << ((top & 7)*2); - top++; -} - - -int FPU_gettag0(void) -{ - return (fpu_tag_word >> ((top & 7)*2)) & 3; -} - - -int FPU_gettagi(int stnr) -{ - return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3; -} - - -int FPU_gettag(int regnr) -{ - return (fpu_tag_word >> ((regnr & 7)*2)) & 3; -} - - -void FPU_settag0(int tag) -{ - int regnr = top; - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -void FPU_settagi(int stnr, int tag) -{ - int regnr = stnr+top; - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -void FPU_settag(int regnr, int tag) -{ - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -int FPU_Special(FPU_REG const *ptr) -{ - int exp = exponent(ptr); - - if ( exp == EXP_BIAS+EXP_UNDER ) - return TW_Denormal; - else if ( exp != EXP_BIAS+EXP_OVER ) - return TW_NaN; - else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) ) - return TW_Infinity; - return TW_NaN; -} - - -int isNaN(FPU_REG const *ptr) -{ - return ( (exponent(ptr) == EXP_BIAS+EXP_OVER) - && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) ); -} - - -int FPU_empty_i(int stnr) -{ - int regnr = (top+stnr) & 7; - - return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty; -} - - -int FPU_stackoverflow(FPU_REG **st_new_ptr) -{ - *st_new_ptr = &st(-1); - - return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty; -} - - -void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) -{ - reg_copy(r, &st(stnr)); - FPU_settagi(stnr, tag); -} - -void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) -{ - reg_copy(r, &st(1)); - FPU_settagi(1, tag); -} - -void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) -{ - int regnr = top; - regnr &= 7; - - reg_copy(r, &st(0)); - - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c deleted file mode 100644 index 403cbde1d42..00000000000 --- a/arch/i386/math-emu/fpu_trig.c +++ /dev/null @@ -1,1845 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_trig.c | - | | - | Implementation of the FPU "transcendental" functions. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" -#include "reg_constant.h" - -static void rem_kernel(unsigned long long st0, unsigned long long *y, - unsigned long long st1, - unsigned long long q, int n); - -#define BETTER_THAN_486 - -#define FCOS 4 - -/* Used only by fptan, fsin, fcos, and fsincos. */ -/* This routine produces very accurate results, similar to - using a value of pi with more than 128 bits precision. */ -/* Limited measurements show no results worse than 64 bit precision - except for the results for arguments close to 2^63, where the - precision of the result sometimes degrades to about 63.9 bits */ -static int trig_arg(FPU_REG *st0_ptr, int even) -{ - FPU_REG tmp; - u_char tmptag; - unsigned long long q; - int old_cw = control_word, saved_status = partial_status; - int tag, st0_tag = TAG_Valid; - - if ( exponent(st0_ptr) >= 63 ) - { - partial_status |= SW_C2; /* Reduction incomplete. */ - return -1; - } - - control_word &= ~CW_RC; - control_word |= RC_CHOP; - - setpositive(st0_ptr); - tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, - SIGN_POS); - - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow - to 2^64 */ - q = significand(&tmp); - if ( q ) - { - rem_kernel(significand(st0_ptr), - &significand(&tmp), - significand(&CONST_PI2), - q, exponent(st0_ptr) - exponent(&CONST_PI2)); - setexponent16(&tmp, exponent(&CONST_PI2)); - st0_tag = FPU_normalize(&tmp); - FPU_copy_to_reg0(&tmp, st0_tag); - } - - if ( (even && !(q & 1)) || (!even && (q & 1)) ) - { - st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION); - -#ifdef BETTER_THAN_486 - /* So far, the results are exact but based upon a 64 bit - precision approximation to pi/2. The technique used - now is equivalent to using an approximation to pi/2 which - is accurate to about 128 bits. */ - if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) ) - { - /* This code gives the effect of having pi/2 to better than - 128 bits precision. */ - - significand(&tmp) = q + 1; - setexponent16(&tmp, 63); - FPU_normalize(&tmp); - tmptag = - FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS, - exponent(&CONST_PI2extra) + exponent(&tmp)); - setsign(&tmp, getsign(&CONST_PI2extra)); - st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); - if ( signnegative(st0_ptr) ) - { - /* CONST_PI2extra is negative, so the result of the addition - can be negative. This means that the argument is actually - in a different quadrant. The correction is always < pi/2, - so it can't overflow into yet another quadrant. */ - setpositive(st0_ptr); - q++; - } - } -#endif /* BETTER_THAN_486 */ - } -#ifdef BETTER_THAN_486 - else - { - /* So far, the results are exact but based upon a 64 bit - precision approximation to pi/2. The technique used - now is equivalent to using an approximation to pi/2 which - is accurate to about 128 bits. */ - if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) - || (q > 1) ) - { - /* This code gives the effect of having p/2 to better than - 128 bits precision. */ - - significand(&tmp) = q; - setexponent16(&tmp, 63); - FPU_normalize(&tmp); /* This must return TAG_Valid */ - tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, - SIGN_POS, - exponent(&CONST_PI2extra) + exponent(&tmp)); - setsign(&tmp, getsign(&CONST_PI2extra)); - st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp, - FULL_PRECISION); - if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) && - ((st0_ptr->sigh > CONST_PI2.sigh) - || ((st0_ptr->sigh == CONST_PI2.sigh) - && (st0_ptr->sigl > CONST_PI2.sigl))) ) - { - /* CONST_PI2extra is negative, so the result of the - subtraction can be larger than pi/2. This means - that the argument is actually in a different quadrant. - The correction is always < pi/2, so it can't overflow - into yet another quadrant. */ - st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, - FULL_PRECISION); - q++; - } - } - } -#endif /* BETTER_THAN_486 */ - - FPU_settag0(st0_tag); - control_word = old_cw; - partial_status = saved_status & ~SW_C2; /* Reduction complete. */ - - return (q & 3) | even; -} - - -/* Convert a long to register */ -static void convert_l2reg(long const *arg, int deststnr) -{ - int tag; - long num = *arg; - u_char sign; - FPU_REG *dest = &st(deststnr); - - if (num == 0) - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - return; - } - - if (num > 0) - { sign = SIGN_POS; } - else - { num = -num; sign = SIGN_NEG; } - - dest->sigh = num; - dest->sigl = 0; - setexponent16(dest, 31); - tag = FPU_normalize(dest); - FPU_settagi(deststnr, tag); - setsign(dest, sign); - return; -} - - -static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) -{ - if ( st0_tag == TAG_Empty ) - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - else if ( st0_tag == TW_NaN ) - real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL|0x0112); -#endif /* PARANOID */ -} - - -static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) -{ - int isNaN; - - switch ( st0_tag ) - { - case TW_NaN: - isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000); - if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ - { - EXCEPTION(EX_Invalid); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Convert to a QNaN */ - st0_ptr->sigh |= 0x40000000; - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - } - } - else if ( isNaN ) - { - /* A QNaN */ - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - } - else - { - /* pseudoNaN or other unsupported */ - EXCEPTION(EX_Invalid); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - push(); - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - } - break; /* return with a NaN in st(0) */ -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x0112); -#endif /* PARANOID */ - } -} - - -/*---------------------------------------------------------------------------*/ - -static void f2xm1(FPU_REG *st0_ptr, u_char tag) -{ - FPU_REG a; - - clear_C1(); - - if ( tag == TAG_Valid ) - { - /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ - if ( exponent(st0_ptr) < 0 ) - { - denormal_arg: - - FPU_to_exp16(st0_ptr, &a); - - /* poly_2xm1(x) requires 0 < st(0) < 1. */ - poly_2xm1(getsign(st0_ptr), &a, st0_ptr); - } - set_precision_flag_up(); /* 80486 appears to always do this */ - return; - } - - if ( tag == TAG_Zero ) - return; - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - switch ( tag ) - { - case TW_Denormal: - if ( denormal_operand() < 0 ) - return; - goto denormal_arg; - case TW_Infinity: - if ( signnegative(st0_ptr) ) - { - /* -infinity gives -1 (p16-10) */ - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setnegative(st0_ptr); - } - return; - default: - single_arg_error(st0_ptr, tag); - } -} - - -static void fptan(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - int q; - u_char arg_sign = getsign(st0_ptr); - - /* Stack underflow has higher priority */ - if ( st0_tag == TAG_Empty ) - { - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - if ( control_word & CW_Invalid ) - { - st_new_ptr = &st(-1); - push(); - FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ - } - return; - } - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - if ( st0_tag == TAG_Valid ) - { - if ( exponent(st0_ptr) > -40 ) - { - if ( (q = trig_arg(st0_ptr, 0)) == -1 ) - { - /* Operand is out of range */ - return; - } - - poly_tan(st0_ptr); - setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); - set_precision_flag_up(); /* We do not really know if up or down */ - } - else - { - /* For a small arg, the result == the argument */ - /* Underflow may happen */ - - denormal_arg: - - FPU_to_exp16(st0_ptr, st0_ptr); - - st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); - FPU_settag0(st0_tag); - } - push(); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - return; - } - - if ( st0_tag == TAG_Zero ) - { - push(); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setcc(0); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return; - - goto denormal_arg; - } - - if ( st0_tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - if ( arith_invalid(0) >= 0 ) - { - st_new_ptr = &st(-1); - push(); - arith_invalid(0); - } - return; - } - - single_arg_2_error(st0_ptr, st0_tag); -} - - -static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - u_char sign; - register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - clear_C1(); - - if ( st0_tag == TAG_Valid ) - { - long e; - - push(); - sign = getsign(st1_ptr); - reg_copy(st1_ptr, st_new_ptr); - setexponent16(st_new_ptr, exponent(st_new_ptr)); - - denormal_arg: - - e = exponent16(st_new_ptr); - convert_l2reg(&e, 1); - setexponentpos(st_new_ptr, 0); - setsign(st_new_ptr, sign); - FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ - return; - } - else if ( st0_tag == TAG_Zero ) - { - sign = getsign(st0_ptr); - - if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 ) - return; - - push(); - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(st_new_ptr, sign); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - { - if (denormal_operand() < 0 ) - return; - - push(); - sign = getsign(st1_ptr); - FPU_to_exp16(st1_ptr, st_new_ptr); - goto denormal_arg; - } - else if ( st0_tag == TW_Infinity ) - { - sign = getsign(st0_ptr); - setpositive(st0_ptr); - push(); - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - setsign(st_new_ptr, sign); - return; - } - else if ( st0_tag == TW_NaN ) - { - if ( real_1op_NaN(st0_ptr) < 0 ) - return; - - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - return; - } - else if ( st0_tag == TAG_Empty ) - { - /* Is this the correct behaviour? */ - if ( control_word & EX_Invalid ) - { - FPU_stack_underflow(); - push(); - FPU_stack_underflow(); - } - else - EXCEPTION(EX_StackUnder); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL | 0x119); -#endif /* PARANOID */ -} - - -static void fdecstp(void) -{ - clear_C1(); - top--; -} - -static void fincstp(void) -{ - clear_C1(); - top++; -} - - -static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) -{ - int expon; - - clear_C1(); - - if ( st0_tag == TAG_Valid ) - { - u_char tag; - - if (signnegative(st0_ptr)) - { - arith_invalid(0); /* sqrt(negative) is invalid */ - return; - } - - /* make st(0) in [1.0 .. 4.0) */ - expon = exponent(st0_ptr); - - denormal_arg: - - setexponent16(st0_ptr, (expon & 1)); - - /* Do the computation, the sign of the result will be positive. */ - tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); - addexponent(st0_ptr, expon >> 1); - FPU_settag0(tag); - return; - } - - if ( st0_tag == TAG_Zero ) - return; - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Infinity ) - { - if ( signnegative(st0_ptr) ) - arith_invalid(0); /* sqrt(-Infinity) is invalid */ - return; - } - else if ( st0_tag == TW_Denormal ) - { - if (signnegative(st0_ptr)) - { - arith_invalid(0); /* sqrt(negative) is invalid */ - return; - } - - if ( denormal_operand() < 0 ) - return; - - FPU_to_exp16(st0_ptr, st0_ptr); - - expon = exponent16(st0_ptr); - - goto denormal_arg; - } - - single_arg_error(st0_ptr, st0_tag); - -} - - -static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) -{ - int flags, tag; - - if ( st0_tag == TAG_Valid ) - { - u_char sign; - - denormal_arg: - - sign = getsign(st0_ptr); - - if (exponent(st0_ptr) > 63) - return; - - if ( st0_tag == TW_Denormal ) - { - if (denormal_operand() < 0 ) - return; - } - - /* Fortunately, this can't overflow to 2^64 */ - if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) ) - set_precision_flag(flags); - - setexponent16(st0_ptr, 63); - tag = FPU_normalize(st0_ptr); - setsign(st0_ptr, sign); - FPU_settag0(tag); - return; - } - - if ( st0_tag == TAG_Zero ) - return; - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - goto denormal_arg; - else if ( st0_tag == TW_Infinity ) - return; - else - single_arg_error(st0_ptr, st0_tag); -} - - -static int fsin(FPU_REG *st0_ptr, u_char tag) -{ - u_char arg_sign = getsign(st0_ptr); - - if ( tag == TAG_Valid ) - { - int q; - - if ( exponent(st0_ptr) > -40 ) - { - if ( (q = trig_arg(st0_ptr, 0)) == -1 ) - { - /* Operand is out of range */ - return 1; - } - - poly_sine(st0_ptr); - - if (q & 2) - changesign(st0_ptr); - - setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); - - /* We do not really know if up or down */ - set_precision_flag_up(); - return 0; - } - else - { - /* For a small arg, the result == the argument */ - set_precision_flag_up(); /* Must be up. */ - return 0; - } - } - - if ( tag == TAG_Zero ) - { - setcc(0); - return 0; - } - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - if ( tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return 1; - - /* For a small arg, the result == the argument */ - /* Underflow may happen */ - FPU_to_exp16(st0_ptr, st0_ptr); - - tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); - - FPU_settag0(tag); - - return 0; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - arith_invalid(0); - return 1; - } - else - { - single_arg_error(st0_ptr, tag); - return 1; - } -} - - -static int f_cos(FPU_REG *st0_ptr, u_char tag) -{ - u_char st0_sign; - - st0_sign = getsign(st0_ptr); - - if ( tag == TAG_Valid ) - { - int q; - - if ( exponent(st0_ptr) > -40 ) - { - if ( (exponent(st0_ptr) < 0) - || ((exponent(st0_ptr) == 0) - && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) ) - { - poly_cos(st0_ptr); - - /* We do not really know if up or down */ - set_precision_flag_down(); - - return 0; - } - else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 ) - { - poly_sine(st0_ptr); - - if ((q+1) & 2) - changesign(st0_ptr); - - /* We do not really know if up or down */ - set_precision_flag_down(); - - return 0; - } - else - { - /* Operand is out of range */ - return 1; - } - } - else - { - denormal_arg: - - setcc(0); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); -#ifdef PECULIAR_486 - set_precision_flag_down(); /* 80486 appears to do this. */ -#else - set_precision_flag_up(); /* Must be up. */ -#endif /* PECULIAR_486 */ - return 0; - } - } - else if ( tag == TAG_Zero ) - { - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setcc(0); - return 0; - } - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - if ( tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return 1; - - goto denormal_arg; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - arith_invalid(0); - return 1; - } - else - { - single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ - return 1; - } -} - - -static void fcos(FPU_REG *st0_ptr, u_char st0_tag) -{ - f_cos(st0_ptr, st0_tag); -} - - -static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - FPU_REG arg; - u_char tag; - - /* Stack underflow has higher priority */ - if ( st0_tag == TAG_Empty ) - { - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - if ( control_word & CW_Invalid ) - { - st_new_ptr = &st(-1); - push(); - FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ - } - return; - } - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - if ( st0_tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - else - tag = st0_tag; - - if ( tag == TW_NaN ) - { - single_arg_2_error(st0_ptr, TW_NaN); - return; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - if ( arith_invalid(0) >= 0 ) - { - /* Masked response */ - push(); - arith_invalid(0); - } - return; - } - - reg_copy(st0_ptr, &arg); - if ( !fsin(st0_ptr, st0_tag) ) - { - push(); - FPU_copy_to_reg0(&arg, st0_tag); - f_cos(&st(0), st0_tag); - } - else - { - /* An error, so restore st(0) */ - FPU_copy_to_reg0(&arg, st0_tag); - } -} - - -/*---------------------------------------------------------------------------*/ -/* The following all require two arguments: st(0) and st(1) */ - -/* A lean, mean kernel for the fprem instructions. This relies upon - the division and rounding to an integer in do_fprem giving an - exact result. Because of this, rem_kernel() needs to deal only with - the least significant 64 bits, the more significant bits of the - result must be zero. - */ -static void rem_kernel(unsigned long long st0, unsigned long long *y, - unsigned long long st1, - unsigned long long q, int n) -{ - int dummy; - unsigned long long x; - - x = st0 << n; - - /* Do the required multiplication and subtraction in the one operation */ - - /* lsw x -= lsw st1 * lsw q */ - asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1" - :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]), - "=a" (dummy) - :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0]) - :"%dx"); - /* msw x -= msw st1 * lsw q */ - asm volatile ("mull %3; subl %%eax,%0" - :"=m" (((unsigned *)&x)[1]), "=a" (dummy) - :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0]) - :"%dx"); - /* msw x -= lsw st1 * msw q */ - asm volatile ("mull %3; subl %%eax,%0" - :"=m" (((unsigned *)&x)[1]), "=a" (dummy) - :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1]) - :"%dx"); - - *y = x; -} - - -/* Remainder of st(0) / st(1) */ -/* This routine produces exact results, i.e. there is never any - rounding or truncation, etc of the result. */ -static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - FPU_REG tmp, st0, st1; - u_char st0_sign, st1_sign; - u_char tmptag; - int tag; - int old_cw; - int expdif; - long long q; - unsigned short saved_status; - int cc; - - fprem_valid: - /* Convert registers for internal use. */ - st0_sign = FPU_to_exp16(st0_ptr, &st0); - st1_sign = FPU_to_exp16(st1_ptr, &st1); - expdif = exponent16(&st0) - exponent16(&st1); - - old_cw = control_word; - cc = 0; - - /* We want the status following the denorm tests, but don't want - the status changed by the arithmetic operations. */ - saved_status = partial_status; - control_word &= ~CW_RC; - control_word |= RC_CHOP; - - if ( expdif < 64 ) - { - /* This should be the most common case */ - - if ( expdif > -2 ) - { - u_char sign = st0_sign ^ st1_sign; - tag = FPU_u_div(&st0, &st1, &tmp, - PR_64_BITS | RC_CHOP | 0x3f, - sign); - setsign(&tmp, sign); - - if ( exponent(&tmp) >= 0 ) - { - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't - overflow to 2^64 */ - q = significand(&tmp); - - rem_kernel(significand(&st0), - &significand(&tmp), - significand(&st1), - q, expdif); - - setexponent16(&tmp, exponent16(&st1)); - } - else - { - reg_copy(&st0, &tmp); - q = 0; - } - - if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) - { - /* We may need to subtract st(1) once more, - to get a result <= 1/2 of st(1). */ - unsigned long long x; - expdif = exponent16(&st1) - exponent16(&tmp); - if ( expdif <= 1 ) - { - if ( expdif == 0 ) - x = significand(&st1) - significand(&tmp); - else /* expdif is 1 */ - x = (significand(&st1) << 1) - significand(&tmp); - if ( (x < significand(&tmp)) || - /* or equi-distant (from 0 & st(1)) and q is odd */ - ((x == significand(&tmp)) && (q & 1) ) ) - { - st0_sign = ! st0_sign; - significand(&tmp) = x; - q++; - } - } - } - - if (q & 4) cc |= SW_C0; - if (q & 2) cc |= SW_C3; - if (q & 1) cc |= SW_C1; - } - else - { - control_word = old_cw; - setcc(0); - return; - } - } - else - { - /* There is a large exponent difference ( >= 64 ) */ - /* To make much sense, the code in this section should - be done at high precision. */ - int exp_1, N; - u_char sign; - - /* prevent overflow here */ - /* N is 'a number between 32 and 63' (p26-113) */ - reg_copy(&st0, &tmp); - tmptag = st0_tag; - N = (expdif & 0x0000001f) + 32; /* This choice gives results - identical to an AMD 486 */ - setexponent16(&tmp, N); - exp_1 = exponent16(&st1); - setexponent16(&st1, 0); - expdif -= N; - - sign = getsign(&tmp) ^ st1_sign; - tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f, - sign); - setsign(&tmp, sign); - - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't - overflow to 2^64 */ - - rem_kernel(significand(&st0), - &significand(&tmp), - significand(&st1), - significand(&tmp), - exponent(&tmp) - ); - setexponent16(&tmp, exp_1 + expdif); - - /* It is possible for the operation to be complete here. - What does the IEEE standard say? The Intel 80486 manual - implies that the operation will never be completed at this - point, and the behaviour of a real 80486 confirms this. - */ - if ( !(tmp.sigh | tmp.sigl) ) - { - /* The result is zero */ - control_word = old_cw; - partial_status = saved_status; - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(&st0, st0_sign); -#ifdef PECULIAR_486 - setcc(SW_C2); -#else - setcc(0); -#endif /* PECULIAR_486 */ - return; - } - cc = SW_C2; - } - - control_word = old_cw; - partial_status = saved_status; - tag = FPU_normalize_nuo(&tmp); - reg_copy(&tmp, st0_ptr); - - /* The only condition to be looked for is underflow, - and it can occur here only if underflow is unmasked. */ - if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) - && !(control_word & CW_Underflow) ) - { - setcc(cc); - tag = arith_underflow(st0_ptr); - setsign(st0_ptr, st0_sign); - FPU_settag0(tag); - return; - } - else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) ) - { - stdexp(st0_ptr); - setsign(st0_ptr, st0_sign); - } - else - { - tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); - } - FPU_settag0(tag); - setcc(cc); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - goto fprem_valid; - } - else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow(); - return; - } - else if ( st0_tag == TAG_Zero ) - { - if ( st1_tag == TAG_Valid ) - { - setcc(0); return; - } - else if ( st1_tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return; - setcc(0); return; - } - else if ( st1_tag == TAG_Zero ) - { arith_invalid(0); return; } /* fprem(?,0) always invalid */ - else if ( st1_tag == TW_Infinity ) - { setcc(0); return; } - } - else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - if ( st1_tag == TAG_Zero ) - { - arith_invalid(0); /* fprem(Valid,Zero) is invalid */ - return; - } - else if ( st1_tag != TW_NaN ) - { - if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal)) - && (denormal_operand() < 0) ) - return; - - if ( st1_tag == TW_Infinity ) - { - /* fprem(Valid,Infinity) is o.k. */ - setcc(0); return; - } - } - } - else if ( st0_tag == TW_Infinity ) - { - if ( st1_tag != TW_NaN ) - { - arith_invalid(0); /* fprem(Infinity,?) is invalid */ - return; - } - } - - /* One of the registers must contain a NaN if we got here. */ - -#ifdef PARANOID - if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) - EXCEPTION(EX_INTERNAL | 0x118); -#endif /* PARANOID */ - - real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); - -} - - -/* ST(1) <- ST(1) * log ST; pop ST */ -static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1), exponent; - u_char st1_tag = FPU_gettagi(1); - u_char sign; - int e, tag; - - clear_C1(); - - if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) ) - { - both_valid: - /* Both regs are Valid or Denormal */ - if ( signpositive(st0_ptr) ) - { - if ( st0_tag == TW_Denormal ) - FPU_to_exp16(st0_ptr, st0_ptr); - else - /* Convert st(0) for internal use. */ - setexponent16(st0_ptr, exponent(st0_ptr)); - - if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) - { - /* Special case. The result can be precise. */ - u_char esign; - e = exponent16(st0_ptr); - if ( e >= 0 ) - { - exponent.sigh = e; - esign = SIGN_POS; - } - else - { - exponent.sigh = -e; - esign = SIGN_NEG; - } - exponent.sigl = 0; - setexponent16(&exponent, 31); - tag = FPU_normalize_nuo(&exponent); - stdexp(&exponent); - setsign(&exponent, esign); - tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION); - if ( tag >= 0 ) - FPU_settagi(1, tag); - } - else - { - /* The usual case */ - sign = getsign(st1_ptr); - if ( st1_tag == TW_Denormal ) - FPU_to_exp16(st1_ptr, st1_ptr); - else - /* Convert st(1) for internal use. */ - setexponent16(st1_ptr, exponent(st1_ptr)); - poly_l2(st0_ptr, st1_ptr, sign); - } - } - else - { - /* negative */ - if ( arith_invalid(1) < 0 ) - return; - } - - FPU_pop(); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) ) - { - if ( st0_tag == TAG_Zero ) - { - if ( st1_tag == TAG_Zero ) - { - /* Both args zero is invalid */ - if ( arith_invalid(1) < 0 ) - return; - } - else - { - u_char sign; - sign = getsign(st1_ptr)^SIGN_NEG; - if ( FPU_divide_by_zero(1, sign) < 0 ) - return; - - setsign(st1_ptr, sign); - } - } - else if ( st1_tag == TAG_Zero ) - { - /* st(1) contains zero, st(0) valid <> 0 */ - /* Zero is the valid answer */ - sign = getsign(st1_ptr); - - if ( signnegative(st0_ptr) ) - { - /* log(negative) */ - if ( arith_invalid(1) < 0 ) - return; - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - { - if ( exponent(st0_ptr) < 0 ) - sign ^= SIGN_NEG; - - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - setsign(st1_ptr, sign); - } - } - else - { - /* One or both operands are denormals. */ - if ( denormal_operand() < 0 ) - return; - goto both_valid; - } - } - else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - /* One or both arg must be an infinity */ - else if ( st0_tag == TW_Infinity ) - { - if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) ) - { - /* log(-infinity) or 0*log(infinity) */ - if ( arith_invalid(1) < 0 ) - return; - } - else - { - u_char sign = getsign(st1_ptr); - - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_INF, TAG_Special); - setsign(st1_ptr, sign); - } - } - /* st(1) must be infinity here */ - else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) - && ( signpositive(st0_ptr) ) ) - { - if ( exponent(st0_ptr) >= 0 ) - { - if ( (exponent(st0_ptr) == 0) && - (st0_ptr->sigh == 0x80000000) && - (st0_ptr->sigl == 0) ) - { - /* st(0) holds 1.0 */ - /* infinity*log(1) */ - if ( arith_invalid(1) < 0 ) - return; - } - /* else st(0) is positive and > 1.0 */ - } - else - { - /* st(0) is positive and < 1.0 */ - - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - changesign(st1_ptr); - } - } - else - { - /* st(0) must be zero or negative */ - if ( st0_tag == TAG_Zero ) - { - /* This should be invalid, but a real 80486 is happy with it. */ - -#ifndef PECULIAR_486 - sign = getsign(st1_ptr); - if ( FPU_divide_by_zero(1, sign) < 0 ) - return; -#endif /* PECULIAR_486 */ - - changesign(st1_ptr); - } - else if ( arith_invalid(1) < 0 ) /* log(negative) */ - return; - } - - FPU_pop(); -} - - -static void fpatan(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - int tag; - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - valid_atan: - - poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag); - - FPU_pop(); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - - goto valid_atan; - } - else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 ) - FPU_pop(); - return; - } - else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) - { - u_char sign = getsign(st1_ptr); - if ( st0_tag == TW_Infinity ) - { - if ( st1_tag == TW_Infinity ) - { - if ( signpositive(st0_ptr) ) - { - FPU_copy_to_reg1(&CONST_PI4, TAG_Valid); - } - else - { - setpositive(st1_ptr); - tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr, - FULL_PRECISION, SIGN_POS, - exponent(&CONST_PI4), exponent(&CONST_PI2)); - if ( tag >= 0 ) - FPU_settagi(1, tag); - } - } - else - { - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st0_ptr) ) - { - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - setsign(st1_ptr, sign); /* An 80486 preserves the sign */ - FPU_pop(); - return; - } - else - { - FPU_copy_to_reg1(&CONST_PI, TAG_Valid); - } - } - } - else - { - /* st(1) is infinity, st(0) not infinity */ - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); - } - setsign(st1_ptr, sign); - } - else if ( st1_tag == TAG_Zero ) - { - /* st(0) must be valid or zero */ - u_char sign = getsign(st1_ptr); - - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st0_ptr) ) - { - /* An 80486 preserves the sign */ - FPU_pop(); - return; - } - - FPU_copy_to_reg1(&CONST_PI, TAG_Valid); - setsign(st1_ptr, sign); - } - else if ( st0_tag == TAG_Zero ) - { - /* st(1) must be TAG_Valid here */ - u_char sign = getsign(st1_ptr); - - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); - setsign(st1_ptr, sign); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL | 0x125); -#endif /* PARANOID */ - - FPU_pop(); - set_precision_flag_up(); /* We do not really know if up or down */ -} - - -static void fprem(FPU_REG *st0_ptr, u_char st0_tag) -{ - do_fprem(st0_ptr, st0_tag, RC_CHOP); -} - - -static void fprem1(FPU_REG *st0_ptr, u_char st0_tag) -{ - do_fprem(st0_ptr, st0_tag, RC_RND); -} - - -static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) -{ - u_char sign, sign1; - FPU_REG *st1_ptr = &st(1), a, b; - u_char st1_tag = FPU_gettagi(1); - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - valid_yl2xp1: - - sign = getsign(st0_ptr); - sign1 = getsign(st1_ptr); - - FPU_to_exp16(st0_ptr, &a); - FPU_to_exp16(st1_ptr, &b); - - if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) ) - return; - - FPU_pop(); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - - goto valid_yl2xp1; - } - else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( st0_tag == TAG_Zero ) - { - switch ( st1_tag ) - { - case TW_Denormal: - if ( denormal_operand() < 0 ) - return; - - case TAG_Zero: - case TAG_Valid: - setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); - FPU_copy_to_reg1(st0_ptr, st0_tag); - break; - - case TW_Infinity: - /* Infinity*log(1) */ - if ( arith_invalid(1) < 0 ) - return; - break; - - case TW_NaN: - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - break; - - default: -#ifdef PARANOID - EXCEPTION(EX_INTERNAL | 0x116); - return; -#endif /* PARANOID */ - break; - } - } - else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - switch ( st1_tag ) - { - case TAG_Zero: - if ( signnegative(st0_ptr) ) - { - if ( exponent(st0_ptr) >= 0 ) - { - /* st(0) holds <= -1.0 */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) - return; -#endif /* PECULIAR_486 */ - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - changesign(st1_ptr); - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - break; - - case TW_Infinity: - if ( signnegative(st0_ptr) ) - { - if ( (exponent(st0_ptr) >= 0) && - !((st0_ptr->sigh == 0x80000000) && - (st0_ptr->sigl == 0)) ) - { - /* st(0) holds < -1.0 */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) return; -#endif /* PECULIAR_486 */ - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - changesign(st1_ptr); - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - break; - - case TW_NaN: - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - - } - else if ( st0_tag == TW_NaN ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - else if ( st0_tag == TW_Infinity ) - { - if ( st1_tag == TW_NaN ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - else if ( signnegative(st0_ptr) ) - { -#ifndef PECULIAR_486 - /* This should have higher priority than denormals, but... */ - if ( arith_invalid(1) < 0 ) /* log(-infinity) */ - return; -#endif /* PECULIAR_486 */ - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; -#ifdef PECULIAR_486 - /* Denormal operands actually get higher priority */ - if ( arith_invalid(1) < 0 ) /* log(-infinity) */ - return; -#endif /* PECULIAR_486 */ - } - else if ( st1_tag == TAG_Zero ) - { - /* log(infinity) */ - if ( arith_invalid(1) < 0 ) - return; - } - - /* st(1) must be valid here. */ - - else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - /* The Manual says that log(Infinity) is invalid, but a real - 80486 sensibly says that it is o.k. */ - else - { - u_char sign = getsign(st1_ptr); - FPU_copy_to_reg1(&CONST_INF, TAG_Special); - setsign(st1_ptr, sign); - } - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL | 0x117); - return; - } -#endif /* PARANOID */ - - FPU_pop(); - return; - -} - - -static void fscale(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - int old_cw = control_word; - u_char sign = getsign(st0_ptr); - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - long scale; - FPU_REG tmp; - - /* Convert register for internal use. */ - setexponent16(st0_ptr, exponent(st0_ptr)); - - valid_scale: - - if ( exponent(st1_ptr) > 30 ) - { - /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ - - if ( signpositive(st1_ptr) ) - { - EXCEPTION(EX_Overflow); - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - } - else - { - EXCEPTION(EX_Underflow); - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - } - setsign(st0_ptr, sign); - return; - } - - control_word &= ~CW_RC; - control_word |= RC_CHOP; - reg_copy(st1_ptr, &tmp); - FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */ - control_word = old_cw; - scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl; - scale += exponent16(st0_ptr); - - setexponent16(st0_ptr, scale); - - /* Use FPU_round() to properly detect under/overflow etc */ - FPU_round(st0_ptr, 0, 0, control_word, sign); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - switch ( st1_tag ) - { - case TAG_Valid: - /* st(0) must be a denormal */ - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */ - goto valid_scale; - - case TAG_Zero: - if ( st0_tag == TW_Denormal ) - denormal_operand(); - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st1_ptr) ) - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - else - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(st0_ptr, sign); - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TAG_Zero ) - { - switch ( st1_tag ) - { - case TAG_Valid: - case TAG_Zero: - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( signpositive(st1_ptr) ) - arith_invalid(0); /* Zero scaled by +Infinity */ - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TW_Infinity ) - { - switch ( st1_tag ) - { - case TAG_Valid: - case TAG_Zero: - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( signnegative(st1_ptr) ) - arith_invalid(0); /* Infinity scaled by -Infinity */ - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TW_NaN ) - { - if ( st1_tag != TAG_Empty ) - { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; } - } - -#ifdef PARANOID - if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) ) - { - EXCEPTION(EX_INTERNAL | 0x115); - return; - } -#endif - - /* At least one of st(0), st(1) must be empty */ - FPU_stack_underflow(); - -} - - -/*---------------------------------------------------------------------------*/ - -static FUNC_ST0 const trig_table_a[] = { - f2xm1, fyl2x, fptan, fpatan, - fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp -}; - -void FPU_triga(void) -{ - (trig_table_a[FPU_rm])(&st(0), FPU_gettag0()); -} - - -static FUNC_ST0 const trig_table_b[] = - { - fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos - }; - -void FPU_trigb(void) -{ - (trig_table_b[FPU_rm])(&st(0), FPU_gettag0()); -} diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c deleted file mode 100644 index 2e2c51a8bd3..00000000000 --- a/arch/i386/math-emu/get_address.c +++ /dev/null @@ -1,438 +0,0 @@ -/*---------------------------------------------------------------------------+ - | get_address.c | - | | - | Get the effective address from an FPU instruction. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - - -#include - -#include -#include - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" - - -#define FPU_WRITE_BIT 0x10 - -static int reg_offset[] = { - offsetof(struct info,___eax), - offsetof(struct info,___ecx), - offsetof(struct info,___edx), - offsetof(struct info,___ebx), - offsetof(struct info,___esp), - offsetof(struct info,___ebp), - offsetof(struct info,___esi), - offsetof(struct info,___edi) -}; - -#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) - -static int reg_offset_vm86[] = { - offsetof(struct info,___cs), - offsetof(struct info,___vm86_ds), - offsetof(struct info,___vm86_es), - offsetof(struct info,___vm86_fs), - offsetof(struct info,___vm86_gs), - offsetof(struct info,___ss), - offsetof(struct info,___vm86_ds) - }; - -#define VM86_REG_(x) (*(unsigned short *) \ - (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) - -/* This dummy, gs is not saved on the stack. */ -#define ___GS ___ds - -static int reg_offset_pm[] = { - offsetof(struct info,___cs), - offsetof(struct info,___ds), - offsetof(struct info,___es), - offsetof(struct info,___fs), - offsetof(struct info,___GS), - offsetof(struct info,___ss), - offsetof(struct info,___ds) - }; - -#define PM_REG_(x) (*(unsigned short *) \ - (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) - - -/* Decode the SIB byte. This function assumes mod != 0 */ -static int sib(int mod, unsigned long *fpu_eip) -{ - u_char ss,index,base; - long offset; - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - ss = base >> 6; - index = (base >> 3) & 7; - base &= 7; - - if ((mod == 0) && (base == 5)) - offset = 0; /* No base register */ - else - offset = REG_(base); - - if (index == 4) - { - /* No index register */ - /* A non-zero ss is illegal */ - if ( ss ) - EXCEPTION(EX_Invalid); - } - else - { - offset += (REG_(index)) << ss; - } - - if (mod == 1) - { - /* 8 bit signed displacement */ - long displacement; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(displacement, (signed char __user *) (*fpu_eip)); - offset += displacement; - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - } - else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ - { - /* 32 bit displacement */ - long displacement; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(displacement, (long __user *) (*fpu_eip)); - offset += displacement; - RE_ENTRANT_CHECK_ON; - (*fpu_eip) += 4; - } - - return offset; -} - - -static unsigned long vm86_segment(u_char segment, - struct address *addr) -{ - segment--; -#ifdef PARANOID - if ( segment > PREFIX_SS_ ) - { - EXCEPTION(EX_INTERNAL|0x130); - math_abort(FPU_info,SIGSEGV); - } -#endif /* PARANOID */ - addr->selector = VM86_REG_(segment); - return (unsigned long)VM86_REG_(segment) << 4; -} - - -/* This should work for 16 and 32 bit protected mode. */ -static long pm_address(u_char FPU_modrm, u_char segment, - struct address *addr, long offset) -{ - struct desc_struct descriptor; - unsigned long base_address, limit, address, seg_top; - - segment--; - -#ifdef PARANOID - /* segment is unsigned, so this also detects if segment was 0: */ - if ( segment > PREFIX_SS_ ) - { - EXCEPTION(EX_INTERNAL|0x132); - math_abort(FPU_info,SIGSEGV); - } -#endif /* PARANOID */ - - switch ( segment ) - { - /* gs isn't used by the kernel, so it still has its - user-space value. */ - case PREFIX_GS_-1: - /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ - savesegment(gs, addr->selector); - break; - default: - addr->selector = PM_REG_(segment); - } - - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); - base_address = SEG_BASE_ADDR(descriptor); - address = base_address + offset; - limit = base_address - + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; - if ( limit < base_address ) limit = 0xffffffff; - - if ( SEG_EXPAND_DOWN(descriptor) ) - { - if ( SEG_G_BIT(descriptor) ) - seg_top = 0xffffffff; - else - { - seg_top = base_address + (1 << 20); - if ( seg_top < base_address ) seg_top = 0xffffffff; - } - access_limit = - (address <= limit) || (address >= seg_top) ? 0 : - ((seg_top-address) >= 255 ? 255 : seg_top-address); - } - else - { - access_limit = - (address > limit) || (address < base_address) ? 0 : - ((limit-address) >= 254 ? 255 : limit-address+1); - } - if ( SEG_EXECUTE_ONLY(descriptor) || - (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) - { - access_limit = 0; - } - return address; -} - - -/* - MOD R/M byte: MOD == 3 has a special use for the FPU - SIB byte used iff R/M = 100b - - 7 6 5 4 3 2 1 0 - ..... ......... ......... - MOD OPCODE(2) R/M - - - SIB byte - - 7 6 5 4 3 2 1 0 - ..... ......... ......... - SS INDEX BASE - -*/ - -void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, - fpu_addr_modes addr_modes) -{ - u_char mod; - unsigned rm = FPU_modrm & 7; - long *cpu_reg_ptr; - int address = 0; /* Initialized just to stop compiler warnings. */ - - /* Memory accessed via the cs selector is write protected - in `non-segmented' 32 bit protected mode. */ - if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) - && (addr_modes.override.segment == PREFIX_CS_) ) - { - math_abort(FPU_info,SIGSEGV); - } - - addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ - - mod = (FPU_modrm >> 6) & 3; - - if (rm == 4 && mod != 3) - { - address = sib(mod, fpu_eip); - } - else - { - cpu_reg_ptr = & REG_(rm); - switch (mod) - { - case 0: - if (rm == 5) - { - /* Special case: disp32 */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(address, (unsigned long __user *) (*fpu_eip)); - (*fpu_eip) += 4; - RE_ENTRANT_CHECK_ON; - addr->offset = address; - return (void __user *) address; - } - else - { - address = *cpu_reg_ptr; /* Just return the contents - of the cpu register */ - addr->offset = address; - return (void __user *) address; - } - case 1: - /* 8 bit signed displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(address, (signed char __user *) (*fpu_eip)); - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - break; - case 2: - /* 32 bit displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(address, (long __user *) (*fpu_eip)); - (*fpu_eip) += 4; - RE_ENTRANT_CHECK_ON; - break; - case 3: - /* Not legal for the FPU */ - EXCEPTION(EX_Invalid); - } - address += *cpu_reg_ptr; - } - - addr->offset = address; - - switch ( addr_modes.default_mode ) - { - case 0: - break; - case VM86: - address += vm86_segment(addr_modes.override.segment, addr); - break; - case PM16: - case SEG32: - address = pm_address(FPU_modrm, addr_modes.override.segment, - addr, address); - break; - default: - EXCEPTION(EX_INTERNAL|0x133); - } - - return (void __user *)address; -} - - -void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, - fpu_addr_modes addr_modes) -{ - u_char mod; - unsigned rm = FPU_modrm & 7; - int address = 0; /* Default used for mod == 0 */ - - /* Memory accessed via the cs selector is write protected - in `non-segmented' 32 bit protected mode. */ - if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) - && (addr_modes.override.segment == PREFIX_CS_) ) - { - math_abort(FPU_info,SIGSEGV); - } - - addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ - - mod = (FPU_modrm >> 6) & 3; - - switch (mod) - { - case 0: - if (rm == 6) - { - /* Special case: disp16 */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(2); - FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); - (*fpu_eip) += 2; - RE_ENTRANT_CHECK_ON; - goto add_segment; - } - break; - case 1: - /* 8 bit signed displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(address, (signed char __user *) (*fpu_eip)); - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - break; - case 2: - /* 16 bit displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(2); - FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); - (*fpu_eip) += 2; - RE_ENTRANT_CHECK_ON; - break; - case 3: - /* Not legal for the FPU */ - EXCEPTION(EX_Invalid); - break; - } - switch ( rm ) - { - case 0: - address += FPU_info->___ebx + FPU_info->___esi; - break; - case 1: - address += FPU_info->___ebx + FPU_info->___edi; - break; - case 2: - address += FPU_info->___ebp + FPU_info->___esi; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 3: - address += FPU_info->___ebp + FPU_info->___edi; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 4: - address += FPU_info->___esi; - break; - case 5: - address += FPU_info->___edi; - break; - case 6: - address += FPU_info->___ebp; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 7: - address += FPU_info->___ebx; - break; - } - - add_segment: - address &= 0xffff; - - addr->offset = address; - - switch ( addr_modes.default_mode ) - { - case 0: - break; - case VM86: - address += vm86_segment(addr_modes.override.segment, addr); - break; - case PM16: - case SEG32: - address = pm_address(FPU_modrm, addr_modes.override.segment, - addr, address); - break; - default: - EXCEPTION(EX_INTERNAL|0x131); - } - - return (void __user *)address ; -} diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c deleted file mode 100644 index eebd6fb1c8a..00000000000 --- a/arch/i386/math-emu/load_store.c +++ /dev/null @@ -1,272 +0,0 @@ -/*---------------------------------------------------------------------------+ - | load_store.c | - | | - | This file contains most of the code to interpret the FPU instructions | - | which load and store from user memory. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" - - -#define _NONE_ 0 /* st0_ptr etc not needed */ -#define _REG0_ 1 /* Will be storing st(0) */ -#define _PUSH_ 3 /* Need to check for space to push onto stack */ -#define _null_ 4 /* Function illegal or not implemented */ - -#define pop_0() { FPU_settag0(TAG_Empty); top++; } - - -static u_char const type_table[32] = { - _PUSH_, _PUSH_, _PUSH_, _PUSH_, - _null_, _null_, _null_, _null_, - _REG0_, _REG0_, _REG0_, _REG0_, - _REG0_, _REG0_, _REG0_, _REG0_, - _NONE_, _null_, _NONE_, _PUSH_, - _NONE_, _PUSH_, _null_, _PUSH_, - _NONE_, _null_, _NONE_, _REG0_, - _NONE_, _REG0_, _NONE_, _REG0_ - }; - -u_char const data_sizes_16[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, - 14, 0, 94, 10, 2, 10, 0, 8, - 14, 0, 94, 10, 2, 10, 2, 8 -}; - -static u_char const data_sizes_32[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, - 28, 0,108, 10, 2, 10, 0, 8, - 28, 0,108, 10, 2, 10, 2, 8 -}; - -int FPU_load_store(u_char type, fpu_addr_modes addr_modes, - void __user *data_address) -{ - FPU_REG loaded_data; - FPU_REG *st0_ptr; - u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ - u_char loaded_tag; - - st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ - - if ( addr_modes.default_mode & PROTECTED ) - { - if ( addr_modes.default_mode == SEG32 ) - { - if ( access_limit < data_sizes_32[type] ) - math_abort(FPU_info,SIGSEGV); - } - else if ( addr_modes.default_mode == PM16 ) - { - if ( access_limit < data_sizes_16[type] ) - math_abort(FPU_info,SIGSEGV); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL|0x140); -#endif /* PARANOID */ - } - - switch ( type_table[type] ) - { - case _NONE_: - break; - case _REG0_: - st0_ptr = &st(0); /* Some of these instructions pop after - storing */ - st0_tag = FPU_gettag0(); - break; - case _PUSH_: - { - if ( FPU_gettagi(-1) != TAG_Empty ) - { FPU_stack_overflow(); return 0; } - top--; - st0_ptr = &st(0); - } - break; - case _null_: - FPU_illegal(); - return 0; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x141); - return 0; -#endif /* PARANOID */ - } - - switch ( type ) - { - case 000: /* fld m32real */ - clear_C1(); - loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data); - if ( (loaded_tag == TAG_Special) - && isNaN(&loaded_data) - && (real_1op_NaN(&loaded_data) < 0) ) - { - top++; - break; - } - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 001: /* fild m32int */ - clear_C1(); - loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 002: /* fld m64real */ - clear_C1(); - loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data); - if ( (loaded_tag == TAG_Special) - && isNaN(&loaded_data) - && (real_1op_NaN(&loaded_data) < 0) ) - { - top++; - break; - } - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 003: /* fild m16int */ - clear_C1(); - loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 010: /* fst m32real */ - clear_C1(); - FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address); - break; - case 011: /* fist m32int */ - clear_C1(); - FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address); - break; - case 012: /* fst m64real */ - clear_C1(); - FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address); - break; - case 013: /* fist m16int */ - clear_C1(); - FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address); - break; - case 014: /* fstp m32real */ - clear_C1(); - if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 015: /* fistp m32int */ - clear_C1(); - if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 016: /* fstp m64real */ - clear_C1(); - if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 017: /* fistp m16int */ - clear_C1(); - if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 020: /* fldenv m14/28byte */ - fldenv(addr_modes, (u_char __user *)data_address); - /* Ensure that the values just loaded are not changed by - fix-up operations. */ - return 1; - case 022: /* frstor m94/108byte */ - frstor(addr_modes, (u_char __user *)data_address); - /* Ensure that the values just loaded are not changed by - fix-up operations. */ - return 1; - case 023: /* fbld m80dec */ - clear_C1(); - loaded_tag = FPU_load_bcd((u_char __user *)data_address); - FPU_settag0(loaded_tag); - break; - case 024: /* fldcw */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, data_address, 2); - FPU_get_user(control_word, (unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - else - partial_status &= ~(SW_Summary | SW_Backward); -#ifdef PECULIAR_486 - control_word |= 0x40; /* An 80486 appears to always set this bit */ -#endif /* PECULIAR_486 */ - return 1; - case 025: /* fld m80real */ - clear_C1(); - loaded_tag = FPU_load_extended((long double __user *)data_address, 0); - FPU_settag0(loaded_tag); - break; - case 027: /* fild m64int */ - clear_C1(); - loaded_tag = FPU_load_int64((long long __user *)data_address); - if (loaded_tag == TAG_Error) - return 0; - FPU_settag0(loaded_tag); - break; - case 030: /* fstenv m14/28byte */ - fstenv(addr_modes, (u_char __user *)data_address); - return 1; - case 032: /* fsave */ - fsave(addr_modes, (u_char __user *)data_address); - return 1; - case 033: /* fbstp m80dec */ - clear_C1(); - if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 034: /* fstcw m16int */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,data_address,2); - FPU_put_user(control_word, (unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - return 1; - case 035: /* fstp m80real */ - clear_C1(); - if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 036: /* fstsw m2byte */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,data_address,2); - FPU_put_user(status_word(),(unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - return 1; - case 037: /* fistp m64int */ - clear_C1(); - if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - } - return 0; -} diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S deleted file mode 100644 index 717785a53eb..00000000000 --- a/arch/i386/math-emu/mul_Xsig.S +++ /dev/null @@ -1,176 +0,0 @@ -/*---------------------------------------------------------------------------+ - | mul_Xsig.S | - | | - | Multiply a 12 byte fixed point number by another fixed point number. | - | | - | Copyright (C) 1992,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void mul32_Xsig(Xsig *x, unsigned b) | - | | - | void mul64_Xsig(Xsig *x, unsigned long long *b) | - | | - | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | - | | - | The result is neither rounded nor normalized, and the ls bit or so may | - | be wrong. | - | | - +---------------------------------------------------------------------------*/ - .file "mul_Xsig.S" - - -#include "fpu_emu.h" - -.text -ENTRY(mul32_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull %ecx /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull %ecx /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull %ecx /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%eax - movl %eax,(%esi) - movl -8(%ebp),%eax - movl %eax,4(%esi) - movl -4(%ebp),%eax - movl %eax,8(%esi) - - popl %esi - leave - ret - - -ENTRY(mul64_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull 4(%ecx) /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 4(%ecx) /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 4(%ecx) /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%eax - movl %eax,(%esi) - movl -8(%ebp),%eax - movl %eax,4(%esi) - movl -4(%ebp),%eax - movl %eax,8(%esi) - - popl %esi - leave - ret - - - -ENTRY(mul_Xsig_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull 8(%ecx) /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 4(%ecx) /* midl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 8(%ecx) /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 4(%ecx) /* midl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 8(%ecx) /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%edx - movl %edx,(%esi) - movl -8(%ebp),%edx - movl %edx,4(%esi) - movl -4(%ebp),%edx - movl %edx,8(%esi) - - popl %esi - leave - ret - diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h deleted file mode 100644 index 4db79811492..00000000000 --- a/arch/i386/math-emu/poly.h +++ /dev/null @@ -1,121 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly.h | - | | - | Header file for the FPU-emu poly*.c source files. | - | | - | Copyright (C) 1994,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | Declarations and definitions for functions operating on Xsig (12-byte | - | extended-significand) quantities. | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _POLY_H -#define _POLY_H - -/* This 12-byte structure is used to improve the accuracy of computation - of transcendental functions. - Intended to be used to get results better than 8-byte computation - allows. 9-byte would probably be sufficient. - */ -typedef struct { - unsigned long lsw; - unsigned long midw; - unsigned long msw; -} Xsig; - -asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, - unsigned long long *result); -asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, - const unsigned long long terms[], const int n); - -asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); -asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); -asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); - -asmlinkage void shr_Xsig(Xsig *, const int n); -asmlinkage int round_Xsig(Xsig *); -asmlinkage int norm_Xsig(Xsig *); -asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); - -/* Macro to extract the most significant 32 bits from a long long */ -#define LL_MSW(x) (((unsigned long *)&x)[1]) - -/* Macro to initialize an Xsig struct */ -#define MK_XSIG(a,b,c) { c, b, a } - -/* Macro to access the 8 ms bytes of an Xsig as a long long */ -#define XSIG_LL(x) (*(unsigned long long *)&x.midw) - - -/* - Need to run gcc with optimizations on to get these to - actually be in-line. - */ - -/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. - The answer is the ms word of the product. */ -/* Some versions of gcc make it difficult to stop eax from being clobbered. - Merely specifying that it is used doesn't work... - */ -static inline unsigned long mul_32_32(const unsigned long arg1, - const unsigned long arg2) -{ - int retval; - asm volatile ("mull %2; movl %%edx,%%eax" \ - :"=a" (retval) \ - :"0" (arg1), "g" (arg2) \ - :"dx"); - return retval; -} - - -/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ -static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) -{ - asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" - "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" - "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" - "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" - :"=g" (*dest):"g" (dest), "g" (x2) - :"ax","si","di"); -} - - -/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ -/* Note: the constraints in the asm statement didn't always work properly - with gcc 2.5.8. Changing from using edi to using ecx got around the - problem, but keep fingers crossed! */ -static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) -{ - asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" - "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" - "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" - "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" - "jnc 0f;\n" - "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" - "movl %4,%%ecx; incl (%%ecx)\n" - "movl $1,%%eax; jmp 1f;\n" - "0: xorl %%eax,%%eax;\n" - "1:\n" - :"=g" (*exp), "=g" (*dest) - :"g" (dest), "g" (x2), "g" (exp) - :"cx","si","ax"); -} - - -/* Negate (subtract from 1.0) the 12 byte Xsig */ -/* This is faster in a loop on my 386 than using the "neg" instruction. */ -static inline void negate_Xsig(Xsig *x) -{ - asm volatile("movl %1,%%esi;\n" - "xorl %%ecx,%%ecx;\n" - "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" - "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" - "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" - :"=g" (*x):"g" (x):"si","ax","cx"); -} - -#endif /* _POLY_H */ diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c deleted file mode 100644 index 9766ad5e974..00000000000 --- a/arch/i386/math-emu/poly_2xm1.c +++ /dev/null @@ -1,156 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_2xm1.c | - | | - | Function to compute 2^x-1 by a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define HIPOWER 11 -static const unsigned long long lterms[HIPOWER] = -{ - 0x0000000000000000LL, /* This term done separately as 12 bytes */ - 0xf5fdeffc162c7543LL, - 0x1c6b08d704a0bfa6LL, - 0x0276556df749cc21LL, - 0x002bb0ffcf14f6b8LL, - 0x0002861225ef751cLL, - 0x00001ffcbfcd5422LL, - 0x00000162c005d5f1LL, - 0x0000000da96ccb1bLL, - 0x0000000078d1b897LL, - 0x000000000422b029LL -}; - -static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); - -/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, - These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) - */ -static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); -static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); -static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); -static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); - -static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, - &shiftterm2, &shiftterm3 }; - - -/*--- poly_2xm1() -----------------------------------------------------------+ - | Requires st(0) which is TAG_Valid and < 1. | - +---------------------------------------------------------------------------*/ -int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) -{ - long int exponent, shift; - unsigned long long Xll; - Xsig accumulator, Denom, argSignif; - u_char tag; - - exponent = exponent16(arg); - -#ifdef PARANOID - if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */ - { - /* Number negative, too large, or not Valid. */ - EXCEPTION(EX_INTERNAL|0x127); - return 1; - } -#endif /* PARANOID */ - - argSignif.lsw = 0; - XSIG_LL(argSignif) = Xll = significand(arg); - - if ( exponent == -1 ) - { - shift = (argSignif.msw & 0x40000000) ? 3 : 2; - /* subtract 0.5 or 0.75 */ - exponent -= 2; - XSIG_LL(argSignif) <<= 2; - Xll <<= 2; - } - else if ( exponent == -2 ) - { - shift = 1; - /* subtract 0.25 */ - exponent--; - XSIG_LL(argSignif) <<= 1; - Xll <<= 1; - } - else - shift = 0; - - if ( exponent < -2 ) - { - /* Shift the argument right by the required places. */ - if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U ) - Xll++; /* round up */ - } - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); - mul_Xsig_Xsig(&accumulator, &argSignif); - shr_Xsig(&accumulator, 3); - - mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ - add_two_Xsig(&accumulator, &argSignif, &exponent); - - if ( shift ) - { - /* The argument is large, use the identity: - f(x+a) = f(a) * (f(x) + 1) - 1; - */ - shr_Xsig(&accumulator, - exponent); - accumulator.msw |= 0x80000000; /* add 1.0 */ - mul_Xsig_Xsig(&accumulator, shiftterm[shift]); - accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ - exponent = 1; - } - - if ( sign != SIGN_POS ) - { - /* The argument is negative, use the identity: - f(-x) = -f(x) / (1 + f(x)) - */ - Denom.lsw = accumulator.lsw; - XSIG_LL(Denom) = XSIG_LL(accumulator); - if ( exponent < 0 ) - shr_Xsig(&Denom, - exponent); - else if ( exponent > 0 ) - { - /* exponent must be 1 here */ - XSIG_LL(Denom) <<= 1; - if ( Denom.lsw & 0x80000000 ) - XSIG_LL(Denom) |= 1; - (Denom.lsw) <<= 1; - } - Denom.msw |= 0x80000000; /* add 1.0 */ - div_Xsig(&accumulator, &Denom, &accumulator); - } - - /* Convert to 64 bit signed-compatible */ - exponent += round_Xsig(&accumulator); - - result = &st(0); - significand(result) = XSIG_LL(accumulator); - setexponent16(result, exponent); - - tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); - - setsign(result, sign); - FPU_settag0(tag); - - return 0; - -} diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c deleted file mode 100644 index 82f702952f6..00000000000 --- a/arch/i386/math-emu/poly_atan.c +++ /dev/null @@ -1,229 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_atan.c | - | | - | Compute the arctan of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "status_w.h" -#include "control_w.h" -#include "poly.h" - - -#define HIPOWERon 6 /* odd poly, negative terms */ -static const unsigned long long oddnegterms[HIPOWERon] = -{ - 0x0000000000000000LL, /* Dummy (not for - 1.0) */ - 0x015328437f756467LL, - 0x0005dda27b73dec6LL, - 0x0000226bf2bfb91aLL, - 0x000000ccc439c5f7LL, - 0x0000000355438407LL -} ; - -#define HIPOWERop 6 /* odd poly, positive terms */ -static const unsigned long long oddplterms[HIPOWERop] = -{ -/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ - 0x0db55a71875c9ac2LL, - 0x0029fce2d67880b0LL, - 0x0000dfd3908b4596LL, - 0x00000550fd61dab4LL, - 0x0000001c9422b3f9LL, - 0x000000003e3301e1LL -}; - -static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; - -static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); - -static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); - - -/*--- poly_atan() -----------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, - FPU_REG *st1_ptr, u_char st1_tag) -{ - u_char transformed, inverted, - sign1, sign2; - int exponent; - long int dummy_exp; - Xsig accumulator, Numer, Denom, accumulatore, argSignif, - argSq, argSqSq; - u_char tag; - - sign1 = getsign(st0_ptr); - sign2 = getsign(st1_ptr); - if ( st0_tag == TAG_Valid ) - { - exponent = exponent(st0_ptr); - } - else - { - /* This gives non-compatible stack contents... */ - FPU_to_exp16(st0_ptr, st0_ptr); - exponent = exponent16(st0_ptr); - } - if ( st1_tag == TAG_Valid ) - { - exponent -= exponent(st1_ptr); - } - else - { - /* This gives non-compatible stack contents... */ - FPU_to_exp16(st1_ptr, st1_ptr); - exponent -= exponent16(st1_ptr); - } - - if ( (exponent < 0) || ((exponent == 0) && - ((st0_ptr->sigh < st1_ptr->sigh) || - ((st0_ptr->sigh == st1_ptr->sigh) && - (st0_ptr->sigl < st1_ptr->sigl))) ) ) - { - inverted = 1; - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = significand(st0_ptr); - XSIG_LL(Denom) = significand(st1_ptr); - } - else - { - inverted = 0; - exponent = -exponent; - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = significand(st1_ptr); - XSIG_LL(Denom) = significand(st0_ptr); - } - div_Xsig(&Numer, &Denom, &argSignif); - exponent += norm_Xsig(&argSignif); - - if ( (exponent >= -1) - || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) - { - /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ - /* Convert the argument by an identity for atan */ - transformed = 1; - - if ( exponent >= 0 ) - { -#ifdef PARANOID - if ( !( (exponent == 0) && - (argSignif.lsw == 0) && (argSignif.midw == 0) && - (argSignif.msw == 0x80000000) ) ) - { - EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ - return; - } -#endif /* PARANOID */ - argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ - } - else - { - Numer.lsw = Denom.lsw = argSignif.lsw; - XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); - - if ( exponent < -1 ) - shr_Xsig(&Numer, -1-exponent); - negate_Xsig(&Numer); - - shr_Xsig(&Denom, -exponent); - Denom.msw |= 0x80000000; - - div_Xsig(&Numer, &Denom, &argSignif); - - exponent = -1 + norm_Xsig(&argSignif); - } - } - else - { - transformed = 0; - } - - argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; - argSq.msw = argSignif.msw; - mul_Xsig_Xsig(&argSq, &argSq); - - argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; - mul_Xsig_Xsig(&argSqSq, &argSqSq); - - accumulatore.lsw = argSq.lsw; - XSIG_LL(accumulatore) = XSIG_LL(argSq); - - shr_Xsig(&argSq, 2*(-1-exponent-1)); - shr_Xsig(&argSqSq, 4*(-1-exponent-1)); - - /* Now have argSq etc with binary point at the left - .1xxxxxxxx */ - - /* Do the basic fixed point polynomial evaluation */ - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), - oddplterms, HIPOWERop-1); - mul64_Xsig(&accumulator, &XSIG_LL(argSq)); - negate_Xsig(&accumulator); - polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); - negate_Xsig(&accumulator); - add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); - - mul64_Xsig(&accumulatore, &denomterm); - shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); - accumulatore.msw |= 0x80000000; - - div_Xsig(&accumulator, &accumulatore, &accumulator); - - mul_Xsig_Xsig(&accumulator, &argSignif); - mul_Xsig_Xsig(&accumulator, &argSq); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &argSignif); - - if ( transformed ) - { - /* compute pi/4 - accumulator */ - shr_Xsig(&accumulator, -1-exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = -1; - } - - if ( inverted ) - { - /* compute pi/2 - accumulator */ - shr_Xsig(&accumulator, -exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = 0; - } - - if ( sign1 ) - { - /* compute pi - accumulator */ - shr_Xsig(&accumulator, 1 - exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = 1; - } - - exponent += round_Xsig(&accumulator); - - significand(st1_ptr) = XSIG_LL(accumulator); - setexponent16(st1_ptr, exponent); - - tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); - FPU_settagi(1, tag); - - set_precision_flag_up(); /* We do not really know if up or down, - use this as the default. */ - -} diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c deleted file mode 100644 index dd00e1d5b07..00000000000 --- a/arch/i386/math-emu/poly_l2.c +++ /dev/null @@ -1,272 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_l2.c | - | | - | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -static void log2_kernel(FPU_REG const *arg, u_char argsign, - Xsig *accum_result, long int *expon); - - -/*--- poly_l2() -------------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - +---------------------------------------------------------------------------*/ -void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) -{ - long int exponent, expon, expon_expon; - Xsig accumulator, expon_accum, yaccum; - u_char sign, argsign; - FPU_REG x; - int tag; - - exponent = exponent16(st0_ptr); - - /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ - if ( st0_ptr->sigh > (unsigned)0xb504f334 ) - { - /* Treat as sqrt(2)/2 < st0_ptr < 1 */ - significand(&x) = - significand(st0_ptr); - setexponent16(&x, -1); - exponent++; - argsign = SIGN_NEG; - } - else - { - /* Treat as 1 <= st0_ptr < sqrt(2) */ - x.sigh = st0_ptr->sigh - 0x80000000; - x.sigl = st0_ptr->sigl; - setexponent16(&x, 0); - argsign = SIGN_POS; - } - tag = FPU_normalize_nuo(&x); - - if ( tag == TAG_Zero ) - { - expon = 0; - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - } - else - { - log2_kernel(&x, argsign, &accumulator, &expon); - } - - if ( exponent < 0 ) - { - sign = SIGN_NEG; - exponent = -exponent; - } - else - sign = SIGN_POS; - expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; - if ( exponent ) - { - expon_expon = 31 + norm_Xsig(&expon_accum); - shr_Xsig(&accumulator, expon_expon - expon); - - if ( sign ^ argsign ) - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &expon_accum); - } - else - { - expon_expon = expon; - sign = argsign; - } - - yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr); - mul_Xsig_Xsig(&accumulator, &yaccum); - - expon_expon += round_Xsig(&accumulator); - - if ( accumulator.msw == 0 ) - { - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - return; - } - - significand(st1_ptr) = XSIG_LL(accumulator); - setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); - - tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); - FPU_settagi(1, tag); - - set_precision_flag_up(); /* 80486 appears to always do this */ - - return; - -} - - -/*--- poly_l2p1() -----------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - | log2(x+1) | - +---------------------------------------------------------------------------*/ -int poly_l2p1(u_char sign0, u_char sign1, - FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest) -{ - u_char tag; - long int exponent; - Xsig accumulator, yaccum; - - if ( exponent16(st0_ptr) < 0 ) - { - log2_kernel(st0_ptr, sign0, &accumulator, &exponent); - - yaccum.lsw = 0; - XSIG_LL(yaccum) = significand(st1_ptr); - mul_Xsig_Xsig(&accumulator, &yaccum); - - exponent += round_Xsig(&accumulator); - - exponent += exponent16(st1_ptr) + 1; - if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; - - significand(dest) = XSIG_LL(accumulator); - setexponent16(dest, exponent); - - tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); - FPU_settagi(1, tag); - - if ( tag == TAG_Valid ) - set_precision_flag_up(); /* 80486 appears to always do this */ - } - else - { - /* The magnitude of st0_ptr is far too large. */ - - if ( sign0 != SIGN_POS ) - { - /* Trying to get the log of a negative number. */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) - return 1; -#endif /* PECULIAR_486 */ - } - - /* 80486 appears to do this */ - if ( sign0 == SIGN_NEG ) - set_precision_flag_down(); - else - set_precision_flag_up(); - } - - if ( exponent(dest) <= EXP_UNDER ) - EXCEPTION(EX_Underflow); - - return 0; - -} - - - - -#undef HIPOWER -#define HIPOWER 10 -static const unsigned long long logterms[HIPOWER] = -{ - 0x2a8eca5705fc2ef0LL, - 0xf6384ee1d01febceLL, - 0x093bb62877cdf642LL, - 0x006985d8a9ec439bLL, - 0x0005212c4f55a9c8LL, - 0x00004326a16927f0LL, - 0x0000038d1d80a0e7LL, - 0x0000003141cc80c6LL, - 0x00000002b1668c9fLL, - 0x000000002c7a46aaLL -}; - -static const unsigned long leadterm = 0xb8000000; - - -/*--- log2_kernel() ---------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - | log2(x+1) | - +---------------------------------------------------------------------------*/ -static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, - long int *expon) -{ - long int exponent, adj; - unsigned long long Xsq; - Xsig accumulator, Numer, Denom, argSignif, arg_signif; - - exponent = exponent16(arg); - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); - if ( argsign == SIGN_POS ) - { - shr_Xsig(&Denom, 2 - (1 + exponent)); - Denom.msw |= 0x80000000; - div_Xsig(&Numer, &Denom, &argSignif); - } - else - { - shr_Xsig(&Denom, 1 - (1 + exponent)); - negate_Xsig(&Denom); - if ( Denom.msw & 0x80000000 ) - { - div_Xsig(&Numer, &Denom, &argSignif); - exponent ++; - } - else - { - /* Denom must be 1.0 */ - argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; - argSignif.msw = Numer.msw; - } - } - -#ifndef PECULIAR_486 - /* Should check here that |local_arg| is within the valid range */ - if ( exponent >= -2 ) - { - if ( (exponent > -2) || - (argSignif.msw > (unsigned)0xafb0ccc0) ) - { - /* The argument is too large */ - } - } -#endif /* PECULIAR_486 */ - - arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); - adj = norm_Xsig(&argSignif); - accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); - mul_Xsig_Xsig(&accumulator, &accumulator); - shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); - Xsq = XSIG_LL(accumulator); - if ( accumulator.lsw & 0x80000000 ) - Xsq++; - - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - /* Do the basic fixed point polynomial evaluation */ - polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); - - mul_Xsig_Xsig(&accumulator, &argSignif); - shr_Xsig(&accumulator, 6 - adj); - - mul32_Xsig(&arg_signif, leadterm); - add_two_Xsig(&accumulator, &arg_signif, &exponent); - - *expon = exponent + 1; - accum_result->lsw = accumulator.lsw; - accum_result->midw = accumulator.midw; - accum_result->msw = accumulator.msw; - -} diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c deleted file mode 100644 index a36313fb06f..00000000000 --- a/arch/i386/math-emu/poly_sin.c +++ /dev/null @@ -1,397 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_sin.c | - | | - | Computation of an approximation of the sin function and the cosine | - | function by a polynomial. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define N_COEFF_P 4 -#define N_COEFF_N 4 - -static const unsigned long long pos_terms_l[N_COEFF_P] = -{ - 0xaaaaaaaaaaaaaaabLL, - 0x00d00d00d00cf906LL, - 0x000006b99159a8bbLL, - 0x000000000d7392e6LL -}; - -static const unsigned long long neg_terms_l[N_COEFF_N] = -{ - 0x2222222222222167LL, - 0x0002e3bc74aab624LL, - 0x0000000b09229062LL, - 0x00000000000c7973LL -}; - - - -#define N_COEFF_PH 4 -#define N_COEFF_NH 4 -static const unsigned long long pos_terms_h[N_COEFF_PH] = -{ - 0x0000000000000000LL, - 0x05b05b05b05b0406LL, - 0x000049f93edd91a9LL, - 0x00000000c9c9ed62LL -}; - -static const unsigned long long neg_terms_h[N_COEFF_NH] = -{ - 0xaaaaaaaaaaaaaa98LL, - 0x001a01a01a019064LL, - 0x0000008f76c68a77LL, - 0x0000000000d58f5eLL -}; - - -/*--- poly_sine() -----------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_sine(FPU_REG *st0_ptr) -{ - int exponent, echange; - Xsig accumulator, argSqrd, argTo4; - unsigned long fix_up, adj; - unsigned long long fixed_arg; - FPU_REG result; - - exponent = exponent(st0_ptr); - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - - /* Split into two ranges, for arguments below and above 1.0 */ - /* The boundary between upper and lower is approx 0.88309101259 */ - if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) - { - /* The argument is <= 0.88309101259 */ - - argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &significand(st0_ptr)); - shr_Xsig(&argSqrd, 2*(-1-exponent)); - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, - N_COEFF_N-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, - N_COEFF_P-1); - - shr_Xsig(&accumulator, 2); /* Divide by four */ - accumulator.msw |= 0x80000000; /* Add 1.0 */ - - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - - /* Divide by four, FPU_REG compatible, etc */ - exponent = 3*exponent; - - /* The minimum exponent difference is 3 */ - shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); - - negate_Xsig(&accumulator); - XSIG_LL(accumulator) += significand(st0_ptr); - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, exponent(st0_ptr) + echange); - } - else - { - /* The argument is > 0.88309101259 */ - /* We use sin(st(0)) = cos(pi/2-st(0)) */ - - fixed_arg = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - - /* Put the binary point at the left. */ - fixed_arg <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - fixed_arg = 0x921fb54442d18469LL - fixed_arg; - /* There is a special case which arises due to rounding, to fix here. */ - if ( fixed_arg == 0xffffffffffffffffLL ) - fixed_arg = 0; - - XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &fixed_arg); - - XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, - N_COEFF_NH-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, - N_COEFF_PH-1); - negate_Xsig(&accumulator); - - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - - add_Xsig_Xsig(&accumulator, &argSqrd); - - shr_Xsig(&accumulator, 1); - - accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ - negate_Xsig(&accumulator); - - /* The basic computation is complete. Now fix the answer to - compensate for the error due to the approximation used for - pi/2 - */ - - /* This has an exponent of -65 */ - fix_up = 0x898cc517; - /* The fix-up needs to be improved for larger args */ - if ( argSqrd.msw & 0xffc00000 ) - { - /* Get about 32 bit precision in these: */ - fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; - } - fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); - - adj = accumulator.lsw; /* temp save */ - accumulator.lsw -= fix_up; - if ( accumulator.lsw > adj ) - XSIG_LL(accumulator) --; - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, echange - 1); - } - - significand(&result) = XSIG_LL(accumulator); - setsign(&result, getsign(st0_ptr)); - FPU_copy_to_reg0(&result, TAG_Valid); - -#ifdef PARANOID - if ( (exponent(&result) >= 0) - && (significand(&result) > 0x8000000000000000LL) ) - { - EXCEPTION(EX_INTERNAL|0x150); - } -#endif /* PARANOID */ - -} - - - -/*--- poly_cos() ------------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_cos(FPU_REG *st0_ptr) -{ - FPU_REG result; - long int exponent, exp2, echange; - Xsig accumulator, argSqrd, fix_up, argTo4; - unsigned long long fixed_arg; - -#ifdef PARANOID - if ( (exponent(st0_ptr) > 0) - || ((exponent(st0_ptr) == 0) - && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) - { - EXCEPTION(EX_Invalid); - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - return; - } -#endif /* PARANOID */ - - exponent = exponent(st0_ptr); - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - - if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) - { - /* arg is < 0.687705 */ - - argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; - argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &significand(st0_ptr)); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - shr_Xsig(&argSqrd, 2*(-1-exponent)); - } - - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, - N_COEFF_NH-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, - N_COEFF_PH-1); - negate_Xsig(&accumulator); - - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - shr_Xsig(&accumulator, -2*(1+exponent)); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - - add_Xsig_Xsig(&accumulator, &argSqrd); - - shr_Xsig(&accumulator, 1); - - /* It doesn't matter if accumulator is all zero here, the - following code will work ok */ - negate_Xsig(&accumulator); - - if ( accumulator.lsw & 0x80000000 ) - XSIG_LL(accumulator) ++; - if ( accumulator.msw == 0 ) - { - /* The result is 1.0 */ - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - return; - } - else - { - significand(&result) = XSIG_LL(accumulator); - - /* will be a valid positive nr with expon = -1 */ - setexponentpos(&result, -1); - } - } - else - { - fixed_arg = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - - /* Put the binary point at the left. */ - fixed_arg <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - fixed_arg = 0x921fb54442d18469LL - fixed_arg; - /* There is a special case which arises due to rounding, to fix here. */ - if ( fixed_arg == 0xffffffffffffffffLL ) - fixed_arg = 0; - - exponent = -1; - exp2 = -1; - - /* A shift is needed here only for a narrow range of arguments, - i.e. for fixed_arg approx 2^-32, but we pick up more... */ - if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) - { - fixed_arg <<= 16; - exponent -= 16; - exp2 -= 16; - } - - XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &fixed_arg); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - shr_Xsig(&argSqrd, 2*(-1-exponent)); - } - - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, - N_COEFF_N-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, - N_COEFF_P-1); - - shr_Xsig(&accumulator, 2); /* Divide by four */ - accumulator.msw |= 0x80000000; /* Add 1.0 */ - - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - - /* Divide by four, FPU_REG compatible, etc */ - exponent = 3*exponent; - - /* The minimum exponent difference is 3 */ - shr_Xsig(&accumulator, exp2 - exponent); - - negate_Xsig(&accumulator); - XSIG_LL(accumulator) += fixed_arg; - - /* The basic computation is complete. Now fix the answer to - compensate for the error due to the approximation used for - pi/2 - */ - - /* This has an exponent of -65 */ - XSIG_LL(fix_up) = 0x898cc51701b839a2ll; - fix_up.lsw = 0; - - /* The fix-up needs to be improved for larger args */ - if ( argSqrd.msw & 0xffc00000 ) - { - /* Get about 32 bit precision in these: */ - fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; - fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; - } - - exp2 += norm_Xsig(&accumulator); - shr_Xsig(&accumulator, 1); /* Prevent overflow */ - exp2++; - shr_Xsig(&fix_up, 65 + exp2); - - add_Xsig_Xsig(&accumulator, &fix_up); - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, exp2 + echange); - significand(&result) = XSIG_LL(accumulator); - } - - FPU_copy_to_reg0(&result, TAG_Valid); - -#ifdef PARANOID - if ( (exponent(&result) >= 0) - && (significand(&result) > 0x8000000000000000LL) ) - { - EXCEPTION(EX_INTERNAL|0x151); - } -#endif /* PARANOID */ - -} diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c deleted file mode 100644 index 8df3e03b6e6..00000000000 --- a/arch/i386/math-emu/poly_tan.c +++ /dev/null @@ -1,222 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_tan.c | - | | - | Compute the tan of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define HiPOWERop 3 /* odd poly, positive terms */ -static const unsigned long long oddplterm[HiPOWERop] = -{ - 0x0000000000000000LL, - 0x0051a1cf08fca228LL, - 0x0000000071284ff7LL -}; - -#define HiPOWERon 2 /* odd poly, negative terms */ -static const unsigned long long oddnegterm[HiPOWERon] = -{ - 0x1291a9a184244e80LL, - 0x0000583245819c21LL -}; - -#define HiPOWERep 2 /* even poly, positive terms */ -static const unsigned long long evenplterm[HiPOWERep] = -{ - 0x0e848884b539e888LL, - 0x00003c7f18b887daLL -}; - -#define HiPOWERen 2 /* even poly, negative terms */ -static const unsigned long long evennegterm[HiPOWERen] = -{ - 0xf1f0200fd51569ccLL, - 0x003afb46105c4432LL -}; - -static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; - - -/*--- poly_tan() ------------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_tan(FPU_REG *st0_ptr) -{ - long int exponent; - int invert; - Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, - argSignif, fix_up; - unsigned long adj; - - exponent = exponent(st0_ptr); - -#ifdef PARANOID - if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */ - { arith_invalid(0); return; } /* Need a positive number */ -#endif /* PARANOID */ - - /* Split the problem into two domains, smaller and larger than pi/4 */ - if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) ) - { - /* The argument is greater than (approx) pi/4 */ - invert = 1; - accum.lsw = 0; - XSIG_LL(accum) = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - /* Put the binary point at the left. */ - XSIG_LL(accum) <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); - /* This is a special case which arises due to rounding. */ - if ( XSIG_LL(accum) == 0xffffffffffffffffLL ) - { - FPU_settag0(TAG_Valid); - significand(st0_ptr) = 0x8a51e04daabda360LL; - setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative); - return; - } - - argSignif.lsw = accum.lsw; - XSIG_LL(argSignif) = XSIG_LL(accum); - exponent = -1 + norm_Xsig(&argSignif); - } - else - { - invert = 0; - argSignif.lsw = 0; - XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) - XSIG_LL(accum) ++; /* round up */ - } - } - - XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; - mul_Xsig_Xsig(&argSq, &argSq); - XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; - mul_Xsig_Xsig(&argSqSq, &argSqSq); - - /* Compute the negative terms for the numerator polynomial */ - accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; - polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); - mul_Xsig_Xsig(&accumulatoro, &argSq); - negate_Xsig(&accumulatoro); - /* Add the positive terms */ - polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); - - - /* Compute the positive terms for the denominator polynomial */ - accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; - polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); - mul_Xsig_Xsig(&accumulatore, &argSq); - negate_Xsig(&accumulatore); - /* Add the negative terms */ - polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); - /* Multiply by arg^2 */ - mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); - mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); - /* de-normalize and divide by 2 */ - shr_Xsig(&accumulatore, -2*(1+exponent) + 1); - negate_Xsig(&accumulatore); /* This does 1 - accumulator */ - - /* Now find the ratio. */ - if ( accumulatore.msw == 0 ) - { - /* accumulatoro must contain 1.0 here, (actually, 0) but it - really doesn't matter what value we use because it will - have negligible effect in later calculations - */ - XSIG_LL(accum) = 0x8000000000000000LL; - accum.lsw = 0; - } - else - { - div_Xsig(&accumulatoro, &accumulatore, &accum); - } - - /* Multiply by 1/3 * arg^3 */ - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &twothirds); - shr_Xsig(&accum, -2*(exponent+1)); - - /* tan(arg) = arg + accum */ - add_two_Xsig(&accum, &argSignif, &exponent); - - if ( invert ) - { - /* We now have the value of tan(pi_2 - arg) where pi_2 is an - approximation for pi/2 - */ - /* The next step is to fix the answer to compensate for the - error due to the approximation used for pi/2 - */ - - /* This is (approx) delta, the error in our approx for pi/2 - (see above). It has an exponent of -65 - */ - XSIG_LL(fix_up) = 0x898cc51701b839a2LL; - fix_up.lsw = 0; - - if ( exponent == 0 ) - adj = 0xffffffff; /* We want approx 1.0 here, but - this is close enough. */ - else if ( exponent > -30 ) - { - adj = accum.msw >> -(exponent+1); /* tan */ - adj = mul_32_32(adj, adj); /* tan^2 */ - } - else - adj = 0; - adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ - - fix_up.msw += adj; - if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ - { - /* Yes, we need to add an msb */ - shr_Xsig(&fix_up, 1); - fix_up.msw |= 0x80000000; - shr_Xsig(&fix_up, 64 + exponent); - } - else - shr_Xsig(&fix_up, 65 + exponent); - - add_two_Xsig(&accum, &fix_up, &exponent); - - /* accum now contains tan(pi/2 - arg). - Use tan(arg) = 1.0 / tan(pi/2 - arg) - */ - accumulatoro.lsw = accumulatoro.midw = 0; - accumulatoro.msw = 0x80000000; - div_Xsig(&accumulatoro, &accum, &accum); - exponent = - exponent - 1; - } - - /* Transfer the result */ - round_Xsig(&accum); - FPU_settag0(TAG_Valid); - significand(st0_ptr) = XSIG_LL(accum); - setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ - -} diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S deleted file mode 100644 index 17315c89ff3..00000000000 --- a/arch/i386/math-emu/polynom_Xsig.S +++ /dev/null @@ -1,135 +0,0 @@ -/*---------------------------------------------------------------------------+ - | polynomial_Xsig.S | - | | - | Fixed point arithmetic polynomial evaluation. | - | | - | Copyright (C) 1992,1993,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void polynomial_Xsig(Xsig *accum, unsigned long long x, | - | unsigned long long terms[], int n) | - | | - | Computes: | - | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | - | and adds the result to the 12 byte Xsig. | - | The terms[] are each 8 bytes, but all computation is performed to 12 byte | - | precision. | - | | - | This function must be used carefully: most overflow of intermediate | - | results is controlled, but overflow of the result is not. | - | | - +---------------------------------------------------------------------------*/ - .file "polynomial_Xsig.S" - -#include "fpu_emu.h" - - -#define TERM_SIZE $8 -#define SUM_MS -20(%ebp) /* sum ms long */ -#define SUM_MIDDLE -24(%ebp) /* sum middle long */ -#define SUM_LS -28(%ebp) /* sum ls long */ -#define ACCUM_MS -4(%ebp) /* accum ms long */ -#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ -#define ACCUM_LS -12(%ebp) /* accum ls long */ -#define OVERFLOWED -16(%ebp) /* addition overflow flag */ - -.text -ENTRY(polynomial_Xsig) - pushl %ebp - movl %esp,%ebp - subl $32,%esp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM2,%esi /* x */ - movl PARAM3,%edi /* terms */ - - movl TERM_SIZE,%eax - mull PARAM4 /* n */ - addl %eax,%edi - - movl 4(%edi),%edx /* terms[n] */ - movl %edx,SUM_MS - movl (%edi),%edx /* terms[n] */ - movl %edx,SUM_MIDDLE - xor %eax,%eax - movl %eax,SUM_LS - movb %al,OVERFLOWED - - subl TERM_SIZE,%edi - decl PARAM4 - js L_accum_done - -L_accum_loop: - xor %eax,%eax - movl %eax,ACCUM_MS - movl %eax,ACCUM_MIDDLE - - movl SUM_MIDDLE,%eax - mull (%esi) /* x ls long */ - movl %edx,ACCUM_LS - - movl SUM_MIDDLE,%eax - mull 4(%esi) /* x ms long */ - addl %eax,ACCUM_LS - adcl %edx,ACCUM_MIDDLE - adcl $0,ACCUM_MS - - movl SUM_MS,%eax - mull (%esi) /* x ls long */ - addl %eax,ACCUM_LS - adcl %edx,ACCUM_MIDDLE - adcl $0,ACCUM_MS - - movl SUM_MS,%eax - mull 4(%esi) /* x ms long */ - addl %eax,ACCUM_MIDDLE - adcl %edx,ACCUM_MS - - testb $0xff,OVERFLOWED - jz L_no_overflow - - movl (%esi),%eax - addl %eax,ACCUM_MIDDLE - movl 4(%esi),%eax - adcl %eax,ACCUM_MS /* This could overflow too */ - -L_no_overflow: - -/* - * Now put the sum of next term and the accumulator - * into the sum register - */ - movl ACCUM_LS,%eax - addl (%edi),%eax /* term ls long */ - movl %eax,SUM_LS - movl ACCUM_MIDDLE,%eax - adcl (%edi),%eax /* term ls long */ - movl %eax,SUM_MIDDLE - movl ACCUM_MS,%eax - adcl 4(%edi),%eax /* term ms long */ - movl %eax,SUM_MS - sbbb %al,%al - movb %al,OVERFLOWED /* Used in the next iteration */ - - subl TERM_SIZE,%edi - decl PARAM4 - jns L_accum_loop - -L_accum_done: - movl PARAM1,%edi /* accum */ - movl SUM_LS,%eax - addl %eax,(%edi) - movl SUM_MIDDLE,%eax - adcl %eax,4(%edi) - movl SUM_MS,%eax - adcl %eax,8(%edi) - - popl %ebx - popl %edi - popl %esi - leave - ret diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c deleted file mode 100644 index 7cd3b37ac08..00000000000 --- a/arch/i386/math-emu/reg_add_sub.c +++ /dev/null @@ -1,374 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_add_sub.c | - | | - | Functions to add or subtract two registers and put the result in a third. | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | For each function, the destination may be any FPU_REG, including one of | - | the source FPU_REGs. | - | Each function returns 0 if the answer is o.k., otherwise a non-zero | - | value is returned, indicating either an exception condition or an | - | internal error. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "fpu_system.h" - -static -int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, - FPU_REG const *b, u_char tagb, u_char signb, - FPU_REG *dest, int deststnr, int control_w); - -/* - Operates on st(0) and st(n), or on st(0) and temporary data. - The destination must be one of the source st(x). - */ -int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) -{ - FPU_REG *a = &st(0); - FPU_REG *dest = &st(deststnr); - u_char signb = getsign(b); - u_char taga = FPU_gettag0(); - u_char signa = getsign(a); - u_char saved_sign = getsign(dest); - int diff, tag, expa, expb; - - if ( !(taga | tagb) ) - { - expa = exponent(a); - expb = exponent(b); - - valid_add: - /* Both registers are valid */ - if (!(signa ^ signb)) - { - /* signs are the same */ - tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb); - } - else - { - /* The signs are different, so do a subtraction */ - diff = expa - expb; - if (!diff) - { - diff = a->sigh - b->sigh; /* This works only if the ms bits - are identical. */ - if (!diff) - { - diff = a->sigl > b->sigl; - if (!diff) - diff = -(a->sigl < b->sigl); - } - } - - if (diff > 0) - { - tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); - } - else if ( diff < 0 ) - { - tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa); - } - else - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - /* sign depends upon rounding mode */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - return TAG_Zero; - } - } - - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - a = &x; - b = &y; - expa = exponent16(a); - expb = exponent16(b); - goto valid_add; - } - - if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - if ( deststnr == 0 ) - return real_2op_NaN(b, tagb, deststnr, a); - else - return real_2op_NaN(a, taga, deststnr, a); - } - - return add_sub_specials(a, taga, signa, b, tagb, signb, - dest, deststnr, control_w); -} - - -/* Subtract b from a. (a-b) -> dest */ -int FPU_sub(int flags, int rm, int control_w) -{ - FPU_REG const *a, *b; - FPU_REG *dest; - u_char taga, tagb, signa, signb, saved_sign, sign; - int diff, tag = 0, expa, expb, deststnr; - - a = &st(0); - taga = FPU_gettag0(); - - deststnr = 0; - if ( flags & LOADED ) - { - b = (FPU_REG *)rm; - tagb = flags & 0x0f; - } - else - { - b = &st(rm); - tagb = FPU_gettagi(rm); - - if ( flags & DEST_RM ) - deststnr = rm; - } - - signa = getsign(a); - signb = getsign(b); - - if ( flags & REV ) - { - signa ^= SIGN_NEG; - signb ^= SIGN_NEG; - } - - dest = &st(deststnr); - saved_sign = getsign(dest); - - if ( !(taga | tagb) ) - { - expa = exponent(a); - expb = exponent(b); - - valid_subtract: - /* Both registers are valid */ - - diff = expa - expb; - - if (!diff) - { - diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ - if (!diff) - { - diff = a->sigl > b->sigl; - if (!diff) - diff = -(a->sigl < b->sigl); - } - } - - switch ( (((int)signa)*2 + signb) / SIGN_NEG ) - { - case 0: /* P - P */ - case 3: /* N - N */ - if (diff > 0) - { - /* |a| > |b| */ - tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); - } - else if ( diff == 0 ) - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - - /* sign depends upon rounding mode */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - return TAG_Zero; - } - else - { - sign = signa ^ SIGN_NEG; - tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa); - } - break; - case 1: /* P - N */ - tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb); - break; - case 2: /* N - P */ - tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb); - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x111); - return -1; -#endif - } - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - a = &x; - b = &y; - expa = exponent16(a); - expb = exponent16(b); - - goto valid_subtract; - } - - if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - FPU_REG const *d1, *d2; - if ( flags & REV ) - { - d1 = b; - d2 = a; - } - else - { - d1 = a; - d2 = b; - } - if ( flags & LOADED ) - return real_2op_NaN(b, tagb, deststnr, d1); - if ( flags & DEST_RM ) - return real_2op_NaN(a, taga, deststnr, d2); - else - return real_2op_NaN(b, tagb, deststnr, d2); - } - - return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, - dest, deststnr, control_w); -} - - -static -int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, - FPU_REG const *b, u_char tagb, u_char signb, - FPU_REG *dest, int deststnr, int control_w) -{ - if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) - && (denormal_operand() < 0) ) - return FPU_Exception; - - if (taga == TAG_Zero) - { - if (tagb == TAG_Zero) - { - /* Both are zero, result will be zero. */ - u_char different_signs = signa ^ signb; - - FPU_copy_to_regi(a, TAG_Zero, deststnr); - if ( different_signs ) - { - /* Signs are different. */ - /* Sign of answer depends upon rounding mode. */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - } - else - setsign(dest, signa); /* signa may differ from the sign of a. */ - return TAG_Zero; - } - else - { - reg_copy(b, dest); - if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) ) - { - /* A pseudoDenormal, convert it. */ - addexponent(dest, 1); - tagb = TAG_Valid; - } - else if ( tagb > TAG_Empty ) - tagb = TAG_Special; - setsign(dest, signb); /* signb may differ from the sign of b. */ - FPU_settagi(deststnr, tagb); - return tagb; - } - } - else if (tagb == TAG_Zero) - { - reg_copy(a, dest); - if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) ) - { - /* A pseudoDenormal */ - addexponent(dest, 1); - taga = TAG_Valid; - } - else if ( taga > TAG_Empty ) - taga = TAG_Special; - setsign(dest, signa); /* signa may differ from the sign of a. */ - FPU_settagi(deststnr, taga); - return taga; - } - else if (taga == TW_Infinity) - { - if ( (tagb != TW_Infinity) || (signa == signb) ) - { - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, signa); /* signa may differ from the sign of a. */ - return taga; - } - /* Infinity-Infinity is undefined. */ - return arith_invalid(deststnr); - } - else if (tagb == TW_Infinity) - { - FPU_copy_to_regi(b, TAG_Special, deststnr); - setsign(dest, signb); /* signb may differ from the sign of b. */ - return tagb; - } - -#ifdef PARANOID - EXCEPTION(EX_INTERNAL|0x101); -#endif - - return FPU_Exception; -} - diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c deleted file mode 100644 index f37c5b5a35a..00000000000 --- a/arch/i386/math-emu/reg_compare.c +++ /dev/null @@ -1,381 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_compare.c | - | | - | Compare two floating point registers | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | compare() is the core FPU_REG comparison function | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "status_w.h" - - -static int compare(FPU_REG const *b, int tagb) -{ - int diff, exp0, expb; - u_char st0_tag; - FPU_REG *st0_ptr; - FPU_REG x, y; - u_char st0_sign, signb = getsign(b); - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - st0_sign = getsign(st0_ptr); - - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) - || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) ) - { - if ( st0_tag == TAG_Zero ) - { - if ( tagb == TAG_Zero ) return COMP_A_eq_B; - if ( tagb == TAG_Valid ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); - if ( tagb == TW_Denormal ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | COMP_Denormal; - } - else if ( tagb == TAG_Zero ) - { - if ( st0_tag == TAG_Valid ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - if ( st0_tag == TW_Denormal ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | COMP_Denormal; - } - - if ( st0_tag == TW_Infinity ) - { - if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - else if ( tagb == TW_Denormal ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | COMP_Denormal; - else if ( tagb == TW_Infinity ) - { - /* The 80486 book says that infinities can be equal! */ - return (st0_sign == signb) ? COMP_A_eq_B : - ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - } - /* Fall through to the NaN code */ - } - else if ( tagb == TW_Infinity ) - { - if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); - if ( st0_tag == TW_Denormal ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | COMP_Denormal; - /* Fall through to the NaN code */ - } - - /* The only possibility now should be that one of the arguments - is a NaN */ - if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) ) - { - int signalling = 0, unsupported = 0; - if ( st0_tag == TW_NaN ) - { - signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000; - unsupported = !((exponent(st0_ptr) == EXP_OVER) - && (st0_ptr->sigh & 0x80000000)); - } - if ( tagb == TW_NaN ) - { - signalling |= (b->sigh & 0xc0000000) == 0x80000000; - unsupported |= !((exponent(b) == EXP_OVER) - && (b->sigh & 0x80000000)); - } - if ( signalling || unsupported ) - return COMP_No_Comp | COMP_SNaN | COMP_NaN; - else - /* Neither is a signaling NaN */ - return COMP_No_Comp | COMP_NaN; - } - - EXCEPTION(EX_Invalid); - } - - if (st0_sign != signb) - { - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - - if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) ) - { - FPU_to_exp16(st0_ptr, &x); - FPU_to_exp16(b, &y); - st0_ptr = &x; - b = &y; - exp0 = exponent16(st0_ptr); - expb = exponent16(b); - } - else - { - exp0 = exponent(st0_ptr); - expb = exponent(b); - } - -#ifdef PARANOID - if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); - if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); -#endif /* PARANOID */ - - diff = exp0 - expb; - if ( diff == 0 ) - { - diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are - identical */ - if ( diff == 0 ) - { - diff = st0_ptr->sigl > b->sigl; - if ( diff == 0 ) - diff = -(st0_ptr->sigl < b->sigl); - } - } - - if ( diff > 0 ) - { - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - if ( diff < 0 ) - { - return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - - return COMP_A_eq_B - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - -} - - -/* This function requires that st(0) is not empty */ -int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) -{ - int f = 0, c; - - c = compare(loaded_data, loaded_tag); - - if (c & COMP_NaN) - { - EXCEPTION(EX_Invalid); - f = SW_C3 | SW_C2 | SW_C0; - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x121); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - - -static int compare_st_st(int nr) -{ - int f = 0, c; - FPU_REG *st_ptr; - - if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) - { - setcc(SW_C3 | SW_C2 | SW_C0); - /* Stack fault */ - EXCEPTION(EX_StackUnder); - return !(control_word & CW_Invalid); - } - - st_ptr = &st(nr); - c = compare(st_ptr, FPU_gettagi(nr)); - if (c & COMP_NaN) - { - setcc(SW_C3 | SW_C2 | SW_C0); - EXCEPTION(EX_Invalid); - return !(control_word & CW_Invalid); - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x122); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - - -static int compare_u_st_st(int nr) -{ - int f = 0, c; - FPU_REG *st_ptr; - - if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) - { - setcc(SW_C3 | SW_C2 | SW_C0); - /* Stack fault */ - EXCEPTION(EX_StackUnder); - return !(control_word & CW_Invalid); - } - - st_ptr = &st(nr); - c = compare(st_ptr, FPU_gettagi(nr)); - if (c & COMP_NaN) - { - setcc(SW_C3 | SW_C2 | SW_C0); - if (c & COMP_SNaN) /* This is the only difference between - un-ordered and ordinary comparisons */ - { - EXCEPTION(EX_Invalid); - return !(control_word & CW_Invalid); - } - return 0; - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x123); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - -/*---------------------------------------------------------------------------*/ - -void fcom_st(void) -{ - /* fcom st(i) */ - compare_st_st(FPU_rm); -} - - -void fcompst(void) -{ - /* fcomp st(i) */ - if ( !compare_st_st(FPU_rm) ) - FPU_pop(); -} - - -void fcompp(void) -{ - /* fcompp */ - if (FPU_rm != 1) - { - FPU_illegal(); - return; - } - if ( !compare_st_st(1) ) - poppop(); -} - - -void fucom_(void) -{ - /* fucom st(i) */ - compare_u_st_st(FPU_rm); - -} - - -void fucomp(void) -{ - /* fucomp st(i) */ - if ( !compare_u_st_st(FPU_rm) ) - FPU_pop(); -} - - -void fucompp(void) -{ - /* fucompp */ - if (FPU_rm == 1) - { - if ( !compare_u_st_st(1) ) - poppop(); - } - else - FPU_illegal(); -} diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c deleted file mode 100644 index a8501580196..00000000000 --- a/arch/i386/math-emu/reg_constant.c +++ /dev/null @@ -1,120 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_constant.c | - | | - | All of the constant FPU_REGs | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "reg_constant.h" -#include "control_w.h" - - -#define MAKE_REG(s,e,l,h) { l, h, \ - ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } - -FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); -#if 0 -FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); -FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); -#endif /* 0 */ -static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); -static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); -FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); -FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); -FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); -static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); -static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); - -/* Extra bits to take pi/2 to more than 128 bits precision. */ -FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, - 0xfc8f8cbb, 0xece675d1); - -/* Only the sign (and tag) is used in internal zeroes */ -FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); - -/* Only the sign and significand (and tag) are used in internal NaNs */ -/* The 80486 never generates one of these -FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); - */ -/* This is the real indefinite QNaN */ -FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); - -/* Only the sign (and tag) is used in internal infinities */ -FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); - - -static void fld_const(FPU_REG const *c, int adj, u_char tag) -{ - FPU_REG *st_new_ptr; - - if ( STACK_OVERFLOW ) - { - FPU_stack_overflow(); - return; - } - push(); - reg_copy(c, st_new_ptr); - st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to - borrow or carry. */ - FPU_settag0(tag); - clear_C1(); -} - -/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP - (and not one of RC_RND or RC_UP). - */ -#define DOWN_OR_CHOP(x) (x & RC_DOWN) - -static void fld1(int rc) -{ - fld_const(&CONST_1, 0, TAG_Valid); -} - -static void fldl2t(int rc) -{ - fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); -} - -static void fldl2e(int rc) -{ - fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldpi(int rc) -{ - fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldlg2(int rc) -{ - fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldln2(int rc) -{ - fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldz(int rc) -{ - fld_const(&CONST_Z, 0, TAG_Zero); -} - -typedef void (*FUNC_RC)(int); - -static FUNC_RC constants_table[] = { - fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal -}; - -void fconst(void) -{ - (constants_table[FPU_rm])(control_word & CW_RC); -} diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h deleted file mode 100644 index 1bffaec3a13..00000000000 --- a/arch/i386/math-emu/reg_constant.h +++ /dev/null @@ -1,25 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_constant.h | - | | - | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _REG_CONSTANT_H_ -#define _REG_CONSTANT_H_ - -#include "fpu_emu.h" - -extern FPU_REG const CONST_1; -extern FPU_REG const CONST_PI; -extern FPU_REG const CONST_PI2; -extern FPU_REG const CONST_PI2extra; -extern FPU_REG const CONST_PI4; -extern FPU_REG const CONST_Z; -extern FPU_REG const CONST_PINF; -extern FPU_REG const CONST_INF; -extern FPU_REG const CONST_MINF; -extern FPU_REG const CONST_QNaN; - -#endif /* _REG_CONSTANT_H_ */ diff --git a/arch/i386/math-emu/reg_convert.c b/arch/i386/math-emu/reg_convert.c deleted file mode 100644 index 45a25875270..00000000000 --- a/arch/i386/math-emu/reg_convert.c +++ /dev/null @@ -1,53 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_convert.c | - | | - | Convert register representation. | - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) -{ - int sign = getsign(a); - - *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); - - /* Set up the exponent as a 16 bit quantity. */ - setexponent16(x, exponent(a)); - - if ( exponent16(x) == EXP_UNDER ) - { - /* The number is a de-normal or pseudodenormal. */ - /* We only deal with the significand and exponent. */ - - if (x->sigh & 0x80000000) - { - /* Is a pseudodenormal. */ - /* This is non-80486 behaviour because the number - loses its 'denormal' identity. */ - addexponent(x, 1); - } - else - { - /* Is a denormal. */ - addexponent(x, 1); - FPU_normalize_nuo(x); - } - } - - if ( !(x->sigh & 0x80000000) ) - { - EXCEPTION(EX_INTERNAL | 0x180); - } - - return sign; -} - diff --git a/arch/i386/math-emu/reg_divide.c b/arch/i386/math-emu/reg_divide.c deleted file mode 100644 index 5cee7ff920d..00000000000 --- a/arch/i386/math-emu/reg_divide.c +++ /dev/null @@ -1,207 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_divide.c | - | | - | Divide one FPU_REG by another and put the result in a destination FPU_REG.| - | | - | Copyright (C) 1996 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The destination may be any FPU_REG, including one of the source FPU_REGs. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" - -/* - Divide one register by another and put the result into a third register. - */ -int FPU_div(int flags, int rm, int control_w) -{ - FPU_REG x, y; - FPU_REG const *a, *b, *st0_ptr, *st_ptr; - FPU_REG *dest; - u_char taga, tagb, signa, signb, sign, saved_sign; - int tag, deststnr; - - if ( flags & DEST_RM ) - deststnr = rm; - else - deststnr = 0; - - if ( flags & REV ) - { - b = &st(0); - st0_ptr = b; - tagb = FPU_gettag0(); - if ( flags & LOADED ) - { - a = (FPU_REG *)rm; - taga = flags & 0x0f; - } - else - { - a = &st(rm); - st_ptr = a; - taga = FPU_gettagi(rm); - } - } - else - { - a = &st(0); - st0_ptr = a; - taga = FPU_gettag0(); - if ( flags & LOADED ) - { - b = (FPU_REG *)rm; - tagb = flags & 0x0f; - } - else - { - b = &st(rm); - st_ptr = b; - tagb = FPU_gettagi(rm); - } - } - - signa = getsign(a); - signb = getsign(b); - - sign = signa ^ signb; - - dest = &st(deststnr); - saved_sign = getsign(dest); - - if ( !(taga | tagb) ) - { - /* Both regs Valid, this should be the most common case. */ - reg_copy(a, &x); - reg_copy(b, &y); - setpositive(&x); - setpositive(&y); - tag = FPU_u_div(&x, &y, dest, control_w, sign); - - if ( tag < 0 ) - return tag; - - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - tag = FPU_u_div(&x, &y, dest, control_w, sign); - if ( tag < 0 ) - return tag; - - FPU_settagi(deststnr, tag); - return tag; - } - else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) - { - if ( tagb != TAG_Zero ) - { - /* Want to find Zero/Valid */ - if ( tagb == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return FPU_Exception; - } - - /* The result is zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - setsign(dest, sign); - return TAG_Zero; - } - /* We have an exception condition, either 0/0 or Valid/Zero. */ - if ( taga == TAG_Zero ) - { - /* 0/0 */ - return arith_invalid(deststnr); - } - /* Valid/Zero */ - return FPU_divide_by_zero(deststnr, sign); - } - /* Must have infinities, NaNs, etc */ - else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - if ( flags & LOADED ) - return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr); - - if ( flags & DEST_RM ) - { - int tag; - tag = FPU_gettag0(); - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm)); - } - else - { - int tag; - tag = FPU_gettagi(rm); - if ( tag == TAG_Special ) - tag = FPU_Special(&st(rm)); - return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm)); - } - } - else if (taga == TW_Infinity) - { - if (tagb == TW_Infinity) - { - /* infinity/infinity */ - return arith_invalid(deststnr); - } - else - { - /* tagb must be Valid or Zero */ - if ( (tagb == TW_Denormal) && (denormal_operand() < 0) ) - return FPU_Exception; - - /* Infinity divided by Zero or Valid does - not raise and exception, but returns Infinity */ - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, sign); - return taga; - } - } - else if (tagb == TW_Infinity) - { - if ( (taga == TW_Denormal) && (denormal_operand() < 0) ) - return FPU_Exception; - - /* The result is zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - setsign(dest, sign); - return TAG_Zero; - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x102); - return FPU_Exception; - } -#endif /* PARANOID */ - - return 0; -} diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c deleted file mode 100644 index e976caef649..00000000000 --- a/arch/i386/math-emu/reg_ld_str.c +++ /dev/null @@ -1,1375 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_ld_str.c | - | | - | All of the functions which transfer data between user memory and FPU_REGs.| - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -#include - -#include "fpu_system.h" -#include "exception.h" -#include "reg_constant.h" -#include "control_w.h" -#include "status_w.h" - - -#define DOUBLE_Emax 1023 /* largest valid exponent */ -#define DOUBLE_Ebias 1023 -#define DOUBLE_Emin (-1022) /* smallest valid exponent */ - -#define SINGLE_Emax 127 /* largest valid exponent */ -#define SINGLE_Ebias 127 -#define SINGLE_Emin (-126) /* smallest valid exponent */ - - -static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) -{ - u_char tag; - - setexponent16(r, exp); - - tag = FPU_normalize_nuo(r); - stdexp(r); - if ( sign ) - setnegative(r); - - return tag; -} - - -int FPU_tagof(FPU_REG *ptr) -{ - int exp; - - exp = exponent16(ptr) & 0x7fff; - if ( exp == 0 ) - { - if ( !(ptr->sigh | ptr->sigl) ) - { - return TAG_Zero; - } - /* The number is a de-normal or pseudodenormal. */ - return TAG_Special; - } - - if ( exp == 0x7fff ) - { - /* Is an Infinity, a NaN, or an unsupported data type. */ - return TAG_Special; - } - - if ( !(ptr->sigh & 0x80000000) ) - { - /* Unsupported data type. */ - /* Valid numbers have the ms bit set to 1. */ - /* Unnormal. */ - return TAG_Special; - } - - return TAG_Valid; -} - - -/* Get a long double from user memory */ -int FPU_load_extended(long double __user *s, int stnr) -{ - FPU_REG *sti_ptr = &st(stnr); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 10); - __copy_from_user(sti_ptr, s, 10); - RE_ENTRANT_CHECK_ON; - - return FPU_tagof(sti_ptr); -} - - -/* Get a double from user memory */ -int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) -{ - int exp, tag, negative; - unsigned m64, l64; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, dfloat, 8); - FPU_get_user(m64, 1 + (unsigned long __user *) dfloat); - FPU_get_user(l64, (unsigned long __user *) dfloat); - RE_ENTRANT_CHECK_ON; - - negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; - exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; - m64 &= 0xfffff; - if ( exp > DOUBLE_Emax + EXTENDED_Ebias ) - { - /* Infinity or NaN */ - if ((m64 == 0) && (l64 == 0)) - { - /* +- infinity */ - loaded_data->sigh = 0x80000000; - loaded_data->sigl = 0x00000000; - exp = EXP_Infinity + EXTENDED_Ebias; - tag = TAG_Special; - } - else - { - /* Must be a signaling or quiet NaN */ - exp = EXP_NaN + EXTENDED_Ebias; - loaded_data->sigh = (m64 << 11) | 0x80000000; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - tag = TAG_Special; /* The calling function must look for NaNs */ - } - } - else if ( exp < DOUBLE_Emin + EXTENDED_Ebias ) - { - /* Zero or de-normal */ - if ((m64 == 0) && (l64 == 0)) - { - /* Zero */ - reg_copy(&CONST_Z, loaded_data); - exp = 0; - tag = TAG_Zero; - } - else - { - /* De-normal */ - loaded_data->sigh = m64 << 11; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - - return normalize_no_excep(loaded_data, DOUBLE_Emin, negative) - | (denormal_operand() < 0 ? FPU_Exception : 0); - } - } - else - { - loaded_data->sigh = (m64 << 11) | 0x80000000; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - - tag = TAG_Valid; - } - - setexponent16(loaded_data, exp | negative); - - return tag; -} - - -/* Get a float from user memory */ -int FPU_load_single(float __user *single, FPU_REG *loaded_data) -{ - unsigned m32; - int exp, tag, negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, single, 4); - FPU_get_user(m32, (unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - - negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; - - if (!(m32 & 0x7fffffff)) - { - /* Zero */ - reg_copy(&CONST_Z, loaded_data); - addexponent(loaded_data, negative); - return TAG_Zero; - } - exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; - m32 = (m32 & 0x7fffff) << 8; - if ( exp < SINGLE_Emin + EXTENDED_Ebias ) - { - /* De-normals */ - loaded_data->sigh = m32; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, SINGLE_Emin, negative) - | (denormal_operand() < 0 ? FPU_Exception : 0); - } - else if ( exp > SINGLE_Emax + EXTENDED_Ebias ) - { - /* Infinity or NaN */ - if ( m32 == 0 ) - { - /* +- infinity */ - loaded_data->sigh = 0x80000000; - loaded_data->sigl = 0x00000000; - exp = EXP_Infinity + EXTENDED_Ebias; - tag = TAG_Special; - } - else - { - /* Must be a signaling or quiet NaN */ - exp = EXP_NaN + EXTENDED_Ebias; - loaded_data->sigh = m32 | 0x80000000; - loaded_data->sigl = 0; - tag = TAG_Special; /* The calling function must look for NaNs */ - } - } - else - { - loaded_data->sigh = m32 | 0x80000000; - loaded_data->sigl = 0; - tag = TAG_Valid; - } - - setexponent16(loaded_data, exp | negative); /* Set the sign. */ - - return tag; -} - - -/* Get a long long from user memory */ -int FPU_load_int64(long long __user *_s) -{ - long long s; - int sign; - FPU_REG *st0_ptr = &st(0); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 8); - if (copy_from_user(&s,_s,8)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { - reg_copy(&CONST_Z, st0_ptr); - return TAG_Zero; - } - - if (s > 0) - sign = SIGN_Positive; - else - { - s = -s; - sign = SIGN_Negative; - } - - significand(st0_ptr) = s; - - return normalize_no_excep(st0_ptr, 63, sign); -} - - -/* Get a long from user memory */ -int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) -{ - long s; - int negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 4); - FPU_get_user(s, _s); - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } - - if (s > 0) - negative = SIGN_Positive; - else - { - s = -s; - negative = SIGN_Negative; - } - - loaded_data->sigh = s; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, 31, negative); -} - - -/* Get a short from user memory */ -int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) -{ - int s, negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 2); - /* Cast as short to get the sign extended. */ - FPU_get_user(s, _s); - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } - - if (s > 0) - negative = SIGN_Positive; - else - { - s = -s; - negative = SIGN_Negative; - } - - loaded_data->sigh = s << 16; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, 15, negative); -} - - -/* Get a packed bcd array from user memory */ -int FPU_load_bcd(u_char __user *s) -{ - FPU_REG *st0_ptr = &st(0); - int pos; - u_char bcd; - long long l=0; - int sign; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 10); - RE_ENTRANT_CHECK_ON; - for ( pos = 8; pos >= 0; pos--) - { - l *= 10; - RE_ENTRANT_CHECK_OFF; - FPU_get_user(bcd, s+pos); - RE_ENTRANT_CHECK_ON; - l += bcd >> 4; - l *= 10; - l += bcd & 0x0f; - } - - RE_ENTRANT_CHECK_OFF; - FPU_get_user(sign, s+9); - sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; - RE_ENTRANT_CHECK_ON; - - if ( l == 0 ) - { - reg_copy(&CONST_Z, st0_ptr); - addexponent(st0_ptr, sign); /* Set the sign. */ - return TAG_Zero; - } - else - { - significand(st0_ptr) = l; - return normalize_no_excep(st0_ptr, 63, sign); - } -} - -/*===========================================================================*/ - -/* Put a long double into user memory */ -int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d) -{ - /* - The only exception raised by an attempt to store to an - extended format is the Invalid Stack exception, i.e. - attempting to store from an empty register. - */ - - if ( st0_tag != TAG_Empty ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 10); - - FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d); - FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4)); - FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8)); - RE_ENTRANT_CHECK_ON; - - return 1; - } - - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - FPU_put_user(0, (unsigned long __user *) d); - FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d); - FPU_put_user(0xffff, 4 + (short __user *) d); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - -} - - -/* Put a double into user memory */ -int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) -{ - unsigned long l[2]; - unsigned long increment = 0; /* avoid gcc warnings */ - int precision_loss; - int exp; - FPU_REG tmp; - - if ( st0_tag == TAG_Valid ) - { - reg_copy(st0_ptr, &tmp); - exp = exponent(&tmp); - - if ( exp < DOUBLE_Emin ) /* It may be a denormal */ - { - addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ - - denormal_arg: - - if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) - { -#ifdef PECULIAR_486 - /* Did it round to a non-denormal ? */ - /* This behaviour might be regarded as peculiar, it appears - that the 80486 rounds to the dest precision, then - converts to decide underflow. */ - if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && - (st0_ptr->sigl & 0x000007ff)) ) -#endif /* PECULIAR_486 */ - { - EXCEPTION(EX_Underflow); - /* This is a special case: see sec 16.2.5.1 of - the 80486 book */ - if ( !(control_word & CW_Underflow) ) - return 0; - } - EXCEPTION(precision_loss); - if ( !(control_word & CW_Precision) ) - return 0; - } - l[0] = tmp.sigl; - l[1] = tmp.sigh; - } - else - { - if ( tmp.sigl & 0x000007ff ) - { - precision_loss = 1; - switch (control_word & CW_RC) - { - case RC_RND: - /* Rounding can get a little messy.. */ - increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ - ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ - break; - case RC_DOWN: /* towards -infinity */ - increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff; - break; - case RC_UP: /* towards +infinity */ - increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0; - break; - case RC_CHOP: - increment = 0; - break; - } - - /* Truncate the mantissa */ - tmp.sigl &= 0xfffff800; - - if ( increment ) - { - if ( tmp.sigl >= 0xfffff800 ) - { - /* the sigl part overflows */ - if ( tmp.sigh == 0xffffffff ) - { - /* The sigh part overflows */ - tmp.sigh = 0x80000000; - exp++; - if (exp >= EXP_OVER) - goto overflow; - } - else - { - tmp.sigh ++; - } - tmp.sigl = 0x00000000; - } - else - { - /* We only need to increment sigl */ - tmp.sigl += 0x00000800; - } - } - } - else - precision_loss = 0; - - l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); - l[1] = ((tmp.sigh >> 11) & 0xfffff); - - if ( exp > DOUBLE_Emax ) - { - overflow: - EXCEPTION(EX_Overflow); - if ( !(control_word & CW_Overflow) ) - return 0; - set_precision_flag_up(); - if ( !(control_word & CW_Precision) ) - return 0; - - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - /* Overflow to infinity */ - l[0] = 0x00000000; /* Set to */ - l[1] = 0x7ff00000; /* + INF */ - } - else - { - if ( precision_loss ) - { - if ( increment ) - set_precision_flag_up(); - else - set_precision_flag_down(); - } - /* Add the exponent */ - l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); - } - } - } - else if (st0_tag == TAG_Zero) - { - /* Number is zero */ - l[0] = 0; - l[1] = 0; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( st0_tag == TW_Denormal ) - { - /* A denormal will always underflow. */ -#ifndef PECULIAR_486 - /* An 80486 is supposed to be able to generate - a denormal exception here, but... */ - /* Underflow has priority. */ - if ( control_word & CW_Underflow ) - denormal_operand(); -#endif /* PECULIAR_486 */ - reg_copy(st0_ptr, &tmp); - goto denormal_arg; - } - else if (st0_tag == TW_Infinity) - { - l[0] = 0; - l[1] = 0x7ff00000; - } - else if (st0_tag == TW_NaN) - { - /* Is it really a NaN ? */ - if ( (exponent(st0_ptr) == EXP_OVER) - && (st0_ptr->sigh & 0x80000000) ) - { - /* See if we can get a valid NaN from the FPU_REG */ - l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); - l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); - if ( !(st0_ptr->sigh & 0x40000000) ) - { - /* It is a signalling NaN */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - l[1] |= (0x40000000 >> 11); - } - l[1] |= 0x7ff00000; - } - else - { - /* It is an unsupported data type */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - l[0] = 0; - l[1] = 0xfff80000; - } - } - } - else if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,dfloat,8); - FPU_put_user(0, (unsigned long __user *) dfloat); - FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } - if ( getsign(st0_ptr) ) - l[1] |= 0x80000000; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,dfloat,8); - FPU_put_user(l[0], (unsigned long __user *)dfloat); - FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a float into user memory */ -int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) -{ - long templ = 0; - unsigned long increment = 0; /* avoid gcc warnings */ - int precision_loss; - int exp; - FPU_REG tmp; - - if ( st0_tag == TAG_Valid ) - { - - reg_copy(st0_ptr, &tmp); - exp = exponent(&tmp); - - if ( exp < SINGLE_Emin ) - { - addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ - - denormal_arg: - - if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) - { -#ifdef PECULIAR_486 - /* Did it round to a non-denormal ? */ - /* This behaviour might be regarded as peculiar, it appears - that the 80486 rounds to the dest precision, then - converts to decide underflow. */ - if ( !((tmp.sigl == 0x00800000) && - ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) -#endif /* PECULIAR_486 */ - { - EXCEPTION(EX_Underflow); - /* This is a special case: see sec 16.2.5.1 of - the 80486 book */ - if ( !(control_word & CW_Underflow) ) - return 0; - } - EXCEPTION(precision_loss); - if ( !(control_word & CW_Precision) ) - return 0; - } - templ = tmp.sigl; - } - else - { - if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) - { - unsigned long sigh = tmp.sigh; - unsigned long sigl = tmp.sigl; - - precision_loss = 1; - switch (control_word & CW_RC) - { - case RC_RND: - increment = ((sigh & 0xff) > 0x80) /* more than half */ - || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ - || ((sigh & 0x180) == 0x180); /* round to even */ - break; - case RC_DOWN: /* towards -infinity */ - increment = signpositive(&tmp) - ? 0 : (sigl | (sigh & 0xff)); - break; - case RC_UP: /* towards +infinity */ - increment = signpositive(&tmp) - ? (sigl | (sigh & 0xff)) : 0; - break; - case RC_CHOP: - increment = 0; - break; - } - - /* Truncate part of the mantissa */ - tmp.sigl = 0; - - if (increment) - { - if ( sigh >= 0xffffff00 ) - { - /* The sigh part overflows */ - tmp.sigh = 0x80000000; - exp++; - if ( exp >= EXP_OVER ) - goto overflow; - } - else - { - tmp.sigh &= 0xffffff00; - tmp.sigh += 0x100; - } - } - else - { - tmp.sigh &= 0xffffff00; /* Finish the truncation */ - } - } - else - precision_loss = 0; - - templ = (tmp.sigh >> 8) & 0x007fffff; - - if ( exp > SINGLE_Emax ) - { - overflow: - EXCEPTION(EX_Overflow); - if ( !(control_word & CW_Overflow) ) - return 0; - set_precision_flag_up(); - if ( !(control_word & CW_Precision) ) - return 0; - - /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ - /* Masked response is overflow to infinity. */ - templ = 0x7f800000; - } - else - { - if ( precision_loss ) - { - if ( increment ) - set_precision_flag_up(); - else - set_precision_flag_down(); - } - /* Add the exponent */ - templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; - } - } - } - else if (st0_tag == TAG_Zero) - { - templ = 0; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if (st0_tag == TW_Denormal) - { - reg_copy(st0_ptr, &tmp); - - /* A denormal will always underflow. */ -#ifndef PECULIAR_486 - /* An 80486 is supposed to be able to generate - a denormal exception here, but... */ - /* Underflow has priority. */ - if ( control_word & CW_Underflow ) - denormal_operand(); -#endif /* PECULIAR_486 */ - goto denormal_arg; - } - else if (st0_tag == TW_Infinity) - { - templ = 0x7f800000; - } - else if (st0_tag == TW_NaN) - { - /* Is it really a NaN ? */ - if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) ) - { - /* See if we can get a valid NaN from the FPU_REG */ - templ = st0_ptr->sigh >> 8; - if ( !(st0_ptr->sigh & 0x40000000) ) - { - /* It is a signalling NaN */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - templ |= (0x40000000 >> 8); - } - templ |= 0x7f800000; - } - else - { - /* It is an unsupported data type */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - templ = 0xffc00000; - } - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x164); - return 0; - } -#endif - } - else if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & EX_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,single,4); - FPU_put_user(0xffc00000, (unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x163); - return 0; - } -#endif - if ( getsign(st0_ptr) ) - templ |= 0x80000000; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,single,4); - FPU_put_user(templ,(unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a long long into user memory */ -int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) -{ - FPU_REG t; - long long tll; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - ((long *)&tll)[0] = t.sigl; - ((long *)&tll)[1] = t.sigh; - if ( (precision_loss == 1) || - ((t.sigh & 0x80000000) && - !((t.sigh == 0x80000000) && (t.sigl == 0) && - signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - tll = 0x8000000000000000LL; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - tll = - tll; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,8); - if (copy_to_user(d, &tll, 8)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a long into user memory */ -int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) -{ - FPU_REG t; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - if (t.sigh || - ((t.sigl & 0x80000000) && - !((t.sigl == 0x80000000) && signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - t.sigl = 0x80000000; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - t.sigl = -(long)t.sigl; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,4); - FPU_put_user(t.sigl, (unsigned long __user *) d); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a short into user memory */ -int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) -{ - FPU_REG t; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - if (t.sigh || - ((t.sigl & 0xffff8000) && - !((t.sigl == 0x8000) && signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - t.sigl = 0x8000; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - t.sigl = -t.sigl; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,2); - FPU_put_user((short)t.sigl, d); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a packed bcd array into user memory */ -int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) -{ - FPU_REG t; - unsigned long long ll; - u_char b; - int i, precision_loss; - u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - ll = significand(&t); - - /* Check for overflow, by comparing with 999999999999999999 decimal. */ - if ( (t.sigh > 0x0de0b6b3) || - ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & CW_Invalid ) - { - /* Produce the QNaN "indefinite" */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - for ( i = 0; i < 7; i++) - FPU_put_user(0, d+i); /* These bytes "undefined" */ - FPU_put_user(0xc0, d+7); /* This byte "undefined" */ - FPU_put_user(0xff, d+8); - FPU_put_user(0xff, d+9); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } - else if ( precision_loss ) - { - /* Precision loss doesn't stop the data transfer */ - set_precision_flag(precision_loss); - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - RE_ENTRANT_CHECK_ON; - for ( i = 0; i < 9; i++) - { - b = FPU_div_small(&ll, 10); - b |= (FPU_div_small(&ll, 10)) << 4; - RE_ENTRANT_CHECK_OFF; - FPU_put_user(b, d+i); - RE_ENTRANT_CHECK_ON; - } - RE_ENTRANT_CHECK_OFF; - FPU_put_user(sign, d+9); - RE_ENTRANT_CHECK_ON; - - return 1; -} - -/*===========================================================================*/ - -/* r gets mangled such that sig is int, sign: - it is NOT normalized */ -/* The return value (in eax) is zero if the result is exact, - if bits are changed due to rounding, truncation, etc, then - a non-zero value is returned */ -/* Overflow is signalled by a non-zero return value (in eax). - In the case of overflow, the returned significand always has the - largest possible value */ -int FPU_round_to_int(FPU_REG *r, u_char tag) -{ - u_char very_big; - unsigned eax; - - if (tag == TAG_Zero) - { - /* Make sure that zero is returned */ - significand(r) = 0; - return 0; /* o.k. */ - } - - if (exponent(r) > 63) - { - r->sigl = r->sigh = ~0; /* The largest representable number */ - return 1; /* overflow */ - } - - eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); - very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ -#define half_or_more (eax & 0x80000000) -#define frac_part (eax) -#define more_than_half ((eax & 0x80000001) == 0x80000001) - switch (control_word & CW_RC) - { - case RC_RND: - if ( more_than_half /* nearest */ - || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_DOWN: - if (frac_part && getsign(r)) - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_UP: - if (frac_part && !getsign(r)) - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_CHOP: - break; - } - - return eax ? PRECISION_LOST_DOWN : 0; - -} - -/*===========================================================================*/ - -u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) -{ - unsigned short tag_word = 0; - u_char tag; - int i; - - if ( (addr_modes.default_mode == VM86) || - ((addr_modes.default_mode == PM16) - ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 0x0e); - FPU_get_user(control_word, (unsigned short __user *) s); - FPU_get_user(partial_status, (unsigned short __user *) (s+2)); - FPU_get_user(tag_word, (unsigned short __user *) (s+4)); - FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6)); - FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8)); - FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a)); - FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c)); - RE_ENTRANT_CHECK_ON; - s += 0x0e; - if ( addr_modes.default_mode == VM86 ) - { - instruction_address.offset - += (instruction_address.selector & 0xf000) << 4; - operand_address.offset += (operand_address.selector & 0xf000) << 4; - } - } - else - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 0x1c); - FPU_get_user(control_word, (unsigned short __user *) s); - FPU_get_user(partial_status, (unsigned short __user *) (s+4)); - FPU_get_user(tag_word, (unsigned short __user *) (s+8)); - FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c)); - FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10)); - FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12)); - FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14)); - FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18)); - RE_ENTRANT_CHECK_ON; - s += 0x1c; - } - -#ifdef PECULIAR_486 - control_word &= ~0xe080; -#endif /* PECULIAR_486 */ - - top = (partial_status >> SW_Top_Shift) & 7; - - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - else - partial_status &= ~(SW_Summary | SW_Backward); - - for ( i = 0; i < 8; i++ ) - { - tag = tag_word & 3; - tag_word >>= 2; - - if ( tag == TAG_Empty ) - /* New tag is empty. Accept it */ - FPU_settag(i, TAG_Empty); - else if ( FPU_gettag(i) == TAG_Empty ) - { - /* Old tag is empty and new tag is not empty. New tag is determined - by old reg contents */ - if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias ) - { - if ( !(fpu_register(i).sigl | fpu_register(i).sigh) ) - FPU_settag(i, TAG_Zero); - else - FPU_settag(i, TAG_Special); - } - else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias ) - { - FPU_settag(i, TAG_Special); - } - else if ( fpu_register(i).sigh & 0x80000000 ) - FPU_settag(i, TAG_Valid); - else - FPU_settag(i, TAG_Special); /* An Un-normal */ - } - /* Else old tag is not empty and new tag is not empty. Old tag - remains correct */ - } - - return s; -} - - -void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) -{ - int i, regnr; - u_char __user *s = fldenv(addr_modes, data_address); - int offset = (top & 7) * 10, other = 80 - offset; - - /* Copy all registers in stack order. */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ,s,80); - __copy_from_user(register_base+offset, s, other); - if ( offset ) - __copy_from_user(register_base, s+other, offset); - RE_ENTRANT_CHECK_ON; - - for ( i = 0; i < 8; i++ ) - { - regnr = (i+top) & 7; - if ( FPU_gettag(regnr) != TAG_Empty ) - /* The loaded data over-rides all other cases. */ - FPU_settag(regnr, FPU_tagof(&st(i))); - } - -} - - -u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) -{ - if ( (addr_modes.default_mode == VM86) || - ((addr_modes.default_mode == PM16) - ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,14); -#ifdef PECULIAR_486 - FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d); -#else - FPU_put_user(control_word, (unsigned short __user *) d); -#endif /* PECULIAR_486 */ - FPU_put_user(status_word(), (unsigned short __user *) (d+2)); - FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4)); - FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6)); - FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a)); - if ( addr_modes.default_mode == VM86 ) - { - FPU_put_user((instruction_address.offset & 0xf0000) >> 4, - (unsigned short __user *) (d+8)); - FPU_put_user((operand_address.offset & 0xf0000) >> 4, - (unsigned short __user *) (d+0x0c)); - } - else - { - FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8)); - FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c)); - } - RE_ENTRANT_CHECK_ON; - d += 0x0e; - } - else - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 7*4); -#ifdef PECULIAR_486 - control_word &= ~0xe080; - /* An 80486 sets nearly all of the reserved bits to 1. */ - control_word |= 0xffff0040; - partial_status = status_word() | 0xffff0000; - fpu_tag_word |= 0xffff0000; - I387.soft.fcs &= ~0xf8000000; - I387.soft.fos |= 0xffff0000; -#endif /* PECULIAR_486 */ - if (__copy_to_user(d, &control_word, 7*4)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - d += 0x1c; - } - - control_word |= CW_Exceptions; - partial_status &= ~(SW_Summary | SW_Backward); - - return d; -} - - -void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) -{ - u_char __user *d; - int offset = (top & 7) * 10, other = 80 - offset; - - d = fstenv(addr_modes, data_address); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,80); - - /* Copy all registers in stack order. */ - if (__copy_to_user(d, register_base+offset, other)) - FPU_abort; - if ( offset ) - if (__copy_to_user(d+other, register_base, offset)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - finit(); -} - -/*===========================================================================*/ diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c deleted file mode 100644 index 40f50b61bc6..00000000000 --- a/arch/i386/math-emu/reg_mul.c +++ /dev/null @@ -1,132 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_mul.c | - | | - | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Returns the tag of the result if no exceptions or errors occurred. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The destination may be any FPU_REG, including one of the source FPU_REGs. | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" -#include "exception.h" -#include "reg_constant.h" -#include "fpu_system.h" - - -/* - Multiply two registers to give a register result. - The sources are st(deststnr) and (b,tagb,signb). - The destination is st(deststnr). - */ -/* This routine must be called with non-empty source registers */ -int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) -{ - FPU_REG *a = &st(deststnr); - FPU_REG *dest = a; - u_char taga = FPU_gettagi(deststnr); - u_char saved_sign = getsign(dest); - u_char sign = (getsign(a) ^ getsign(b)); - int tag; - - - if ( !(taga | tagb) ) - { - /* Both regs Valid, this should be the most common case. */ - - tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b)); - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - tag = FPU_u_mul(&x, &y, dest, control_w, sign, - exponent16(&x) + exponent16(&y)); - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) - { - if ( ((tagb == TW_Denormal) || (taga == TW_Denormal)) - && (denormal_operand() < 0) ) - return FPU_Exception; - - /* Must have either both arguments == zero, or - one valid and the other zero. - The result is therefore zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - /* The 80486 book says that the answer is +0, but a real - 80486 behaves this way. - IEEE-754 apparently says it should be this way. */ - setsign(dest, sign); - return TAG_Zero; - } - /* Must have infinities, NaNs, etc */ - else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - return real_2op_NaN(b, tagb, deststnr, &st(0)); - } - else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero)) - || ((tagb == TW_Infinity) && (taga == TAG_Zero)) ) - { - return arith_invalid(deststnr); /* Zero*Infinity is invalid */ - } - else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) - && (denormal_operand() < 0) ) - { - return FPU_Exception; - } - else if (taga == TW_Infinity) - { - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, sign); - return TAG_Special; - } - else if (tagb == TW_Infinity) - { - FPU_copy_to_regi(b, TAG_Special, deststnr); - setsign(dest, sign); - return TAG_Special; - } - -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x102); - return FPU_Exception; - } -#endif /* PARANOID */ - - return 0; -} diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S deleted file mode 100644 index 8b6352efcee..00000000000 --- a/arch/i386/math-emu/reg_norm.S +++ /dev/null @@ -1,147 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_norm.S | - | | - | Copyright (C) 1992,1993,1994,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | Normalize the value in a FPU_REG. | - | | - | Call from C as: | - | int FPU_normalize(FPU_REG *n) | - | | - | int FPU_normalize_nuo(FPU_REG *n) | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - - -.text -ENTRY(FPU_normalize) - pushl %ebp - movl %esp,%ebp - pushl %ebx - - movl PARAM1,%ebx - - movl SIGH(%ebx),%edx - movl SIGL(%ebx),%eax - - orl %edx,%edx /* ms bits */ - js L_done /* Already normalized */ - jnz L_shift_1 /* Shift left 1 - 31 bits */ - - orl %eax,%eax - jz L_zero /* The contents are zero */ - - movl %eax,%edx - xorl %eax,%eax - subw $32,EXP(%ebx) /* This can cause an underflow */ - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%eax,%edx - shl %cl,%eax - subw %cx,EXP(%ebx) /* This can cause an underflow */ - - movl %edx,SIGH(%ebx) - movl %eax,SIGL(%ebx) - -L_done: - cmpw EXP_OVER,EXP(%ebx) - jge L_overflow - - cmpw EXP_UNDER,EXP(%ebx) - jle L_underflow - -L_exit_valid: - movl TAG_Valid,%eax - - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - andw $0x7fff,EXP(%ebx) - -L_exit: - popl %ebx - leave - ret - - -L_zero: - movw $0,EXP(%ebx) - movl TAG_Zero,%eax - jmp L_exit - -L_underflow: - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - push %ebx - call arith_underflow - pop %ebx - jmp L_exit - -L_overflow: - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - push %ebx - call arith_overflow - pop %ebx - jmp L_exit - - - -/* Normalise without reporting underflow or overflow */ -ENTRY(FPU_normalize_nuo) - pushl %ebp - movl %esp,%ebp - pushl %ebx - - movl PARAM1,%ebx - - movl SIGH(%ebx),%edx - movl SIGL(%ebx),%eax - - orl %edx,%edx /* ms bits */ - js L_exit_nuo_valid /* Already normalized */ - jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ - - orl %eax,%eax - jz L_exit_nuo_zero /* The contents are zero */ - - movl %eax,%edx - xorl %eax,%eax - subw $32,EXP(%ebx) /* This can cause an underflow */ - -/* We need to shift left by 1 - 31 bits */ -L_nuo_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%eax,%edx - shl %cl,%eax - subw %cx,EXP(%ebx) /* This can cause an underflow */ - - movl %edx,SIGH(%ebx) - movl %eax,SIGL(%ebx) - -L_exit_nuo_valid: - movl TAG_Valid,%eax - - popl %ebx - leave - ret - -L_exit_nuo_zero: - movl TAG_Zero,%eax - movw EXP_UNDER,EXP(%ebx) - - popl %ebx - leave - ret diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S deleted file mode 100644 index d1d4e48b4f6..00000000000 --- a/arch/i386/math-emu/reg_round.S +++ /dev/null @@ -1,708 +0,0 @@ - .file "reg_round.S" -/*---------------------------------------------------------------------------+ - | reg_round.S | - | | - | Rounding/truncation/etc for FPU basic arithmetic functions. | - | | - | Copyright (C) 1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | This code has four possible entry points. | - | The following must be entered by a jmp instruction: | - | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | - | | - | The FPU_round entry point is intended to be used by C code. | - | From C, call as: | - | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - | For correct "up" and "down" rounding, the argument must have the correct | - | sign. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Four entry points. | - | | - | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | - | %eax:%ebx 64 bit significand | - | %edx 32 bit extension of the significand | - | %edi pointer to an FPU_REG for the result to be stored | - | stack calling function must have set up a C stack frame and | - | pushed %esi, %edi, and %ebx | - | | - | Needed just for the fpu_reg_round_sqrt entry point: | - | %cx A control word in the same format as the FPU control word. | - | Otherwise, PARAM4 must give such a value. | - | | - | | - | The significand and its extension are assumed to be exact in the | - | following sense: | - | If the significand by itself is the exact result then the significand | - | extension (%edx) must contain 0, otherwise the significand extension | - | must be non-zero. | - | If the significand extension is non-zero then the significand is | - | smaller than the magnitude of the correct exact result by an amount | - | greater than zero and less than one ls bit of the significand. | - | The significand extension is only required to have three possible | - | non-zero values: | - | less than 0x80000000 <=> the significand is less than 1/2 an ls | - | bit smaller than the magnitude of the | - | true exact result. | - | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | - | smaller than the magnitude of the true | - | exact result. | - | greater than 0x80000000 <=> the significand is more than 1/2 an ls | - | bit smaller than the magnitude of the | - | true exact result. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The code in this module has become quite complex, but it should handle | - | all of the FPU flags which are set at this stage of the basic arithmetic | - | computations. | - | There are a few rare cases where the results are not set identically to | - | a real FPU. These require a bit more thought because at this stage the | - | results of the code here appear to be more consistent... | - | This may be changed in a future version. | - +---------------------------------------------------------------------------*/ - - -#include "fpu_emu.h" -#include "exception.h" -#include "control_w.h" - -/* Flags for FPU_bits_lost */ -#define LOST_DOWN $1 -#define LOST_UP $2 - -/* Flags for FPU_denormal */ -#define DENORMAL $1 -#define UNMASKED_UNDERFLOW $2 - - -#ifndef NON_REENTRANT_FPU -/* Make the code re-entrant by putting - local storage on the stack: */ -#define FPU_bits_lost (%esp) -#define FPU_denormal 1(%esp) - -#else -/* Not re-entrant, so we can gain speed by putting - local storage in a static area: */ -.data - .align 4,0 -FPU_bits_lost: - .byte 0 -FPU_denormal: - .byte 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -.globl fpu_reg_round -.globl fpu_Arith_exit - -/* Entry point when called from C */ -ENTRY(FPU_round) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%edi - movl SIGH(%edi),%eax - movl SIGL(%edi),%ebx - movl PARAM2,%edx - -fpu_reg_round: /* Normal entry point */ - movl PARAM4,%ecx - -#ifndef NON_REENTRANT_FPU - pushl %ebx /* adjust the stack pointer */ -#endif /* NON_REENTRANT_FPU */ - -#ifdef PARANOID -/* Cannot use this here yet */ -/* orl %eax,%eax */ -/* jns L_entry_bugged */ -#endif /* PARANOID */ - - cmpw EXP_UNDER,EXP(%edi) - jle L_Make_denorm /* The number is a de-normal */ - - movb $0,FPU_denormal /* 0 -> not a de-normal */ - -Denorm_done: - movb $0,FPU_bits_lost /* No bits yet lost in rounding */ - - movl %ecx,%esi - andl CW_PC,%ecx - cmpl PR_64_BITS,%ecx - je LRound_To_64 - - cmpl PR_53_BITS,%ecx - je LRound_To_53 - - cmpl PR_24_BITS,%ecx - je LRound_To_24 - -#ifdef PECULIAR_486 -/* With the precision control bits set to 01 "(reserved)", a real 80486 - behaves as if the precision control bits were set to 11 "64 bits" */ - cmpl PR_RESERVED_BITS,%ecx - je LRound_To_64 -#ifdef PARANOID - jmp L_bugged_denorm_486 -#endif /* PARANOID */ -#else -#ifdef PARANOID - jmp L_bugged_denorm /* There is no bug, just a bad control word */ -#endif /* PARANOID */ -#endif /* PECULIAR_486 */ - - -/* Round etc to 24 bit precision */ -LRound_To_24: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_24 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_24 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_24 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_24 - -#ifdef PARANOID - jmp L_bugged_round24 -#endif /* PARANOID */ - -LUp_24: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_24 /* If negative then up==truncate */ - - jmp LCheck_24_round_up - -LDown_24: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_24 /* If positive then down==truncate */ - -LCheck_24_round_up: - movl %eax,%ecx - andl $0x000000ff,%ecx - orl %ebx,%ecx - orl %edx,%ecx - jnz LDo_24_round_up - jmp L_Re_normalise - -LRound_nearest_24: - /* Do rounding of the 24th bit if needed (nearest or even) */ - movl %eax,%ecx - andl $0x000000ff,%ecx - cmpl $0x00000080,%ecx - jc LCheck_truncate_24 /* less than half, no increment needed */ - - jne LGreater_Half_24 /* greater than half, increment needed */ - - /* Possibly half, we need to check the ls bits */ - orl %ebx,%ebx - jnz LGreater_Half_24 /* greater than half, increment needed */ - - orl %edx,%edx - jnz LGreater_Half_24 /* greater than half, increment needed */ - - /* Exactly half, increment only if 24th bit is 1 (round to even) */ - testl $0x00000100,%eax - jz LDo_truncate_24 - -LGreater_Half_24: /* Rounding: increment at the 24th bit */ -LDo_24_round_up: - andl $0xffffff00,%eax /* Truncate to 24 bits */ - xorl %ebx,%ebx - movb LOST_UP,FPU_bits_lost - addl $0x00000100,%eax - jmp LCheck_Round_Overflow - -LCheck_truncate_24: - movl %eax,%ecx - andl $0x000000ff,%ecx - orl %ebx,%ecx - orl %edx,%ecx - jz L_Re_normalise /* No truncation needed */ - -LDo_truncate_24: - andl $0xffffff00,%eax /* Truncate to 24 bits */ - xorl %ebx,%ebx - movb LOST_DOWN,FPU_bits_lost - jmp L_Re_normalise - - -/* Round etc to 53 bit precision */ -LRound_To_53: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_53 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_53 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_53 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_53 - -#ifdef PARANOID - jmp L_bugged_round53 -#endif /* PARANOID */ - -LUp_53: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_53 /* If negative then up==truncate */ - - jmp LCheck_53_round_up - -LDown_53: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_53 /* If positive then down==truncate */ - -LCheck_53_round_up: - movl %ebx,%ecx - andl $0x000007ff,%ecx - orl %edx,%ecx - jnz LDo_53_round_up - jmp L_Re_normalise - -LRound_nearest_53: - /* Do rounding of the 53rd bit if needed (nearest or even) */ - movl %ebx,%ecx - andl $0x000007ff,%ecx - cmpl $0x00000400,%ecx - jc LCheck_truncate_53 /* less than half, no increment needed */ - - jnz LGreater_Half_53 /* greater than half, increment needed */ - - /* Possibly half, we need to check the ls bits */ - orl %edx,%edx - jnz LGreater_Half_53 /* greater than half, increment needed */ - - /* Exactly half, increment only if 53rd bit is 1 (round to even) */ - testl $0x00000800,%ebx - jz LTruncate_53 - -LGreater_Half_53: /* Rounding: increment at the 53rd bit */ -LDo_53_round_up: - movb LOST_UP,FPU_bits_lost - andl $0xfffff800,%ebx /* Truncate to 53 bits */ - addl $0x00000800,%ebx - adcl $0,%eax - jmp LCheck_Round_Overflow - -LCheck_truncate_53: - movl %ebx,%ecx - andl $0x000007ff,%ecx - orl %edx,%ecx - jz L_Re_normalise - -LTruncate_53: - movb LOST_DOWN,FPU_bits_lost - andl $0xfffff800,%ebx /* Truncate to 53 bits */ - jmp L_Re_normalise - - -/* Round etc to 64 bit precision */ -LRound_To_64: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_64 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_64 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_64 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_64 - -#ifdef PARANOID - jmp L_bugged_round64 -#endif /* PARANOID */ - -LUp_64: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_64 /* If negative then up==truncate */ - - orl %edx,%edx - jnz LDo_64_round_up - jmp L_Re_normalise - -LDown_64: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_64 /* If positive then down==truncate */ - - orl %edx,%edx - jnz LDo_64_round_up - jmp L_Re_normalise - -LRound_nearest_64: - cmpl $0x80000000,%edx - jc LCheck_truncate_64 - - jne LDo_64_round_up - - /* Now test for round-to-even */ - testb $1,%bl - jz LCheck_truncate_64 - -LDo_64_round_up: - movb LOST_UP,FPU_bits_lost - addl $1,%ebx - adcl $0,%eax - -LCheck_Round_Overflow: - jnc L_Re_normalise - - /* Overflow, adjust the result (significand to 1.0) */ - rcrl $1,%eax - rcrl $1,%ebx - incw EXP(%edi) - jmp L_Re_normalise - -LCheck_truncate_64: - orl %edx,%edx - jz L_Re_normalise - -LTruncate_64: - movb LOST_DOWN,FPU_bits_lost - -L_Re_normalise: - testb $0xff,FPU_denormal - jnz Normalise_result - -L_Normalised: - movl TAG_Valid,%edx - -L_deNormalised: - cmpb LOST_UP,FPU_bits_lost - je L_precision_lost_up - - cmpb LOST_DOWN,FPU_bits_lost - je L_precision_lost_down - -L_no_precision_loss: - /* store the result */ - -L_Store_significand: - movl %eax,SIGH(%edi) - movl %ebx,SIGL(%edi) - - cmpw EXP_OVER,EXP(%edi) - jge L_overflow - - movl %edx,%eax - - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%edi) - andw $0x7fff,EXP(%edi) - -fpu_reg_round_signed_special_exit: - - cmpb SIGN_POS,PARAM5 - je fpu_reg_round_special_exit - - orw $0x8000,EXP(%edi) /* Negative sign for the result. */ - -fpu_reg_round_special_exit: - -#ifndef NON_REENTRANT_FPU - popl %ebx /* adjust the stack pointer */ -#endif /* NON_REENTRANT_FPU */ - -fpu_Arith_exit: - popl %ebx - popl %edi - popl %esi - leave - ret - - -/* - * Set the FPU status flags to represent precision loss due to - * round-up. - */ -L_precision_lost_up: - push %edx - push %eax - call set_precision_flag_up - popl %eax - popl %edx - jmp L_no_precision_loss - -/* - * Set the FPU status flags to represent precision loss due to - * truncation. - */ -L_precision_lost_down: - push %edx - push %eax - call set_precision_flag_down - popl %eax - popl %edx - jmp L_no_precision_loss - - -/* - * The number is a denormal (which might get rounded up to a normal) - * Shift the number right the required number of bits, which will - * have to be undone later... - */ -L_Make_denorm: - /* The action to be taken depends upon whether the underflow - exception is masked */ - testb CW_Underflow,%cl /* Underflow mask. */ - jz Unmasked_underflow /* Do not make a denormal. */ - - movb DENORMAL,FPU_denormal - - pushl %ecx /* Save */ - movw EXP_UNDER+1,%cx - subw EXP(%edi),%cx - - cmpw $64,%cx /* shrd only works for 0..31 bits */ - jnc Denorm_shift_more_than_63 - - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc Denorm_shift_more_than_32 - -/* - * We got here without jumps by assuming that the most common requirement - * is for a small de-normalising shift. - * Shift by [1..31] bits - */ - addw %cx,EXP(%edi) - orl %edx,%edx /* extension */ - setne %ch /* Save whether %edx is non-zero */ - xorl %edx,%edx - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orb %ch,%dl - popl %ecx - jmp Denorm_done - -/* Shift by [32..63] bits */ -Denorm_shift_more_than_32: - addw %cx,EXP(%edi) - subb $32,%cl - orl %edx,%edx - setne %ch - orb %ch,%bl - xorl %edx,%edx - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orl %edx,%edx /* test these 32 bits */ - setne %cl - orb %ch,%bl - orb %cl,%bl - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - popl %ecx - jmp Denorm_done - -/* Shift by [64..) bits */ -Denorm_shift_more_than_63: - cmpw $64,%cx - jne Denorm_shift_more_than_64 - -/* Exactly 64 bit shift */ - addw %cx,EXP(%edi) - xorl %ecx,%ecx - orl %edx,%edx - setne %cl - orl %ebx,%ebx - setne %ch - orb %ch,%cl - orb %cl,%al - movl %eax,%edx - xorl %eax,%eax - xorl %ebx,%ebx - popl %ecx - jmp Denorm_done - -Denorm_shift_more_than_64: - movw EXP_UNDER+1,EXP(%edi) -/* This is easy, %eax must be non-zero, so.. */ - movl $1,%edx - xorl %eax,%eax - xorl %ebx,%ebx - popl %ecx - jmp Denorm_done - - -Unmasked_underflow: - movb UNMASKED_UNDERFLOW,FPU_denormal - jmp Denorm_done - - -/* Undo the de-normalisation. */ -Normalise_result: - cmpb UNMASKED_UNDERFLOW,FPU_denormal - je Signal_underflow - -/* The number must be a denormal if we got here. */ -#ifdef PARANOID - /* But check it... just in case. */ - cmpw EXP_UNDER+1,EXP(%edi) - jne L_norm_bugged -#endif /* PARANOID */ - -#ifdef PECULIAR_486 - /* - * This implements a special feature of 80486 behaviour. - * Underflow will be signalled even if the number is - * not a denormal after rounding. - * This difference occurs only for masked underflow, and not - * in the unmasked case. - * Actual 80486 behaviour differs from this in some circumstances. - */ - orl %eax,%eax /* ms bits */ - js LPseudoDenormal /* Will be masked underflow */ -#else - orl %eax,%eax /* ms bits */ - js L_Normalised /* No longer a denormal */ -#endif /* PECULIAR_486 */ - - jnz LDenormal_adj_exponent - - orl %ebx,%ebx - jz L_underflow_to_zero /* The contents are zero */ - -LDenormal_adj_exponent: - decw EXP(%edi) - -LPseudoDenormal: - testb $0xff,FPU_bits_lost /* bits lost == underflow */ - movl TAG_Special,%edx - jz L_deNormalised - - /* There must be a masked underflow */ - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - movl TAG_Special,%edx - jmp L_deNormalised - - -/* - * The operations resulted in a number too small to represent. - * Masked response. - */ -L_underflow_to_zero: - push %eax - call set_precision_flag_down - popl %eax - - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - -/* Reduce the exponent to EXP_UNDER */ - movw EXP_UNDER,EXP(%edi) - movl TAG_Zero,%edx - jmp L_Store_significand - - -/* The operations resulted in a number too large to represent. */ -L_overflow: - addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ - push %edi - call arith_overflow - pop %edi - jmp fpu_reg_round_signed_special_exit - - -Signal_underflow: - /* The number may have been changed to a non-denormal */ - /* by the rounding operations. */ - cmpw EXP_UNDER,EXP(%edi) - jle Do_unmasked_underflow - - jmp L_Normalised - -Do_unmasked_underflow: - /* Increase the exponent by the magic number */ - addw $(3*(1<<13)),EXP(%edi) - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - jmp L_Normalised - - -#ifdef PARANOID -#ifdef PECULIAR_486 -L_bugged_denorm_486: - pushl EX_INTERNAL|0x236 - call EXCEPTION - popl %ebx - jmp L_exception_exit -#else -L_bugged_denorm: - pushl EX_INTERNAL|0x230 - call EXCEPTION - popl %ebx - jmp L_exception_exit -#endif /* PECULIAR_486 */ - -L_bugged_round24: - pushl EX_INTERNAL|0x231 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_bugged_round53: - pushl EX_INTERNAL|0x232 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_bugged_round64: - pushl EX_INTERNAL|0x233 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_norm_bugged: - pushl EX_INTERNAL|0x234 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_entry_bugged: - pushl EX_INTERNAL|0x235 - call EXCEPTION - popl %ebx -L_exception_exit: - mov $-1,%eax - jmp fpu_reg_round_special_exit -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S deleted file mode 100644 index 47c4c2434d8..00000000000 --- a/arch/i386/math-emu/reg_u_add.S +++ /dev/null @@ -1,167 +0,0 @@ - .file "reg_u_add.S" -/*---------------------------------------------------------------------------+ - | reg_u_add.S | - | | - | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | - | result in a destination FPU_REG. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Call from C as: | - | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | - | int control_w) | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/* - | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). - | Takes two valid reg f.p. numbers (TAG_Valid), which are - | treated as unsigned numbers, - | and returns their sum as a TAG_Valid or TAG_Special f.p. number. - | The returned number is normalized. - | Basic checks are performed if PARANOID is defined. - */ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - -.text -ENTRY(FPU_u_add) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* source 1 */ - movl PARAM2,%edi /* source 2 */ - - movl PARAM6,%ecx - movl %ecx,%edx - subl PARAM7,%ecx /* exp1 - exp2 */ - jge L_arg1_larger - - /* num1 is smaller */ - movl SIGL(%esi),%ebx - movl SIGH(%esi),%eax - - movl %edi,%esi - movl PARAM7,%edx - negw %cx - jmp L_accum_loaded - -L_arg1_larger: - /* num1 has larger or equal exponent */ - movl SIGL(%edi),%ebx - movl SIGH(%edi),%eax - -L_accum_loaded: - movl PARAM3,%edi /* destination */ - movw %dx,EXP(%edi) /* Copy exponent to destination */ - - xorl %edx,%edx /* clear the extension */ - -#ifdef PARANOID - testl $0x80000000,%eax - je L_bugged - - testl $0x80000000,SIGH(%esi) - je L_bugged -#endif /* PARANOID */ - -/* The number to be shifted is in %eax:%ebx:%edx */ - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - jmp L_shift_done - -L_more_than_31: - cmpw $64,%cx - jnc L_more_than_63 - - subb $32,%cl - jz L_exactly_32 - - shrd %cl,%eax,%edx - shr %cl,%eax - orl %ebx,%ebx - jz L_more_31_no_low /* none of the lowest bits is set */ - - orl $1,%edx /* record the fact in the extension */ - -L_more_31_no_low: - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_exactly_32: - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_more_than_63: - cmpw $65,%cx - jnc L_more_than_64 - - movl %eax,%edx - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_64: - movl $1,%edx /* The shifted nr always at least one '1' */ - -L_more_63_no_low: - xorl %ebx,%ebx - xorl %eax,%eax - -L_shift_done: - /* Now do the addition */ - addl SIGL(%esi),%ebx - adcl SIGH(%esi),%eax - jnc L_round_the_result - - /* Overflow, adjust the result */ - rcrl $1,%eax - rcrl $1,%ebx - rcrl $1,%edx - jnc L_no_bit_lost - - orl $1,%edx - -L_no_bit_lost: - incw EXP(%edi) - -L_round_the_result: - jmp fpu_reg_round /* Round the result */ - - - -#ifdef PARANOID -/* If we ever get here then we have problems! */ -L_bugged: - pushl EX_INTERNAL|0x201 - call EXCEPTION - pop %ebx - movl $-1,%eax - jmp L_exit - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S deleted file mode 100644 index cc00654b6f9..00000000000 --- a/arch/i386/math-emu/reg_u_div.S +++ /dev/null @@ -1,471 +0,0 @@ - .file "reg_u_div.S" -/*---------------------------------------------------------------------------+ - | reg_u_div.S | - | | - | Divide one FPU_REG by another and put the result in a destination FPU_REG.| - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Call from C as: | - | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | - | unsigned int control_word, char *sign) | - | | - | Does not compute the destination exponent, but does adjust it. | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - - -/* #define dSIGL(x) (x) */ -/* #define dSIGH(x) 4(x) */ - - -#ifndef NON_REENTRANT_FPU -/* - Local storage on the stack: - Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - Overflow flag: ovfl_flag - */ -#define FPU_accum_3 -4(%ebp) -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) -#define FPU_result_1 -20(%ebp) -#define FPU_result_2 -24(%ebp) -#define FPU_ovfl_flag -28(%ebp) - -#else -.data -/* - Local storage in a static area: - Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - Overflow flag: ovfl_flag - */ - .align 4,0 -FPU_accum_3: - .long 0 -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 -FPU_result_1: - .long 0 -FPU_result_2: - .long 0 -FPU_ovfl_flag: - .byte 0 -#endif /* NON_REENTRANT_FPU */ - -#define REGA PARAM1 -#define REGB PARAM2 -#define DEST PARAM3 - -.text -ENTRY(FPU_u_div) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl REGA,%esi - movl REGB,%ebx - movl DEST,%edi - - movswl EXP(%esi),%edx - movswl EXP(%ebx),%eax - subl %eax,%edx - addl EXP_BIAS,%edx - - /* A denormal and a large number can cause an exponent underflow */ - cmpl EXP_WAY_UNDER,%edx - jg xExp_not_underflow - - /* Set to a really low value allow correct handling */ - movl EXP_WAY_UNDER,%edx - -xExp_not_underflow: - - movw %dx,EXP(%edi) - -#ifdef PARANOID -/* testl $0x80000000, SIGH(%esi) // Dividend */ -/* je L_bugged */ - testl $0x80000000, SIGH(%ebx) /* Divisor */ - je L_bugged -#endif /* PARANOID */ - -/* Check if the divisor can be treated as having just 32 bits */ - cmpl $0,SIGL(%ebx) - jnz L_Full_Division /* Can't do a quick divide */ - -/* We should be able to zip through the division here */ - movl SIGH(%ebx),%ecx /* The divisor */ - movl SIGH(%esi),%edx /* Dividend */ - movl SIGL(%esi),%eax /* Dividend */ - - cmpl %ecx,%edx - setaeb FPU_ovfl_flag /* Keep a record */ - jb L_no_adjust - - subl %ecx,%edx /* Prevent the overflow */ - -L_no_adjust: - /* Divide the 64 bit number by the 32 bit denominator */ - divl %ecx - movl %eax,FPU_result_2 - - /* Work on the remainder of the first division */ - xorl %eax,%eax - divl %ecx - movl %eax,FPU_result_1 - - /* Work on the remainder of the 64 bit division */ - xorl %eax,%eax - divl %ecx - - testb $255,FPU_ovfl_flag /* was the num > denom ? */ - je L_no_overflow - - /* Do the shifting here */ - /* increase the exponent */ - incw EXP(%edi) - - /* shift the mantissa right one bit */ - stc /* To set the ms bit */ - rcrl FPU_result_2 - rcrl FPU_result_1 - rcrl %eax - -L_no_overflow: - jmp LRound_precision /* Do the rounding as required */ - - -/*---------------------------------------------------------------------------+ - | Divide: Return arg1/arg2 to arg3. | - | | - | This routine does not use the exponents of arg1 and arg2, but does | - | adjust the exponent of arg3. | - | | - | The maximum returned value is (ignoring exponents) | - | .ffffffff ffffffff | - | ------------------ = 1.ffffffff fffffffe | - | .80000000 00000000 | - | and the minimum is | - | .80000000 00000000 | - | ------------------ = .80000000 00000001 (rounded) | - | .ffffffff ffffffff | - | | - +---------------------------------------------------------------------------*/ - - -L_Full_Division: - /* Save extended dividend in local register */ - movl SIGL(%esi),%eax - movl %eax,FPU_accum_2 - movl SIGH(%esi),%eax - movl %eax,FPU_accum_3 - xorl %eax,%eax - movl %eax,FPU_accum_1 /* zero the extension */ - movl %eax,FPU_accum_0 /* zero the extension */ - - movl SIGL(%esi),%eax /* Get the current num */ - movl SIGH(%esi),%edx - -/*----------------------------------------------------------------------*/ -/* Initialization done. - Do the first 32 bits. */ - - movb $0,FPU_ovfl_flag - cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ - jb LLess_than_1 - ja LGreater_than_1 - - cmpl SIGL(%ebx),%eax - jb LLess_than_1 - -LGreater_than_1: -/* The dividend is greater or equal, would cause overflow */ - setaeb FPU_ovfl_flag /* Keep a record */ - - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx /* Prevent the overflow */ - movl %eax,FPU_accum_2 - movl %edx,FPU_accum_3 - -LLess_than_1: -/* At this point, we have a dividend < divisor, with a record of - adjustment in FPU_ovfl_flag */ - - /* We will divide by a number which is too large */ - movl SIGH(%ebx),%ecx - addl $1,%ecx - jnc LFirst_div_not_1 - - /* here we need to divide by 100000000h, - i.e., no division at all.. */ - mov %edx,%eax - jmp LFirst_div_done - -LFirst_div_not_1: - divl %ecx /* Divide the numerator by the augmented - denom ms dw */ - -LFirst_div_done: - movl %eax,FPU_result_2 /* Put the result in the answer */ - - mull SIGH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_2 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_3 - - movl FPU_result_2,%eax /* Get the result back */ - mull SIGL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - sbbl $0,FPU_accum_3 - je LDo_2nd_32_bits /* Must check for non-zero result here */ - -#ifdef PARANOID - jb L_bugged_1 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - incl FPU_result_2 /* Correct the answer */ - - movl SIGL(%ebx),%eax - movl SIGH(%ebx),%edx - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - sbbl $0,FPU_accum_3 - jne L_bugged_1 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* Half of the main problem is done, there is just a reduced numerator - to handle now. - Work with the second 32 bits, FPU_accum_0 not used from now on */ -LDo_2nd_32_bits: - movl FPU_accum_2,%edx /* get the reduced num */ - movl FPU_accum_1,%eax - - /* need to check for possible subsequent overflow */ - cmpl SIGH(%ebx),%edx - jb LDo_2nd_div - ja LPrevent_2nd_overflow - - cmpl SIGL(%ebx),%eax - jb LDo_2nd_div - -LPrevent_2nd_overflow: -/* The numerator is greater or equal, would cause overflow */ - /* prevent overflow */ - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx - movl %edx,FPU_accum_2 - movl %eax,FPU_accum_1 - - incl FPU_result_2 /* Reflect the subtraction in the answer */ - -#ifdef PARANOID - je L_bugged_2 /* Can't bump the result to 1.0 */ -#endif /* PARANOID */ - -LDo_2nd_div: - cmpl $0,%ecx /* augmented denom msw */ - jnz LSecond_div_not_1 - - /* %ecx == 0, we are dividing by 1.0 */ - mov %edx,%eax - jmp LSecond_div_done - -LSecond_div_not_1: - divl %ecx /* Divide the numerator by the denom ms dw */ - -LSecond_div_done: - movl %eax,FPU_result_1 /* Put the result in the answer */ - - mull SIGH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - movl FPU_result_1,%eax /* Get the result back */ - mull SIGL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - jz LDo_3rd_32_bits - -#ifdef PARANOID - cmpl $1,FPU_accum_2 - jne L_bugged_2 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - movl SIGL(%ebx),%eax - movl SIGH(%ebx),%edx - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 - jne L_bugged_2 -#endif /* PARANOID */ - - addl $1,FPU_result_1 /* Correct the answer */ - adcl $0,FPU_result_2 - -#ifdef PARANOID - jc L_bugged_2 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* The division is essentially finished here, we just need to perform - tidying operations. - Deal with the 3rd 32 bits */ -LDo_3rd_32_bits: - movl FPU_accum_1,%edx /* get the reduced num */ - movl FPU_accum_0,%eax - - /* need to check for possible subsequent overflow */ - cmpl SIGH(%ebx),%edx /* denom */ - jb LRound_prep - ja LPrevent_3rd_overflow - - cmpl SIGL(%ebx),%eax /* denom */ - jb LRound_prep - -LPrevent_3rd_overflow: - /* prevent overflow */ - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx - movl %edx,FPU_accum_1 - movl %eax,FPU_accum_0 - - addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ - adcl $0,FPU_result_2 - jne LRound_prep - jnc LRound_prep - - /* This is a tricky spot, there is an overflow of the answer */ - movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ - -LRound_prep: -/* - * Prepare for rounding. - * To test for rounding, we just need to compare 2*accum with the - * denom. - */ - movl FPU_accum_0,%ecx - movl FPU_accum_1,%edx - movl %ecx,%eax - orl %edx,%eax - jz LRound_ovfl /* The accumulator contains zero. */ - - /* Multiply by 2 */ - clc - rcll $1,%ecx - rcll $1,%edx - jc LRound_large /* No need to compare, denom smaller */ - - subl SIGL(%ebx),%ecx - sbbl SIGH(%ebx),%edx - jnc LRound_not_small - - movl $0x70000000,%eax /* Denom was larger */ - jmp LRound_ovfl - -LRound_not_small: - jnz LRound_large - - movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ - jmp LRound_ovfl - -LRound_large: - movl $0xff000000,%eax /* Denom was smaller */ - -LRound_ovfl: -/* We are now ready to deal with rounding, but first we must get - the bits properly aligned */ - testb $255,FPU_ovfl_flag /* was the num > denom ? */ - je LRound_precision - - incw EXP(%edi) - - /* shift the mantissa right one bit */ - stc /* Will set the ms bit */ - rcrl FPU_result_2 - rcrl FPU_result_1 - rcrl %eax - -/* Round the result as required */ -LRound_precision: - decw EXP(%edi) /* binary point between 1st & 2nd bits */ - - movl %eax,%edx - movl FPU_result_1,%ebx - movl FPU_result_2,%eax - jmp fpu_reg_round - - -#ifdef PARANOID -/* The logic is wrong if we got here */ -L_bugged: - pushl EX_INTERNAL|0x202 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_1: - pushl EX_INTERNAL|0x203 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_2: - pushl EX_INTERNAL|0x204 - call EXCEPTION - pop %ebx - jmp L_exit - -L_exit: - movl $-1,%eax - popl %ebx - popl %edi - popl %esi - - leave - ret -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S deleted file mode 100644 index 973f12af97d..00000000000 --- a/arch/i386/math-emu/reg_u_mul.S +++ /dev/null @@ -1,148 +0,0 @@ - .file "reg_u_mul.S" -/*---------------------------------------------------------------------------+ - | reg_u_mul.S | - | | - | Core multiplication routine | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Basic multiplication routine. | - | Does not check the resulting exponent for overflow/underflow | - | | - | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | - | | - | Internal working is at approx 128 bits. | - | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - - - -#ifndef NON_REENTRANT_FPU -/* Local storage on the stack: */ -#define FPU_accum_0 -4(%ebp) /* ms word */ -#define FPU_accum_1 -8(%ebp) - -#else -/* Local storage in a static area: */ -.data - .align 4,0 -FPU_accum_0: - .long 0 -FPU_accum_1: - .long 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(FPU_u_mul) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $8,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi - movl PARAM2,%edi - -#ifdef PARANOID - testl $0x80000000,SIGH(%esi) - jz L_bugged - testl $0x80000000,SIGH(%edi) - jz L_bugged -#endif /* PARANOID */ - - xorl %ecx,%ecx - xorl %ebx,%ebx - - movl SIGL(%esi),%eax - mull SIGL(%edi) - movl %eax,FPU_accum_0 - movl %edx,FPU_accum_1 - - movl SIGL(%esi),%eax - mull SIGH(%edi) - addl %eax,FPU_accum_1 - adcl %edx,%ebx -/* adcl $0,%ecx // overflow here is not possible */ - - movl SIGH(%esi),%eax - mull SIGL(%edi) - addl %eax,FPU_accum_1 - adcl %edx,%ebx - adcl $0,%ecx - - movl SIGH(%esi),%eax - mull SIGH(%edi) - addl %eax,%ebx - adcl %edx,%ecx - - /* Get the sum of the exponents. */ - movl PARAM6,%eax - subl EXP_BIAS-1,%eax - - /* Two denormals can cause an exponent underflow */ - cmpl EXP_WAY_UNDER,%eax - jg Exp_not_underflow - - /* Set to a really low value allow correct handling */ - movl EXP_WAY_UNDER,%eax - -Exp_not_underflow: - -/* Have now finished with the sources */ - movl PARAM3,%edi /* Point to the destination */ - movw %ax,EXP(%edi) - -/* Now make sure that the result is normalized */ - testl $0x80000000,%ecx - jnz LResult_Normalised - - /* Normalize by shifting left one bit */ - shll $1,FPU_accum_0 - rcll $1,FPU_accum_1 - rcll $1,%ebx - rcll $1,%ecx - decw EXP(%edi) - -LResult_Normalised: - movl FPU_accum_0,%eax - movl FPU_accum_1,%edx - orl %eax,%eax - jz L_extent_zero - - orl $1,%edx - -L_extent_zero: - movl %ecx,%eax - jmp fpu_reg_round - - -#ifdef PARANOID -L_bugged: - pushl EX_INTERNAL|0x205 - call EXCEPTION - pop %ebx - jmp L_exit - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret -#endif /* PARANOID */ - diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S deleted file mode 100644 index 1b6c24801d2..00000000000 --- a/arch/i386/math-emu/reg_u_sub.S +++ /dev/null @@ -1,272 +0,0 @@ - .file "reg_u_sub.S" -/*---------------------------------------------------------------------------+ - | reg_u_sub.S | - | | - | Core floating point subtraction routine. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Call from C as: | - | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | - | int control_w) | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/* - | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). - | Takes two valid reg f.p. numbers (TAG_Valid), which are - | treated as unsigned numbers, - | and returns their difference as a TAG_Valid or TAG_Zero f.p. - | number. - | The first number (arg1) must be the larger. - | The returned number is normalized. - | Basic checks are performed if PARANOID is defined. - */ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - -.text -ENTRY(FPU_u_sub) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* source 1 */ - movl PARAM2,%edi /* source 2 */ - - movl PARAM6,%ecx - subl PARAM7,%ecx /* exp1 - exp2 */ - -#ifdef PARANOID - /* source 2 is always smaller than source 1 */ - js L_bugged_1 - - testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ - je L_bugged_2 - - testl $0x80000000,SIGH(%esi) - je L_bugged_2 -#endif /* PARANOID */ - -/*--------------------------------------+ - | Form a register holding the | - | smaller number | - +--------------------------------------*/ - movl SIGH(%edi),%eax /* register ms word */ - movl SIGL(%edi),%ebx /* register ls word */ - - movl PARAM3,%edi /* destination */ - movl PARAM6,%edx - movw %dx,EXP(%edi) /* Copy exponent to destination */ - - xorl %edx,%edx /* register extension */ - -/*--------------------------------------+ - | Shift the temporary register | - | right the required number of | - | places. | - +--------------------------------------*/ - - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - jmp L_shift_done - -L_more_than_31: - cmpw $64,%cx - jnc L_more_than_63 - - subb $32,%cl - jz L_exactly_32 - - shrd %cl,%eax,%edx - shr %cl,%eax - orl %ebx,%ebx - jz L_more_31_no_low /* none of the lowest bits is set */ - - orl $1,%edx /* record the fact in the extension */ - -L_more_31_no_low: - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_exactly_32: - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_more_than_63: - cmpw $65,%cx - jnc L_more_than_64 - - /* Shift right by 64 bits */ - movl %eax,%edx - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_64: - jne L_more_than_65 - - /* Shift right by 65 bits */ - /* Carry is clear if we get here */ - movl %eax,%edx - rcrl %edx - jnc L_shift_65_nc - - orl $1,%edx - jmp L_more_63_no_low - -L_shift_65_nc: - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_65: - movl $1,%edx /* The shifted nr always at least one '1' */ - -L_more_63_no_low: - xorl %ebx,%ebx - xorl %eax,%eax - -L_shift_done: -L_subtr: -/*------------------------------+ - | Do the subtraction | - +------------------------------*/ - xorl %ecx,%ecx - subl %edx,%ecx - movl %ecx,%edx - movl SIGL(%esi),%ecx - sbbl %ebx,%ecx - movl %ecx,%ebx - movl SIGH(%esi),%ecx - sbbl %eax,%ecx - movl %ecx,%eax - -#ifdef PARANOID - /* We can never get a borrow */ - jc L_bugged -#endif /* PARANOID */ - -/*--------------------------------------+ - | Normalize the result | - +--------------------------------------*/ - testl $0x80000000,%eax - jnz L_round /* no shifting needed */ - - orl %eax,%eax - jnz L_shift_1 /* shift left 1 - 31 bits */ - - orl %ebx,%ebx - jnz L_shift_32 /* shift left 32 - 63 bits */ - -/* - * A rare case, the only one which is non-zero if we got here - * is: 1000000 .... 0000 - * -0111111 .... 1111 1 - * -------------------- - * 0000000 .... 0000 1 - */ - - cmpl $0x80000000,%edx - jnz L_must_be_zero - - /* Shift left 64 bits */ - subw $64,EXP(%edi) - xchg %edx,%eax - jmp fpu_reg_round - -L_must_be_zero: -#ifdef PARANOID - orl %edx,%edx - jnz L_bugged_3 -#endif /* PARANOID */ - - /* The result is zero */ - movw $0,EXP(%edi) /* exponent */ - movl $0,SIGL(%edi) - movl $0,SIGH(%edi) - movl TAG_Zero,%eax - jmp L_exit - -L_shift_32: - movl %ebx,%eax - movl %edx,%ebx - movl $0,%edx - subw $32,EXP(%edi) /* Can get underflow here */ - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %eax,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%ebx,%eax - shld %cl,%edx,%ebx - shl %cl,%edx - subw %cx,EXP(%edi) /* Can get underflow here */ - -L_round: - jmp fpu_reg_round /* Round the result */ - - -#ifdef PARANOID -L_bugged_1: - pushl EX_INTERNAL|0x206 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_2: - pushl EX_INTERNAL|0x209 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_3: - pushl EX_INTERNAL|0x210 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_4: - pushl EX_INTERNAL|0x211 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged: - pushl EX_INTERNAL|0x212 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_error_exit: - movl $-1,%eax - -#endif /* PARANOID */ - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S deleted file mode 100644 index bbe0e87718e..00000000000 --- a/arch/i386/math-emu/round_Xsig.S +++ /dev/null @@ -1,141 +0,0 @@ -/*---------------------------------------------------------------------------+ - | round_Xsig.S | - | | - | Copyright (C) 1992,1993,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Normalize and round a 12 byte quantity. | - | Call from C as: | - | int round_Xsig(Xsig *n) | - | | - | Normalize a 12 byte quantity. | - | Call from C as: | - | int norm_Xsig(Xsig *n) | - | | - | Each function returns the size of the shift (nr of bits). | - | | - +---------------------------------------------------------------------------*/ - .file "round_Xsig.S" - -#include "fpu_emu.h" - - -.text -ENTRY(round_Xsig) - pushl %ebp - movl %esp,%ebp - pushl %ebx /* Reserve some space */ - pushl %ebx - pushl %esi - - movl PARAM1,%esi - - movl 8(%esi),%edx - movl 4(%esi),%ebx - movl (%esi),%eax - - movl $0,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_round /* Already normalized */ - jnz L_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - movl $-32,-4(%ebp) - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - subl %ecx,-4(%ebp) - shld %cl,%ebx,%edx - shld %cl,%eax,%ebx - shl %cl,%eax - -L_round: - testl $0x80000000,%eax - jz L_exit - - addl $1,%ebx - adcl $0,%edx - jnz L_exit - - movl $0x80000000,%edx - incl -4(%ebp) - -L_exit: - movl %edx,8(%esi) - movl %ebx,4(%esi) - movl %eax,(%esi) - - movl -4(%ebp),%eax - - popl %esi - popl %ebx - leave - ret - - - - -ENTRY(norm_Xsig) - pushl %ebp - movl %esp,%ebp - pushl %ebx /* Reserve some space */ - pushl %ebx - pushl %esi - - movl PARAM1,%esi - - movl 8(%esi),%edx - movl 4(%esi),%ebx - movl (%esi),%eax - - movl $0,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_n_exit /* Already normalized */ - jnz L_n_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - movl $-32,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_n_exit /* Normalized now */ - jnz L_n_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - addl $-32,-4(%ebp) - jmp L_n_exit /* Might not be normalized, - but shift no more. */ - -/* We need to shift left by 1 - 31 bits */ -L_n_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - subl %ecx,-4(%ebp) - shld %cl,%ebx,%edx - shld %cl,%eax,%ebx - shl %cl,%eax - -L_n_exit: - movl %edx,8(%esi) - movl %ebx,4(%esi) - movl %eax,(%esi) - - movl -4(%ebp),%eax - - popl %esi - popl %ebx - leave - ret - diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S deleted file mode 100644 index 31cdd118e91..00000000000 --- a/arch/i386/math-emu/shr_Xsig.S +++ /dev/null @@ -1,87 +0,0 @@ - .file "shr_Xsig.S" -/*---------------------------------------------------------------------------+ - | shr_Xsig.S | - | | - | 12 byte right shift function | - | | - | Copyright (C) 1992,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void shr_Xsig(Xsig *arg, unsigned nr) | - | | - | Extended shift right function. | - | Fastest for small shifts. | - | Shifts the 12 byte quantity pointed to by the first arg (arg) | - | right by the number of bits specified by the second arg (nr). | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -ENTRY(shr_Xsig) - push %ebp - movl %esp,%ebp - pushl %esi - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - pushl %ebx - movl (%esi),%eax /* lsl */ - movl 4(%esi),%ebx /* midl */ - movl 8(%esi),%edx /* msl */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edx,8(%esi) - popl %ebx - popl %esi - leave - ret - -L_more_than_31: - cmpl $64,%ecx - jnc L_more_than_63 - - subb $32,%cl - movl 4(%esi),%eax /* midl */ - movl 8(%esi),%edx /* msl */ - shrd %cl,%edx,%eax - shr %cl,%edx - movl %eax,(%esi) - movl %edx,4(%esi) - movl $0,8(%esi) - popl %esi - leave - ret - -L_more_than_63: - cmpl $96,%ecx - jnc L_more_than_95 - - subb $64,%cl - movl 8(%esi),%eax /* msl */ - shr %cl,%eax - xorl %edx,%edx - movl %eax,(%esi) - movl %edx,4(%esi) - movl %edx,8(%esi) - popl %esi - leave - ret - -L_more_than_95: - xorl %eax,%eax - movl %eax,(%esi) - movl %eax,4(%esi) - movl %eax,8(%esi) - popl %esi - leave - ret diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h deleted file mode 100644 index 59e73302aa6..00000000000 --- a/arch/i386/math-emu/status_w.h +++ /dev/null @@ -1,67 +0,0 @@ -/*---------------------------------------------------------------------------+ - | status_w.h | - | | - | Copyright (C) 1992,1993 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _STATUS_H_ -#define _STATUS_H_ - -#include "fpu_emu.h" /* for definition of PECULIAR_486 */ - -#ifdef __ASSEMBLY__ -#define Const__(x) $##x -#else -#define Const__(x) x -#endif - -#define SW_Backward Const__(0x8000) /* backward compatibility */ -#define SW_C3 Const__(0x4000) /* condition bit 3 */ -#define SW_Top Const__(0x3800) /* top of stack */ -#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ -#define SW_C2 Const__(0x0400) /* condition bit 2 */ -#define SW_C1 Const__(0x0200) /* condition bit 1 */ -#define SW_C0 Const__(0x0100) /* condition bit 0 */ -#define SW_Summary Const__(0x0080) /* exception summary */ -#define SW_Stack_Fault Const__(0x0040) /* stack fault */ -#define SW_Precision Const__(0x0020) /* loss of precision */ -#define SW_Underflow Const__(0x0010) /* underflow */ -#define SW_Overflow Const__(0x0008) /* overflow */ -#define SW_Zero_Div Const__(0x0004) /* divide by zero */ -#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ -#define SW_Invalid Const__(0x0001) /* invalid operation */ - -#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ - -#ifndef __ASSEMBLY__ - -#define COMP_A_gt_B 1 -#define COMP_A_eq_B 2 -#define COMP_A_lt_B 3 -#define COMP_No_Comp 4 -#define COMP_Denormal 0x20 -#define COMP_NaN 0x40 -#define COMP_SNaN 0x80 - -#define status_word() \ - ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) -static inline void setcc(int cc) -{ - partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); - partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); -} - -#ifdef PECULIAR_486 - /* Default, this conveys no information, but an 80486 does it. */ - /* Clear the SW_C1 bit, "other bits undefined". */ -# define clear_C1() { partial_status &= ~SW_C1; } -# else -# define clear_C1() -#endif /* PECULIAR_486 */ - -#endif /* __ASSEMBLY__ */ - -#endif /* _STATUS_H_ */ diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h deleted file mode 100644 index a0d73a1d2b6..00000000000 --- a/arch/i386/math-emu/version.h +++ /dev/null @@ -1,12 +0,0 @@ -/*---------------------------------------------------------------------------+ - | version.h | - | | - | | - | Copyright (C) 1992,1993,1994,1996,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#define FPU_VERSION "wm-FPU-emu version 2.01" diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S deleted file mode 100644 index 51842831798..00000000000 --- a/arch/i386/math-emu/wm_shrx.S +++ /dev/null @@ -1,204 +0,0 @@ - .file "wm_shrx.S" -/*---------------------------------------------------------------------------+ - | wm_shrx.S | - | | - | 64 bit right shift functions | - | | - | Copyright (C) 1992,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | unsigned FPU_shrx(void *arg1, unsigned arg2) | - | and | - | unsigned FPU_shrxs(void *arg1, unsigned arg2) | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -/*---------------------------------------------------------------------------+ - | unsigned FPU_shrx(void *arg1, unsigned arg2) | - | | - | Extended shift right function. | - | Fastest for small shifts. | - | Shifts the 64 bit quantity pointed to by the first arg (arg1) | - | right by the number of bits specified by the second arg (arg2). | - | Forms a 96 bit quantity from the 64 bit arg and eax: | - | [ 64 bit arg ][ eax ] | - | shift right ---------> | - | The eax register is initialized to 0 before the shifting. | - | Results returned in the 64 bit arg and eax. | - +---------------------------------------------------------------------------*/ - -ENTRY(FPU_shrx) - push %ebp - movl %esp,%ebp - pushl %esi - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - pushl %ebx - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %eax,%eax /* extension */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - movl %ebx,(%esi) - movl %edx,4(%esi) - popl %ebx - popl %esi - leave - ret - -L_more_than_31: - cmpl $64,%ecx - jnc L_more_than_63 - - subb $32,%cl - movl (%esi),%eax /* lsl */ - movl 4(%esi),%edx /* msl */ - shrd %cl,%edx,%eax - shr %cl,%edx - movl %edx,(%esi) - movl $0,4(%esi) - popl %esi - leave - ret - -L_more_than_63: - cmpl $96,%ecx - jnc L_more_than_95 - - subb $64,%cl - movl 4(%esi),%eax /* msl */ - shr %cl,%eax - xorl %edx,%edx - movl %edx,(%esi) - movl %edx,4(%esi) - popl %esi - leave - ret - -L_more_than_95: - xorl %eax,%eax - movl %eax,(%esi) - movl %eax,4(%esi) - popl %esi - leave - ret - - -/*---------------------------------------------------------------------------+ - | unsigned FPU_shrxs(void *arg1, unsigned arg2) | - | | - | Extended shift right function (optimized for small floating point | - | integers). | - | Shifts the 64 bit quantity pointed to by the first arg (arg1) | - | right by the number of bits specified by the second arg (arg2). | - | Forms a 96 bit quantity from the 64 bit arg and eax: | - | [ 64 bit arg ][ eax ] | - | shift right ---------> | - | The eax register is initialized to 0 before the shifting. | - | The lower 8 bits of eax are lost and replaced by a flag which is | - | set (to 0x01) if any bit, apart from the first one, is set in the | - | part which has been shifted out of the arg. | - | Results returned in the 64 bit arg and eax. | - +---------------------------------------------------------------------------*/ -ENTRY(FPU_shrxs) - push %ebp - movl %esp,%ebp - pushl %esi - pushl %ebx - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $64,%ecx /* shrd only works for 0..31 bits */ - jnc Ls_more_than_63 - - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jc Ls_less_than_32 - -/* We got here without jumps by assuming that the most common requirement - is for small integers */ -/* Shift by [32..63] bits */ - subb $32,%cl - movl (%esi),%eax /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %ebx,%ebx - shrd %cl,%eax,%ebx - shrd %cl,%edx,%eax - shr %cl,%edx - orl %ebx,%ebx /* test these 32 bits */ - setne %bl - test $0x7fffffff,%eax /* and 31 bits here */ - setne %bh - orw %bx,%bx /* Any of the 63 bit set ? */ - setne %al - movl %edx,(%esi) - movl $0,4(%esi) - popl %ebx - popl %esi - leave - ret - -/* Shift by [0..31] bits */ -Ls_less_than_32: - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %eax,%eax /* extension */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - test $0x7fffffff,%eax /* only need to look at eax here */ - setne %al - movl %ebx,(%esi) - movl %edx,4(%esi) - popl %ebx - popl %esi - leave - ret - -/* Shift by [64..95] bits */ -Ls_more_than_63: - cmpl $96,%ecx - jnc Ls_more_than_95 - - subb $64,%cl - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%eax /* msl */ - xorl %edx,%edx /* extension */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orl %ebx,%edx - setne %bl - test $0x7fffffff,%eax /* only need to look at eax here */ - setne %bh - orw %bx,%bx - setne %al - xorl %edx,%edx - movl %edx,(%esi) /* set to zero */ - movl %edx,4(%esi) /* set to zero */ - popl %ebx - popl %esi - leave - ret - -Ls_more_than_95: -/* Shift by [96..inf) bits */ - xorl %eax,%eax - movl (%esi),%ebx - orl 4(%esi),%ebx - setne %al - xorl %ebx,%ebx - movl %ebx,(%esi) - movl %ebx,4(%esi) - popl %ebx - popl %esi - leave - ret diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S deleted file mode 100644 index d258f59564e..00000000000 --- a/arch/i386/math-emu/wm_sqrt.S +++ /dev/null @@ -1,470 +0,0 @@ - .file "wm_sqrt.S" -/*---------------------------------------------------------------------------+ - | wm_sqrt.S | - | | - | Fixed point arithmetic square root evaluation. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | Call from C as: | - | int wm_sqrt(FPU_REG *n, unsigned int control_word) | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | wm_sqrt(FPU_REG *n, unsigned int control_word) | - | returns the square root of n in n. | - | | - | Use Newton's method to compute the square root of a number, which must | - | be in the range [1.0 .. 4.0), to 64 bits accuracy. | - | Does not check the sign or tag of the argument. | - | Sets the exponent, but not the sign or tag of the result. | - | | - | The guess is kept in %esi:%edi | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -#ifndef NON_REENTRANT_FPU -/* Local storage on the stack: */ -#define FPU_accum_3 -4(%ebp) /* ms word */ -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) - -/* - * The de-normalised argument: - * sq_2 sq_1 sq_0 - * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 - * ^ binary point here - */ -#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ -#define FPU_fsqrt_arg_1 -24(%ebp) -#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ - -#else -/* Local storage in a static area: */ -.data - .align 4,0 -FPU_accum_3: - .long 0 /* ms word */ -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 - -/* The de-normalised argument: - sq_2 sq_1 sq_0 - b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 - ^ binary point here - */ -FPU_fsqrt_arg_2: - .long 0 /* ms word */ -FPU_fsqrt_arg_1: - .long 0 -FPU_fsqrt_arg_0: - .long 0 /* ls word, at most the ms bit is set */ -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(wm_sqrt) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi - - movl SIGH(%esi),%eax - movl SIGL(%esi),%ecx - xorl %edx,%edx - -/* We use a rough linear estimate for the first guess.. */ - - cmpw EXP_BIAS,EXP(%esi) - jnz sqrt_arg_ge_2 - - shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ - rcrl $1,%ecx - rcrl $1,%edx - -sqrt_arg_ge_2: -/* From here on, n is never accessed directly again until it is - replaced by the answer. */ - - movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ - movl %ecx,FPU_fsqrt_arg_1 - movl %edx,FPU_fsqrt_arg_0 - -/* Make a linear first estimate */ - shrl $1,%eax - addl $0x40000000,%eax - movl $0xaaaaaaaa,%ecx - mull %ecx - shll %edx /* max result was 7fff... */ - testl $0x80000000,%edx /* but min was 3fff... */ - jnz sqrt_prelim_no_adjust - - movl $0x80000000,%edx /* round up */ - -sqrt_prelim_no_adjust: - movl %edx,%esi /* Our first guess */ - -/* We have now computed (approx) (2 + x) / 3, which forms the basis - for a few iterations of Newton's method */ - - movl FPU_fsqrt_arg_2,%ecx /* ms word */ - -/* - * From our initial estimate, three iterations are enough to get us - * to 30 bits or so. This will then allow two iterations at better - * precision to complete the process. - */ - -/* Compute (g + n/g)/2 at each iteration (g is the guess). */ - shrl %ecx /* Doing this first will prevent a divide */ - /* overflow later. */ - - movl %ecx,%edx /* msw of the arg / 2 */ - divl %esi /* current estimate */ - shrl %esi /* divide by 2 */ - addl %eax,%esi /* the new estimate */ - - movl %ecx,%edx - divl %esi - shrl %esi - addl %eax,%esi - - movl %ecx,%edx - divl %esi - shrl %esi - addl %eax,%esi - -/* - * Now that an estimate accurate to about 30 bits has been obtained (in %esi), - * we improve it to 60 bits or so. - * - * The strategy from now on is to compute new estimates from - * guess := guess + (n - guess^2) / (2 * guess) - */ - -/* First, find the square of the guess */ - movl %esi,%eax - mull %esi -/* guess^2 now in %edx:%eax */ - - movl FPU_fsqrt_arg_1,%ecx - subl %ecx,%eax - movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ - sbbl %ecx,%edx - jnc sqrt_stage_2_positive - -/* Subtraction gives a negative result, - negate the result before division. */ - notl %edx - notl %eax - addl $1,%eax - adcl $0,%edx - - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - jmp sqrt_stage_2_finish - -sqrt_stage_2_positive: - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - notl %ecx - notl %eax - addl $1,%eax - adcl $0,%ecx - -sqrt_stage_2_finish: - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* Form the new estimate in %esi:%edi */ - movl %eax,%edi - addl %ecx,%esi - - jnz sqrt_stage_2_done /* result should be [1..2) */ - -#ifdef PARANOID -/* It should be possible to get here only if the arg is ffff....ffff */ - cmp $0xffffffff,FPU_fsqrt_arg_1 - jnz sqrt_stage_2_error -#endif /* PARANOID */ - -/* The best rounded result. */ - xorl %eax,%eax - decl %eax - movl %eax,%edi - movl %eax,%esi - movl $0x7fffffff,%eax - jmp sqrt_round_result - -#ifdef PARANOID -sqrt_stage_2_error: - pushl EX_INTERNAL|0x213 - call EXCEPTION -#endif /* PARANOID */ - -sqrt_stage_2_done: - -/* Now the square root has been computed to better than 60 bits. */ - -/* Find the square of the guess. */ - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,FPU_accum_1 - - movl %esi,%eax - mull %esi - movl %edx,FPU_accum_3 - movl %eax,FPU_accum_2 - - movl %edi,%eax - mull %esi - addl %eax,FPU_accum_1 - adcl %edx,FPU_accum_2 - adcl $0,FPU_accum_3 - -/* movl %esi,%eax */ -/* mull %edi */ - addl %eax,FPU_accum_1 - adcl %edx,FPU_accum_2 - adcl $0,FPU_accum_3 - -/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ - - movl FPU_fsqrt_arg_0,%eax /* get normalized n */ - subl %eax,FPU_accum_1 - movl FPU_fsqrt_arg_1,%eax - sbbl %eax,FPU_accum_2 - movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ - sbbl %eax,FPU_accum_3 - jnc sqrt_stage_3_positive - -/* Subtraction gives a negative result, - negate the result before division */ - notl FPU_accum_1 - notl FPU_accum_2 - notl FPU_accum_3 - addl $1,FPU_accum_1 - adcl $0,FPU_accum_2 - -#ifdef PARANOID - adcl $0,FPU_accum_3 /* This must be zero */ - jz sqrt_stage_3_no_error - -sqrt_stage_3_error: - pushl EX_INTERNAL|0x207 - call EXCEPTION - -sqrt_stage_3_no_error: -#endif /* PARANOID */ - - movl FPU_accum_2,%edx - movl FPU_accum_1,%eax - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* prepare to round the result */ - - addl %ecx,%edi - adcl $0,%esi - - jmp sqrt_stage_3_finished - -sqrt_stage_3_positive: - movl FPU_accum_2,%edx - movl FPU_accum_1,%eax - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* prepare to round the result */ - - notl %eax /* Negate the correction term */ - notl %ecx - addl $1,%eax - adcl $0,%ecx /* carry here ==> correction == 0 */ - adcl $0xffffffff,%esi - - addl %ecx,%edi - adcl $0,%esi - -sqrt_stage_3_finished: - -/* - * The result in %esi:%edi:%esi should be good to about 90 bits here, - * and the rounding information here does not have sufficient accuracy - * in a few rare cases. - */ - cmpl $0xffffffe0,%eax - ja sqrt_near_exact_x - - cmpl $0x00000020,%eax - jb sqrt_near_exact - - cmpl $0x7fffffe0,%eax - jb sqrt_round_result - - cmpl $0x80000020,%eax - jb sqrt_get_more_precision - -sqrt_round_result: -/* Set up for rounding operations */ - movl %eax,%edx - movl %esi,%eax - movl %edi,%ebx - movl PARAM1,%edi - movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ - jmp fpu_reg_round - - -sqrt_near_exact_x: -/* First, the estimate must be rounded up. */ - addl $1,%edi - adcl $0,%esi - -sqrt_near_exact: -/* - * This is an easy case because x^1/2 is monotonic. - * We need just find the square of our estimate, compare it - * with the argument, and deduce whether our estimate is - * above, below, or exact. We use the fact that the estimate - * is known to be accurate to about 90 bits. - */ - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,%ebx /* 2nd ls word of square */ - movl %eax,%ecx /* ls word of square */ - - movl %edi,%eax - mull %esi - addl %eax,%ebx - addl %eax,%ebx - -#ifdef PARANOID - cmp $0xffffffb0,%ebx - jb sqrt_near_exact_ok - - cmp $0x00000050,%ebx - ja sqrt_near_exact_ok - - pushl EX_INTERNAL|0x214 - call EXCEPTION - -sqrt_near_exact_ok: -#endif /* PARANOID */ - - or %ebx,%ebx - js sqrt_near_exact_small - - jnz sqrt_near_exact_large - - or %ebx,%edx - jnz sqrt_near_exact_large - -/* Our estimate is exactly the right answer */ - xorl %eax,%eax - jmp sqrt_round_result - -sqrt_near_exact_small: -/* Our estimate is too small */ - movl $0x000000ff,%eax - jmp sqrt_round_result - -sqrt_near_exact_large: -/* Our estimate is too large, we need to decrement it */ - subl $1,%edi - sbbl $0,%esi - movl $0xffffff00,%eax - jmp sqrt_round_result - - -sqrt_get_more_precision: -/* This case is almost the same as the above, except we start - with an extra bit of precision in the estimate. */ - stc /* The extra bit. */ - rcll $1,%edi /* Shift the estimate left one bit */ - rcll $1,%esi - - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,%ebx /* 2nd ls word of square */ - movl %eax,%ecx /* ls word of square */ - - movl %edi,%eax - mull %esi - addl %eax,%ebx - addl %eax,%ebx - -/* Put our estimate back to its original value */ - stc /* The ms bit. */ - rcrl $1,%esi /* Shift the estimate left one bit */ - rcrl $1,%edi - -#ifdef PARANOID - cmp $0xffffff60,%ebx - jb sqrt_more_prec_ok - - cmp $0x000000a0,%ebx - ja sqrt_more_prec_ok - - pushl EX_INTERNAL|0x215 - call EXCEPTION - -sqrt_more_prec_ok: -#endif /* PARANOID */ - - or %ebx,%ebx - js sqrt_more_prec_small - - jnz sqrt_more_prec_large - - or %ebx,%ecx - jnz sqrt_more_prec_large - -/* Our estimate is exactly the right answer */ - movl $0x80000000,%eax - jmp sqrt_round_result - -sqrt_more_prec_small: -/* Our estimate is too small */ - movl $0x800000ff,%eax - jmp sqrt_round_result - -sqrt_more_prec_large: -/* Our estimate is too large */ - movl $0x7fffff00,%eax - jmp sqrt_round_result diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile new file mode 100644 index 00000000000..9c943fa6ce6 --- /dev/null +++ b/arch/x86/math-emu/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for wm-FPU-emu +# + +#DEBUG = -DDEBUGGING +DEBUG = +PARANOID = -DPARANOID +CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) + +EXTRA_AFLAGS := $(PARANOID) + +# From 'C' language sources: +C_OBJS =fpu_entry.o errors.o \ + fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ + load_store.o get_address.o \ + poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ + reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ + reg_ld_str.o reg_divide.o reg_mul.o + +# From 80x86 assembler sources: +A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ + div_small.o reg_norm.o reg_round.o \ + wm_shrx.o wm_sqrt.o \ + div_Xsig.o polynom_Xsig.o round_Xsig.o \ + shr_Xsig.o mul_Xsig.o + +obj-y =$(C_OBJS) $(A_OBJS) + +proto: + cproto -e -DMAKING_PROTO *.c >fpu_proto.h diff --git a/arch/x86/math-emu/README b/arch/x86/math-emu/README new file mode 100644 index 00000000000..e6235491d6e --- /dev/null +++ b/arch/x86/math-emu/README @@ -0,0 +1,427 @@ + +---------------------------------------------------------------------------+ + | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | + | | + | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | This program is free software; you can redistribute it and/or modify | + | it under the terms of the GNU General Public License version 2 as | + | published by the Free Software Foundation. | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of the GNU General Public License | + | along with this program; if not, write to the Free Software | + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | + | | + +---------------------------------------------------------------------------+ + + + +wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 +which was my 80387 emulator for early versions of djgpp (gcc under +msdos); wm-emu387 was in turn based upon emu387 which was written by +DJ Delorie for djgpp. The interface to the Linux kernel is based upon +the original Linux math emulator by Linus Torvalds. + +My target FPU for wm-FPU-emu is that described in the Intel486 +Programmer's Reference Manual (1992 edition). Unfortunately, numerous +facets of the functioning of the FPU are not well covered in the +Reference Manual. The information in the manual has been supplemented +with measurements on real 80486's. Unfortunately, it is simply not +possible to be sure that all of the peculiarities of the 80486 have +been discovered, so there is always likely to be obscure differences +in the detailed behaviour of the emulator and a real 80486. + +wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, +but is very close. See "Limitations" later in this file for a list of +some differences. + +Please report bugs, etc to me at: + billm@melbpc.org.au +or b.metzenthen@medoto.unimelb.edu.au + +For more information on the emulator and on floating point topics, see +my web pages, currently at http://www.suburbia.net/~billm/ + + +--Bill Metzenthen + December 1999 + + +----------------------- Internals of wm-FPU-emu ----------------------- + +Numeric algorithms: +(1) Add, subtract, and multiply. Nothing remarkable in these. +(2) Divide has been tuned to get reasonable performance. The algorithm + is not the obvious one which most people seem to use, but is designed + to take advantage of the characteristics of the 80386. I expect that + it has been invented many times before I discovered it, but I have not + seen it. It is based upon one of those ideas which one carries around + for years without ever bothering to check it out. +(3) The sqrt function has been tuned to get good performance. It is based + upon Newton's classic method. Performance was improved by capitalizing + upon the properties of Newton's method, and the code is once again + structured taking account of the 80386 characteristics. +(4) The trig, log, and exp functions are based in each case upon quasi- + "optimal" polynomial approximations. My definition of "optimal" was + based upon getting good accuracy with reasonable speed. +(5) The argument reducing code for the trig function effectively uses + a value of pi which is accurate to more than 128 bits. As a consequence, + the reduced argument is accurate to more than 64 bits for arguments up + to a few pi, and accurate to more than 64 bits for most arguments, + even for arguments approaching 2^63. This is far superior to an + 80486, which uses a value of pi which is accurate to 66 bits. + +The code of the emulator is complicated slightly by the need to +account for a limited form of re-entrancy. Normally, the emulator will +emulate each FPU instruction to completion without interruption. +However, it may happen that when the emulator is accessing the user +memory space, swapping may be needed. In this case the emulator may be +temporarily suspended while disk i/o takes place. During this time +another process may use the emulator, thereby perhaps changing static +variables. The code which accesses user memory is confined to five +files: + fpu_entry.c + reg_ld_str.c + load_store.c + get_address.c + errors.c +As from version 1.12 of the emulator, no static variables are used +(apart from those in the kernel's per-process tables). The emulator is +therefore now fully re-entrant, rather than having just the restricted +form of re-entrancy which is required by the Linux kernel. + +----------------------- Limitations of wm-FPU-emu ----------------------- + +There are a number of differences between the current wm-FPU-emu +(version 2.01) and the 80486 FPU (apart from bugs). The differences +are fewer than those which applied to the 1.xx series of the emulator. +Some of the more important differences are listed below: + +The Roundup flag does not have much meaning for the transcendental +functions and its 80486 value with these functions is likely to differ +from its emulator value. + +In a few rare cases the Underflow flag obtained with the emulator will +be different from that obtained with an 80486. This occurs when the +following conditions apply simultaneously: +(a) the operands have a higher precision than the current setting of the + precision control (PC) flags. +(b) the underflow exception is masked. +(c) the magnitude of the exact result (before rounding) is less than 2^-16382. +(d) the magnitude of the final result (after rounding) is exactly 2^-16382. +(e) the magnitude of the exact result would be exactly 2^-16382 if the + operands were rounded to the current precision before the arithmetic + operation was performed. +If all of these apply, the emulator will set the Underflow flag but a real +80486 will not. + +NOTE: Certain formats of Extended Real are UNSUPPORTED. They are +unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, +and Unnormals. None of these will be generated by an 80486 or by the +emulator. Do not use them. The emulator treats them differently in +detail from the way an 80486 does. + +Self modifying code can cause the emulator to fail. An example of such +code is: + movl %esp,[%ebx] + fld1 +The FPU instruction may be (usually will be) loaded into the pre-fetch +queue of the CPU before the mov instruction is executed. If the +destination of the 'movl' overlaps the FPU instruction then the bytes +in the prefetch queue and memory will be inconsistent when the FPU +instruction is executed. The emulator will be invoked but will not be +able to find the instruction which caused the device-not-present +exception. For this case, the emulator cannot emulate the behaviour of +an 80486DX. + +Handling of the address size override prefix byte (0x67) has not been +extensively tested yet. A major problem exists because using it in +vm86 mode can cause a general protection fault. Address offsets +greater than 0xffff appear to be illegal in vm86 mode but are quite +acceptable (and work) in real mode. A small test program developed to +check the addressing, and which runs successfully in real mode, +crashes dosemu under Linux and also brings Windows down with a general +protection fault message when run under the MS-DOS prompt of Windows +3.1. (The program simply reads data from a valid address). + +The emulator supports 16-bit protected mode, with one difference from +an 80486DX. A 80486DX will allow some floating point instructions to +write a few bytes below the lowest address of the stack. The emulator +will not allow this in 16-bit protected mode: no instructions are +allowed to write outside the bounds set by the protection. + +----------------------- Performance of wm-FPU-emu ----------------------- + +Speed. +----- + +The speed of floating point computation with the emulator will depend +upon instruction mix. Relative performance is best for the instructions +which require most computation. The simple instructions are adversely +affected by the FPU instruction trap overhead. + + +Timing: Some simple timing tests have been made on the emulator functions. +The times include load/store instructions. All times are in microseconds +measured on a 33MHz 386 with 64k cache. The Turbo C tests were under +ms-dos, the next two columns are for emulators running with the djgpp +ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, +using libm4.0 (hard). + +function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu + + + 60.5 154.8 76.5 139.4 + - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 + * 71.0 190.8 79.6 146.6 + / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 + + sin() 310.8 4692.0 319.0 398.5 + cos() 284.4 4855.2 308.0 388.7 + tan() 495.0 8807.1 394.9 504.7 + atan() 328.9 4866.4 601.1 419.5-491.9 + + sqrt() 128.7 crashed 145.2 227.0 + log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 + exp() 479.1 6619.2 469.1 850.8 + + +The performance under Linux is improved by the use of look-ahead code. +The following results show the improvement which is obtained under +Linux due to the look-ahead code. Also given are the times for the +original Linux emulator with the 4.1 'soft' lib. + + [ Linus' note: I changed look-ahead to be the default under linux, as + there was no reason not to use it after I had edited it to be + disabled during tracing ] + + wm-FPU-emu w original w + look-ahead 'soft' lib + + 106.4 190.2 + - 108.6-111.6 192.4-216.2 + * 113.4 193.1 + / 108.8-124.4 700.1-706.2 + + sin() 390.5 2642.0 + cos() 381.5 2767.4 + tan() 496.5 3153.3 + atan() 367.2-435.5 2439.4-3396.8 + + sqrt() 195.1 4732.5 + log() 358.0-387.5 3359.2-3390.3 + exp() 619.3 4046.4 + + +These figures are now somewhat out-of-date. The emulator has become +progressively slower for most functions as more of the 80486 features +have been implemented. + + +----------------------- Accuracy of wm-FPU-emu ----------------------- + + +The accuracy of the emulator is in almost all cases equal to or better +than that of an Intel 80486 FPU. + +The results of the basic arithmetic functions (+,-,*,/), and fsqrt +match those of an 80486 FPU. They are the best possible; the error for +these never exceeds 1/2 an lsb. The fprem and fprem1 instructions +return exact results; they have no error. + + +The following table compares the emulator accuracy for the sqrt(), +trig and log functions against the Turbo C "emulator". For this table, +each function was tested at about 400 points. Ideal worst-case results +would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for +arguments greater than pi/4 can be thought of as being related to the +precision of the argument x; e.g. an argument of pi/2-(1e-10) which is +accurate to 64 bits can result in a relative accuracy in cos() of +about 64 + log2(cos(x)) = 31 bits. + + +Function Tested x range Worst result Turbo C + (relative bits) + +sqrt(x) 1 .. 2 64.1 63.2 +atan(x) 1e-10 .. 200 64.2 62.8 +cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 + 64.1 (x = pi/2-(1e-10)) 31.9 +sin(x) 1e-10 .. pi/2 64.0 62.8 +tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 + 64.1 (x = pi/2-(1e-10)) 31.9 +exp(x) 0 .. 1 63.1 ** 62.9 +log(x) 1+1e-6 .. 2 63.8 ** 62.1 + +** The accuracy for exp() and log() is low because the FPU (emulator) +does not compute them directly; two operations are required. + + +The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or +later) for 'float' variables (24 bit precision numbers) when precision +control is set to 24, 53 or 64 bits, and for 'double' variables (53 +bit precision numbers) when precision control is set to 53 bits (a +properly performing FPU cannot pass the 'paranoia' tests for 'double' +variables when precision control is set to 64 bits). + +The code for reducing the argument for the trig functions (fsin, fcos, +fptan and fsincos) has been improved and now effectively uses a value +for pi which is accurate to more than 128 bits precision. As a +consequence, the accuracy of these functions for large arguments has +been dramatically improved (and is now very much better than an 80486 +FPU). There is also now no degradation of accuracy for fcos and fptan +for operands close to pi/2. Measured results are (note that the +definition of accuracy has changed slightly from that used for the +above table): + +Function Tested x range Worst result + (absolute bits) + +cos(x) 0 .. 9.22e+18 62.0 +sin(x) 1e-16 .. 9.22e+18 62.1 +tan(x) 1e-16 .. 9.22e+18 61.8 + +It is possible with some effort to find very large arguments which +give much degraded precision. For example, the integer number + 8227740058411162616.0 +is within about 10e-7 of a multiple of pi. To find the tan (for +example) of this number to 64 bits precision it would be necessary to +have a value of pi which had about 150 bits precision. The FPU +emulator computes the result to about 42.6 bits precision (the correct +result is about -9.739715e-8). On the other hand, an 80486 FPU returns +0.01059, which in relative terms is hopelessly inaccurate. + +For arguments close to critical angles (which occur at multiples of +pi/2) the emulator is more accurate than an 80486 FPU. For very large +arguments, the emulator is far more accurate. + + +Prior to version 1.20 of the emulator, the accuracy of the results for +the transcendental functions (in their principal range) was not as +good as the results from an 80486 FPU. From version 1.20, the accuracy +has been considerably improved and these functions now give measured +worst-case results which are better than the worst-case results given +by an 80486 FPU. + +The following table gives the measured results for the emulator. The +number of randomly selected arguments in each case is about half a +million. The group of three columns gives the frequency of the given +accuracy in number of times per million, thus the second of these +columns shows that an accuracy of between 63.80 and 63.89 bits was +found at a rate of 133 times per one million measurements for fsin. +The results show that the fsin, fcos and fptan instructions return +results which are in error (i.e. less accurate than the best possible +result (which is 64 bits)) for about one per cent of all arguments +between -pi/2 and +pi/2. The other instructions have a lower +frequency of results which are in error. The last two columns give +the worst accuracy which was found (in bits) and the approximate value +of the argument which produced it. + + frequency (per M) + ------------------- --------------- +instr arg range # tests 63.7 63.8 63.9 worst at arg + bits bits bits bits +----- ------------ ------- ---- ---- ----- ----- -------- +fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 +fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 +fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 +fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) +fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) +fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) +f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 + + +Tests performed on an 80486 FPU showed results of lower accuracy. The +following table gives the results which were obtained with an AMD +486DX2/66 (other tests indicate that an Intel 486DX produces +identical results). The tests were basically the same as those used +to measure the emulator (the values, being random, were in general not +the same). The total number of tests for each instruction are given +at the end of the table, in case each about 100k tests were performed. +Another line of figures at the end of the table shows that most of the +instructions return results which are in error for more than 10 +percent of the arguments tested. + +The numbers in the body of the table give the approx number of times a +result of the given accuracy in bits (given in the left-most column) +was obtained per one million arguments. For three of the instructions, +two columns of results are given: * The second column for f2xm1 gives +the number cases where the results of the first column were for a +positive argument, this shows that this instruction gives better +results for positive arguments than it does for negative. * In the +cases of fcos and fptan, the first column gives the results when all +cases where arguments greater than 1.5 were removed from the results +given in the second column. Unlike the emulator, an 80486 FPU returns +results of relatively poor accuracy for these instructions when the +argument approaches pi/2. The table does not show those cases when the +accuracy of the results were less than 62 bits, which occurs quite +often for fsin and fptan when the argument approaches pi/2. This poor +accuracy is discussed above in relation to the Turbo C "emulator", and +the accuracy of the value of pi. + + +bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan +62.0 0 0 0 0 437 0 0 0 0 925 +62.1 0 0 10 0 894 0 0 0 0 1023 +62.2 14 0 0 0 1033 0 0 0 0 945 +62.3 57 0 0 0 1202 0 0 0 0 1023 +62.4 385 0 0 10 1292 0 23 0 0 1178 +62.5 1140 0 0 119 1649 0 39 0 0 1149 +62.6 2037 0 0 189 1620 0 16 0 0 1169 +62.7 5086 14 0 646 2315 10 101 35 39 1402 +62.8 8818 86 0 984 3050 59 287 131 224 2036 +62.9 11340 1355 0 2126 4153 79 605 357 321 1948 +63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 +63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 +63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 +63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 +63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 +63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 +63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 +63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 +63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 +63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 + +Per cent with error: + 30.9 3.2 18.5 9.8 13.1 11.6 17.4 +Total arguments tested: + 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 + + +------------------------- Contributors ------------------------------- + +A number of people have contributed to the development of the +emulator, often by just reporting bugs, sometimes with suggested +fixes, and a few kind people have provided me with access in one way +or another to an 80486 machine. Contributors include (to those people +who I may have forgotten, please forgive me): + +Linus Torvalds +Tommy.Thorn@daimi.aau.dk +Andrew.Tridgell@anu.edu.au +Nick Holloway, alfie@dcs.warwick.ac.uk +Hermano Moura, moura@dcs.gla.ac.uk +Jon Jagger, J.Jagger@scp.ac.uk +Lennart Benschop +Brian Gallew, geek+@CMU.EDU +Thomas Staniszewski, ts3v+@andrew.cmu.edu +Martin Howell, mph@plasma.apana.org.au +M Saggaf, alsaggaf@athena.mit.edu +Peter Barker, PETER@socpsy.sci.fau.edu +tom@vlsivie.tuwien.ac.at +Dan Russel, russed@rpi.edu +Daniel Carosone, danielce@ee.mu.oz.au +cae@jpmorgan.com +Hamish Coleman, t933093@minyos.xx.rmit.oz.au +Bruce Evans, bde@kralizec.zeta.org.au +Timo Korvola, Timo.Korvola@hut.fi +Rick Lyons, rick@razorback.brisnet.org.au +Rick, jrs@world.std.com + +...and numerous others who responded to my request for help with +a real 80486. + diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h new file mode 100644 index 00000000000..ae2274dbd30 --- /dev/null +++ b/arch/x86/math-emu/control_w.h @@ -0,0 +1,45 @@ +/*---------------------------------------------------------------------------+ + | control_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _CONTROLW_H_ +#define _CONTROLW_H_ + +#ifdef __ASSEMBLY__ +#define _Const_(x) $##x +#else +#define _Const_(x) x +#endif + +#define CW_RC _Const_(0x0C00) /* rounding control */ +#define CW_PC _Const_(0x0300) /* precision control */ + +#define CW_Precision Const_(0x0020) /* loss of precision mask */ +#define CW_Underflow Const_(0x0010) /* underflow mask */ +#define CW_Overflow Const_(0x0008) /* overflow mask */ +#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ +#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ +#define CW_Invalid Const_(0x0001) /* invalid operation mask */ + +#define CW_Exceptions _Const_(0x003f) /* all masks */ + +#define RC_RND _Const_(0x0000) +#define RC_DOWN _Const_(0x0400) +#define RC_UP _Const_(0x0800) +#define RC_CHOP _Const_(0x0C00) + +/* p 15-5: Precision control bits affect only the following: + ADD, SUB(R), MUL, DIV(R), and SQRT */ +#define PR_24_BITS _Const_(0x000) +#define PR_53_BITS _Const_(0x200) +#define PR_64_BITS _Const_(0x300) +#define PR_RESERVED_BITS _Const_(0x100) +/* FULL_PRECISION simulates all exceptions masked */ +#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) + +#endif /* _CONTROLW_H_ */ diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S new file mode 100644 index 00000000000..f77ba3058b3 --- /dev/null +++ b/arch/x86/math-emu/div_Xsig.S @@ -0,0 +1,365 @@ + .file "div_Xsig.S" +/*---------------------------------------------------------------------------+ + | div_Xsig.S | + | | + | Division subroutine for 96 bit quantities | + | | + | Copyright (C) 1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | + | put the 96 bit result at the location d. | + | | + | The result may not be accurate to 96 bits. It is intended for use where | + | a result better than 64 bits is required. The result should usually be | + | good to at least 94 bits. | + | The returned result is actually divided by one half. This is done to | + | prevent overflow. | + | | + | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | + | | + | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#define XsigLL(x) (x) +#define XsigL(x) 4(x) +#define XsigH(x) 8(x) + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_3 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_result_1 -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_3: + .long 0 +FPU_result_2: + .long 0 +FPU_result_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(div_Xsig) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ebx /* pointer to denom */ + +#ifdef PARANOID + testl $0x80000000, XsigH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + /* Save extended dividend in local register */ + + /* Divide by 2 to prevent overflow */ + clc + movl XsigH(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_3 + movl XsigL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_2 + movl XsigLL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_1 + movl $0,%eax + rcrl %eax + movl %eax,FPU_accum_0 + + movl FPU_accum_2,%eax /* Get the current num */ + movl FPU_accum_3,%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + /* We will divide by a number which is too large */ + movl XsigH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_3 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_3,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_3 /* Correct the answer */ + + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl XsigL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl XsigL(%ebx),%eax + sbbl XsigH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_3 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_2,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_2 /* Correct the answer */ + adcl $0,FPU_result_3 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + /* We use an approximation for the third 32 bits. + To take account of the 3rd 32 bits of the divisor + (call them del), we subtract del * (a/b) */ + + movl FPU_result_3,%eax /* a/b */ + mull XsigLL(%ebx) /* del */ + + subl %edx,FPU_accum_1 + + /* A borrow indicates that the result is negative */ + jnb LTest_over + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + + /* The above addition might not have been enough, check again. */ + movl FPU_accum_1,%edx /* get the reduced num */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + jmp LDo_3rd_div + +LTest_over: + movl FPU_accum_1,%edx /* get the reduced num */ + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + /* prevent overflow */ + subl XsigH(%ebx),%edx + movl %edx,FPU_accum_1 + + addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_3 + +LDo_3rd_div: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + divl XsigH(%ebx) + + movl %eax,FPU_result_1 /* Rough estimate of third word */ + + movl PARAM3,%esi /* pointer to answer */ + + movl FPU_result_1,%eax + movl %eax,XsigLL(%esi) + movl FPU_result_2,%eax + movl %eax,XsigL(%esi) + movl FPU_result_3,%eax + movl %eax,XsigH(%esi) + +L_exit: + popl %ebx + popl %edi + popl %esi + + leave + ret + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x240 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x241 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x242 + call EXCEPTION + pop %ebx + jmp L_exit +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S new file mode 100644 index 00000000000..47099628fa4 --- /dev/null +++ b/arch/x86/math-emu/div_small.S @@ -0,0 +1,47 @@ + .file "div_small.S" +/*---------------------------------------------------------------------------+ + | div_small.S | + | | + | Divide a 64 bit integer by a 32 bit integer & return remainder. | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(FPU_div_small) + pushl %ebp + movl %esp,%ebp + + pushl %esi + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ecx /* The denominator */ + + movl 4(%esi),%eax /* Get the current num msw */ + xorl %edx,%edx + divl %ecx + + movl %eax,4(%esi) + + movl (%esi),%eax /* Get the num lsw */ + divl %ecx + + movl %eax,(%esi) + + movl %edx,%eax /* Return the remainder in eax */ + + popl %esi + + leave + ret + diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c new file mode 100644 index 00000000000..a1b0d22f697 --- /dev/null +++ b/arch/x86/math-emu/errors.c @@ -0,0 +1,739 @@ +/*---------------------------------------------------------------------------+ + | errors.c | + | | + | The error handling functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include + +#include + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" +#include "version.h" + +/* */ +#undef PRINT_MESSAGES +/* */ + + +#if 0 +void Un_impl(void) +{ + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address); + if ( FPU_CS == __USER_CS ) + { + while ( 1 ) + { + FPU_get_user(byte1, (u_char __user *) address); + if ( (byte1 & 0xf8) == 0xd8 ) break; + printk("[%02x]", byte1); + address++; + } + printk("%02x ", byte1); + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk("/%d\n", (FPU_modrm >> 3) & 7); + } + else + { + printk("cs selector = %04x\n", FPU_CS); + } + + RE_ENTRANT_CHECK_ON; + + EXCEPTION(EX_Invalid); + +} +#endif /* 0 */ + + +/* + Called for opcodes which are illegal and which are known to result in a + SIGILL with a real 80486. + */ +void FPU_illegal(void) +{ + math_abort(FPU_info,SIGILL); +} + + + +void FPU_printall(void) +{ + int i; + static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", + "DeNorm", "Inf", "NaN" }; + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("At %p:", (void *) address); + if ( FPU_CS == __USER_CS ) + { +#define MAX_PRINTED_BYTES 20 + for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) + { + FPU_get_user(byte1, (u_char __user *) address); + if ( (byte1 & 0xf8) == 0xd8 ) + { + printk(" %02x", byte1); + break; + } + printk(" [%02x]", byte1); + address++; + } + if ( i == MAX_PRINTED_BYTES ) + printk(" [more..]\n"); + else + { + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk(" /%d, mod=%d rm=%d\n", + (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); + } + } + else + { + printk("%04x\n", FPU_CS); + } + + partial_status = status_word(); + +#ifdef DEBUGGING +if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); +if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); +if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); +if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); +if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); +if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); +if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); +if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); +if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); +if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); +if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); +if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); +if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); +#endif /* DEBUGGING */ + + printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", + partial_status & 0x8000 ? 1 : 0, /* busy */ + (partial_status & 0x3800) >> 11, /* stack top pointer */ + partial_status & 0x80 ? 1 : 0, /* Error summary status */ + partial_status & 0x40 ? 1 : 0, /* Stack flag */ + partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ + partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ + partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, + partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, + partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); + +printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", + control_word & 0x1000 ? 1 : 0, + (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, + (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, + control_word & 0x80 ? 1 : 0, + control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, + control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, + control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); + + for ( i = 0; i < 8; i++ ) + { + FPU_REG *r = &st(i); + u_char tagi = FPU_gettagi(i); + switch (tagi) + { + case TAG_Empty: + continue; + break; + case TAG_Zero: + case TAG_Special: + tagi = FPU_Special(r); + case TAG_Valid: + printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, + getsign(r) ? '-' : '+', + (long)(r->sigh >> 16), + (long)(r->sigh & 0xFFFF), + (long)(r->sigl >> 16), + (long)(r->sigl & 0xFFFF), + exponent(r) - EXP_BIAS + 1); + break; + default: + printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); + continue; + break; + } + printk("%s\n", tag_desc[(int) (unsigned) tagi]); + } + + RE_ENTRANT_CHECK_ON; + +} + +static struct { + int type; + const char *name; +} exception_names[] = { + { EX_StackOver, "stack overflow" }, + { EX_StackUnder, "stack underflow" }, + { EX_Precision, "loss of precision" }, + { EX_Underflow, "underflow" }, + { EX_Overflow, "overflow" }, + { EX_ZeroDiv, "divide by zero" }, + { EX_Denormal, "denormalized operand" }, + { EX_Invalid, "invalid operation" }, + { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, + { 0, NULL } +}; + +/* + EX_INTERNAL is always given with a code which indicates where the + error was detected. + + Internal error types: + 0x14 in fpu_etc.c + 0x1nn in a *.c file: + 0x101 in reg_add_sub.c + 0x102 in reg_mul.c + 0x104 in poly_atan.c + 0x105 in reg_mul.c + 0x107 in fpu_trig.c + 0x108 in reg_compare.c + 0x109 in reg_compare.c + 0x110 in reg_add_sub.c + 0x111 in fpe_entry.c + 0x112 in fpu_trig.c + 0x113 in errors.c + 0x115 in fpu_trig.c + 0x116 in fpu_trig.c + 0x117 in fpu_trig.c + 0x118 in fpu_trig.c + 0x119 in fpu_trig.c + 0x120 in poly_atan.c + 0x121 in reg_compare.c + 0x122 in reg_compare.c + 0x123 in reg_compare.c + 0x125 in fpu_trig.c + 0x126 in fpu_entry.c + 0x127 in poly_2xm1.c + 0x128 in fpu_entry.c + 0x129 in fpu_entry.c + 0x130 in get_address.c + 0x131 in get_address.c + 0x132 in get_address.c + 0x133 in get_address.c + 0x140 in load_store.c + 0x141 in load_store.c + 0x150 in poly_sin.c + 0x151 in poly_sin.c + 0x160 in reg_ld_str.c + 0x161 in reg_ld_str.c + 0x162 in reg_ld_str.c + 0x163 in reg_ld_str.c + 0x164 in reg_ld_str.c + 0x170 in fpu_tags.c + 0x171 in fpu_tags.c + 0x172 in fpu_tags.c + 0x180 in reg_convert.c + 0x2nn in an *.S file: + 0x201 in reg_u_add.S + 0x202 in reg_u_div.S + 0x203 in reg_u_div.S + 0x204 in reg_u_div.S + 0x205 in reg_u_mul.S + 0x206 in reg_u_sub.S + 0x207 in wm_sqrt.S + 0x208 in reg_div.S + 0x209 in reg_u_sub.S + 0x210 in reg_u_sub.S + 0x211 in reg_u_sub.S + 0x212 in reg_u_sub.S + 0x213 in wm_sqrt.S + 0x214 in wm_sqrt.S + 0x215 in wm_sqrt.S + 0x220 in reg_norm.S + 0x221 in reg_norm.S + 0x230 in reg_round.S + 0x231 in reg_round.S + 0x232 in reg_round.S + 0x233 in reg_round.S + 0x234 in reg_round.S + 0x235 in reg_round.S + 0x236 in reg_round.S + 0x240 in div_Xsig.S + 0x241 in div_Xsig.S + 0x242 in div_Xsig.S + */ + +asmlinkage void FPU_exception(int n) +{ + int i, int_type; + + int_type = 0; /* Needed only to stop compiler warnings */ + if ( n & EX_INTERNAL ) + { + int_type = n - EX_INTERNAL; + n = EX_INTERNAL; + /* Set lots of exception bits! */ + partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); + } + else + { + /* Extract only the bits which we use to set the status word */ + n &= (SW_Exc_Mask); + /* Set the corresponding exception bit */ + partial_status |= n; + /* Set summary bits iff exception isn't masked */ + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + if ( n & (SW_Stack_Fault | EX_Precision) ) + { + if ( !(n & SW_C1) ) + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + partial_status &= ~SW_C1; + } + } + + RE_ENTRANT_CHECK_OFF; + if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) + { +#ifdef PRINT_MESSAGES + /* My message from the sponsor */ + printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); +#endif /* PRINT_MESSAGES */ + + /* Get a name string for error reporting */ + for (i=0; exception_names[i].type; i++) + if ( (exception_names[i].type & n) == exception_names[i].type ) + break; + + if (exception_names[i].type) + { +#ifdef PRINT_MESSAGES + printk("FP Exception: %s!\n", exception_names[i].name); +#endif /* PRINT_MESSAGES */ + } + else + printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); + + if ( n == EX_INTERNAL ) + { + printk("FPU emulator: Internal error type 0x%04x\n", int_type); + FPU_printall(); + } +#ifdef PRINT_MESSAGES + else + FPU_printall(); +#endif /* PRINT_MESSAGES */ + + /* + * The 80486 generates an interrupt on the next non-control FPU + * instruction. So we need some means of flagging it. + * We use the ES (Error Summary) bit for this. + */ + } + RE_ENTRANT_CHECK_ON; + +#ifdef __DEBUG__ + math_abort(FPU_info,SIGFPE); +#endif /* __DEBUG__ */ + +} + + +/* Real operation attempted on a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_1op_NaN(FPU_REG *a) +{ + int signalling, isNaN; + + isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); + + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + signalling = isNaN && !(a->sigh & 0x40000000); + + if ( !signalling ) + { + if ( !isNaN ) /* pseudo-NaN, or other unsupported? */ + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + reg_copy(&CONST_QNaN, a); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; + } + return TAG_Special; + } + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */ + { + reg_copy(&CONST_QNaN, a); + } + /* ensure a Quiet NaN */ + a->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + + +/* Real operation attempted on two operands, one a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_2op_NaN(FPU_REG const *b, u_char tagb, + int deststnr, + FPU_REG const *defaultNaN) +{ + FPU_REG *dest = &st(deststnr); + FPU_REG const *a = dest; + u_char taga = FPU_gettagi(deststnr); + FPU_REG const *x; + int signalling, unsupported; + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + /* TW_NaN is also used for unsupported data types. */ + unsupported = ((taga == TW_NaN) + && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000))) + || ((tagb == TW_NaN) + && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); + if ( unsupported ) + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; + } + + if (taga == TW_NaN) + { + x = a; + if (tagb == TW_NaN) + { + signalling = !(a->sigh & b->sigh & 0x40000000); + if ( significand(b) > significand(a) ) + x = b; + else if ( significand(b) == significand(a) ) + { + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + x = defaultNaN; + } + } + else + { + /* return the quiet version of the NaN in a */ + signalling = !(a->sigh & 0x40000000); + } + } + else +#ifdef PARANOID + if (tagb == TW_NaN) +#endif /* PARANOID */ + { + signalling = !(b->sigh & 0x40000000); + x = b; + } +#ifdef PARANOID + else + { + signalling = 0; + EXCEPTION(EX_INTERNAL|0x113); + x = &CONST_QNaN; + } +#endif /* PARANOID */ + + if ( (!signalling) || (control_word & CW_Invalid) ) + { + if ( ! x ) + x = b; + + if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ + x = &CONST_QNaN; + + FPU_copy_to_regi(x, TAG_Special, deststnr); + + if ( !signalling ) + return TAG_Special; + + /* ensure a Quiet NaN */ + dest->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + + +/* Invalid arith operation on Valid registers */ +/* Returns < 0 if the exception is unmasked */ +asmlinkage int arith_invalid(int deststnr) +{ + + EXCEPTION(EX_Invalid); + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; + +} + + +/* Divide a finite number by zero */ +asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) +{ + FPU_REG *dest = &st(deststnr); + int tag = TAG_Valid; + + if ( control_word & CW_ZeroDiv ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); + setsign(dest, sign); + tag = TAG_Special; + } + + EXCEPTION(EX_ZeroDiv); + + return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; + +} + + +/* This may be called often, so keep it lean */ +int set_precision_flag(int flags) +{ + if ( control_word & CW_Precision ) + { + partial_status &= ~(SW_C1 & flags); + partial_status |= flags; /* The masked response */ + return 0; + } + else + { + EXCEPTION(flags); + return 1; + } +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_up(void) +{ + if ( control_word & CW_Precision ) + partial_status |= (SW_Precision | SW_C1); /* The masked response */ + else + EXCEPTION(EX_Precision | SW_C1); +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_down(void) +{ + if ( control_word & CW_Precision ) + { /* The masked response */ + partial_status &= ~SW_C1; + partial_status |= SW_Precision; + } + else + EXCEPTION(EX_Precision); +} + + +asmlinkage int denormal_operand(void) +{ + if ( control_word & CW_Denormal ) + { /* The masked response */ + partial_status |= SW_Denorm_Op; + return TAG_Special; + } + else + { + EXCEPTION(EX_Denormal); + return TAG_Special | FPU_Exception; + } +} + + +asmlinkage int arith_overflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if ( control_word & CW_Overflow ) + { + /* The masked response */ +/* ###### The response here depends upon the rounding mode */ + reg_copy(&CONST_INF, dest); + tag = TAG_Special; + } + else + { + /* Subtract the magic number from the exponent */ + addexponent(dest, (-3 * (1 << 13))); + } + + EXCEPTION(EX_Overflow); + if ( control_word & CW_Overflow ) + { + /* The overflow exception is masked. */ + /* By definition, precision is lost. + The roundup bit (C1) is also set because we have + "rounded" upwards to Infinity. */ + EXCEPTION(EX_Precision | SW_C1); + return tag; + } + + return tag; + +} + + +asmlinkage int arith_underflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if ( control_word & CW_Underflow ) + { + /* The masked response */ + if ( exponent16(dest) <= EXP_UNDER - 63 ) + { + reg_copy(&CONST_Z, dest); + partial_status &= ~SW_C1; /* Round down. */ + tag = TAG_Zero; + } + else + { + stdexp(dest); + } + } + else + { + /* Add the magic number to the exponent. */ + addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); + } + + EXCEPTION(EX_Underflow); + if ( control_word & CW_Underflow ) + { + /* The underflow exception is masked. */ + EXCEPTION(EX_Precision); + return tag; + } + + return tag; + +} + + +void FPU_stack_overflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + top--; + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackOver); + + return; + +} + + +void FPU_stack_underflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void FPU_stack_underflow_i(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void FPU_stack_underflow_pop(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + FPU_pop(); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h new file mode 100644 index 00000000000..b463f21a811 --- /dev/null +++ b/arch/x86/math-emu/exception.h @@ -0,0 +1,53 @@ +/*---------------------------------------------------------------------------+ + | exception.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _EXCEPTION_H_ +#define _EXCEPTION_H_ + + +#ifdef __ASSEMBLY__ +#define Const_(x) $##x +#else +#define Const_(x) x +#endif + +#ifndef SW_C1 +#include "fpu_emu.h" +#endif /* SW_C1 */ + +#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ +#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ +/* Special exceptions: */ +#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ +#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ +#define EX_StackUnder Const_(0x0041) /* stack underflow */ +/* Exception flags: */ +#define EX_Precision Const_(0x0020) /* loss of precision */ +#define EX_Underflow Const_(0x0010) /* underflow */ +#define EX_Overflow Const_(0x0008) /* overflow */ +#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ +#define EX_Denormal Const_(0x0002) /* denormalized operand */ +#define EX_Invalid Const_(0x0001) /* invalid operation */ + + +#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) +#define PRECISION_LOST_DOWN Const_(EX_Precision) + + +#ifndef __ASSEMBLY__ + +#ifdef DEBUG +#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ + __FILE__, __LINE__); FPU_exception(x); } +#else +#define EXCEPTION(x) FPU_exception(x) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _EXCEPTION_H_ */ diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c new file mode 100644 index 00000000000..6972dec01af --- /dev/null +++ b/arch/x86/math-emu/fpu_arith.c @@ -0,0 +1,174 @@ +/*---------------------------------------------------------------------------+ + | fpu_arith.c | + | | + | Code to implement the FPU register/register arithmetic instructions | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +void fadd__(void) +{ + /* fadd st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), 0, control_word); +} + + +void fmul__(void) +{ + /* fmul st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); +} + + + +void fsub__(void) +{ + /* fsub st,st(i) */ + clear_C1(); + FPU_sub(0, FPU_rm, control_word); +} + + +void fsubr_(void) +{ + /* fsubr st,st(i) */ + clear_C1(); + FPU_sub(REV, FPU_rm, control_word); +} + + +void fdiv__(void) +{ + /* fdiv st,st(i) */ + clear_C1(); + FPU_div(0, FPU_rm, control_word); +} + + +void fdivr_(void) +{ + /* fdivr st,st(i) */ + clear_C1(); + FPU_div(REV, FPU_rm, control_word); +} + + + +void fadd_i(void) +{ + /* fadd st(i),st */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), i, control_word); +} + + +void fmul_i(void) +{ + /* fmul st(i),st */ + clear_C1(); + FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); +} + + +void fsubri(void) +{ + /* fsubr st(i),st */ + clear_C1(); + FPU_sub(DEST_RM, FPU_rm, control_word); +} + + +void fsub_i(void) +{ + /* fsub st(i),st */ + clear_C1(); + FPU_sub(REV|DEST_RM, FPU_rm, control_word); +} + + +void fdivri(void) +{ + /* fdivr st(i),st */ + clear_C1(); + FPU_div(DEST_RM, FPU_rm, control_word); +} + + +void fdiv_i(void) +{ + /* fdiv st(i),st */ + clear_C1(); + FPU_div(REV|DEST_RM, FPU_rm, control_word); +} + + + +void faddp_(void) +{ + /* faddp st(i),st */ + int i = FPU_rm; + clear_C1(); + if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 ) + FPU_pop(); +} + + +void fmulp_(void) +{ + /* fmulp st(i),st */ + clear_C1(); + if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + + +void fsubrp(void) +{ + /* fsubrp st(i),st */ + clear_C1(); + if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fsubp_(void) +{ + /* fsubp st(i),st */ + clear_C1(); + if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fdivrp(void) +{ + /* fdivrp st(i),st */ + clear_C1(); + if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fdivp_(void) +{ + /* fdivp st(i),st */ + clear_C1(); + if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h new file mode 100644 index 00000000000..9ba12416df1 --- /dev/null +++ b/arch/x86/math-emu/fpu_asm.h @@ -0,0 +1,32 @@ +/*---------------------------------------------------------------------------+ + | fpu_asm.h | + | | + | Copyright (C) 1992,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_ASM_H_ +#define _FPU_ASM_H_ + +#include + +#define EXCEPTION FPU_exception + + +#define PARAM1 8(%ebp) +#define PARAM2 12(%ebp) +#define PARAM3 16(%ebp) +#define PARAM4 20(%ebp) +#define PARAM5 24(%ebp) +#define PARAM6 28(%ebp) +#define PARAM7 32(%ebp) + +#define SIGL_OFFSET 0 +#define EXP(x) 8(x) +#define SIG(x) SIGL_OFFSET##(x) +#define SIGL(x) SIGL_OFFSET##(x) +#define SIGH(x) 4(x) + +#endif /* _FPU_ASM_H_ */ diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c new file mode 100644 index 00000000000..20886cfb9f7 --- /dev/null +++ b/arch/x86/math-emu/fpu_aux.c @@ -0,0 +1,204 @@ +/*---------------------------------------------------------------------------+ + | fpu_aux.c | + | | + | Code to implement some of the FPU auxiliary instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +static void fnop(void) +{ +} + +static void fclex(void) +{ + partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| + SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| + SW_Invalid); + no_ip_update = 1; +} + +/* Needs to be externally visible */ +void finit(void) +{ + control_word = 0x037f; + partial_status = 0; + top = 0; /* We don't keep top in the status word internally. */ + fpu_tag_word = 0xffff; + /* The behaviour is different from that detailed in + Section 15.1.6 of the Intel manual */ + operand_address.offset = 0; + operand_address.selector = 0; + instruction_address.offset = 0; + instruction_address.selector = 0; + instruction_address.opcode = 0; + no_ip_update = 1; +} + +/* + * These are nops on the i387.. + */ +#define feni fnop +#define fdisi fnop +#define fsetpm fnop + +static FUNC const finit_table[] = { + feni, fdisi, fclex, finit, + fsetpm, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void finit_(void) +{ + (finit_table[FPU_rm])(); +} + + +static void fstsw_ax(void) +{ + *(short *) &FPU_EAX = status_word(); + no_ip_update = 1; +} + +static FUNC const fstsw_table[] = { + fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fstsw_(void) +{ + (fstsw_table[FPU_rm])(); +} + + +static FUNC const fp_nop_table[] = { + fnop, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fp_nop(void) +{ + (fp_nop_table[FPU_rm])(); +} + + +void fld_i_(void) +{ + FPU_REG *st_new_ptr; + int i; + u_char tag; + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + /* fld st(i) */ + i = FPU_rm; + if ( NOT_EMPTY(i) ) + { + reg_copy(&st(i), st_new_ptr); + tag = FPU_gettagi(i); + push(); + FPU_settag0(tag); + } + else + { + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } + +} + + +void fxch_i(void) +{ + /* fxch st(i) */ + FPU_REG t; + int i = FPU_rm; + FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); + long tag_word = fpu_tag_word; + int regnr = top & 7, regnri = ((regnr + i) & 7); + u_char st0_tag = (tag_word >> (regnr*2)) & 3; + u_char sti_tag = (tag_word >> (regnri*2)) & 3; + + if ( st0_tag == TAG_Empty ) + { + if ( sti_tag == TAG_Empty ) + { + FPU_stack_underflow(); + FPU_stack_underflow_i(i); + return; + } + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_reg0(sti_ptr, sti_tag); + } + FPU_stack_underflow_i(i); + return; + } + if ( sti_tag == TAG_Empty ) + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_regi(st0_ptr, st0_tag, i); + } + FPU_stack_underflow(); + return; + } + clear_C1(); + + reg_copy(st0_ptr, &t); + reg_copy(sti_ptr, st0_ptr); + reg_copy(&t, sti_ptr); + + tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2)); + tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2)); + fpu_tag_word = tag_word; +} + + +void ffree_(void) +{ + /* ffree st(i) */ + FPU_settagi(FPU_rm, TAG_Empty); +} + + +void ffreep(void) +{ + /* ffree st(i) + pop - unofficial code */ + FPU_settagi(FPU_rm, TAG_Empty); + FPU_pop(); +} + + +void fst_i_(void) +{ + /* fst st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); +} + + +void fstp_i(void) +{ + /* fstp st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); + FPU_pop(); +} + diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h new file mode 100644 index 00000000000..65120f52385 --- /dev/null +++ b/arch/x86/math-emu/fpu_emu.h @@ -0,0 +1,218 @@ +/*---------------------------------------------------------------------------+ + | fpu_emu.h | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + + +#ifndef _FPU_EMU_H_ +#define _FPU_EMU_H_ + +/* + * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, + * rather than behaviour which appears to be cleaner. + * This is a matter of opinion: for all I know, the 80486 may simply + * be complying with the IEEE spec. Maybe one day I'll get to see the + * spec... + */ +#define PECULIAR_486 + +#ifdef __ASSEMBLY__ +#include "fpu_asm.h" +#define Const(x) $##x +#else +#define Const(x) x +#endif + +#define EXP_BIAS Const(0) +#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ +#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ +#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but + still a 16 bit nr. */ +#define EXP_Infinity EXP_OVER +#define EXP_NaN EXP_OVER + +#define EXTENDED_Ebias Const(0x3fff) +#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ + +#define SIGN_POS Const(0) +#define SIGN_NEG Const(0x80) + +#define SIGN_Positive Const(0) +#define SIGN_Negative Const(0x8000) + + +/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ +/* The following fold to 2 (Special) in the Tag Word */ +#define TW_Denormal Const(4) /* De-normal */ +#define TW_Infinity Const(5) /* + or - infinity */ +#define TW_NaN Const(6) /* Not a Number */ +#define TW_Unsupported Const(7) /* Not supported by an 80486 */ + +#define TAG_Valid Const(0) /* valid */ +#define TAG_Zero Const(1) /* zero */ +#define TAG_Special Const(2) /* De-normal, + or - infinity, + or Not a Number */ +#define TAG_Empty Const(3) /* empty */ +#define TAG_Error Const(0x80) /* probably need to abort */ + +#define LOADED_DATA Const(10101) /* Special st() number to identify + loaded data (not on stack). */ + +/* A few flags (must be >= 0x10). */ +#define REV 0x10 +#define DEST_RM 0x20 +#define LOADED 0x40 + +#define FPU_Exception Const(0x80000000) /* Added to tag returns. */ + + +#ifndef __ASSEMBLY__ + +#include "fpu_system.h" + +#include /* for struct _fpstate */ +#include +#include + +/* +#define RE_ENTRANT_CHECKING + */ + +#ifdef RE_ENTRANT_CHECKING +extern u_char emulating; +# define RE_ENTRANT_CHECK_OFF emulating = 0 +# define RE_ENTRANT_CHECK_ON emulating = 1 +#else +# define RE_ENTRANT_CHECK_OFF +# define RE_ENTRANT_CHECK_ON +#endif /* RE_ENTRANT_CHECKING */ + +#define FWAIT_OPCODE 0x9b +#define OP_SIZE_PREFIX 0x66 +#define ADDR_SIZE_PREFIX 0x67 +#define PREFIX_CS 0x2e +#define PREFIX_DS 0x3e +#define PREFIX_ES 0x26 +#define PREFIX_SS 0x36 +#define PREFIX_FS 0x64 +#define PREFIX_GS 0x65 +#define PREFIX_REPE 0xf3 +#define PREFIX_REPNE 0xf2 +#define PREFIX_LOCK 0xf0 +#define PREFIX_CS_ 1 +#define PREFIX_DS_ 2 +#define PREFIX_ES_ 3 +#define PREFIX_FS_ 4 +#define PREFIX_GS_ 5 +#define PREFIX_SS_ 6 +#define PREFIX_DEFAULT 7 + +struct address { + unsigned int offset; + unsigned int selector:16; + unsigned int opcode:11; + unsigned int empty:5; +}; +struct fpu__reg { + unsigned sigl; + unsigned sigh; + short exp; +}; + +typedef void (*FUNC)(void); +typedef struct fpu__reg FPU_REG; +typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag); +typedef struct { u_char address_size, operand_size, segment; } + overrides; +/* This structure is 32 bits: */ +typedef struct { overrides override; + u_char default_mode; } fpu_addr_modes; +/* PROTECTED has a restricted meaning in the emulator; it is used + to signal that the emulator needs to do special things to ensure + that protection is respected in a segmented model. */ +#define PROTECTED 4 +#define SIXTEEN 1 /* We rely upon this being 1 (true) */ +#define VM86 SIXTEEN +#define PM16 (SIXTEEN | PROTECTED) +#define SEG32 PROTECTED +extern u_char const data_sizes_16[32]; + +#define register_base ((u_char *) registers ) +#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) ) +#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) ) + +#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) +#define NOT_EMPTY(i) (!FPU_empty_i(i)) + +#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) + +#define poppop() { FPU_pop(); FPU_pop(); } + +/* push() does not affect the tags */ +#define push() { top--; } + +#define signbyte(a) (((u_char *)(a))[9]) +#define getsign(a) (signbyte(a) & 0x80) +#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } +#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ + else signbyte(b) &= 0x7f; } +#define changesign(a) { signbyte(a) ^= 0x80; } +#define setpositive(a) { signbyte(a) &= 0x7f; } +#define setnegative(a) { signbyte(a) |= 0x80; } +#define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) +#define signnegative(a) (signbyte(a) & 0x80) + +static inline void reg_copy(FPU_REG const *x, FPU_REG *y) +{ + *(short *)&(y->exp) = *(const short *)&(x->exp); + *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); +} + +#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) +#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ + ((y) + EXTENDED_Ebias) & 0x7fff; } +#define exponent16(x) (*(short *)&((x)->exp)) +#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } +#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } +#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } + +#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) + +#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) + + +/*----- Prototypes for functions written in assembler -----*/ +/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ + +asmlinkage int FPU_normalize(FPU_REG *x); +asmlinkage int FPU_normalize_nuo(FPU_REG *x); +asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expon); +asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign); +asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, + unsigned int control_w, u_char sign); +asmlinkage unsigned FPU_shrx(void *l, unsigned x); +asmlinkage unsigned FPU_shrxs(void *v, unsigned x); +asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); +asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, + unsigned int control_w, u_char sign); + +#ifndef MAKING_PROTO +#include "fpu_proto.h" +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _FPU_EMU_H_ */ diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c new file mode 100644 index 00000000000..1853524c8b5 --- /dev/null +++ b/arch/x86/math-emu/fpu_entry.c @@ -0,0 +1,761 @@ +/*---------------------------------------------------------------------------+ + | fpu_entry.c | + | | + | The entry functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | See the files "README" and "COPYING" for further copyright and warranty | + | information. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | + | entry points for wm-FPU-emu. | + +---------------------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" +#include "status_w.h" + +#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ + +#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ + +/* WARNING: These codes are not documented by Intel in their 80486 manual + and may not work on FPU clones or later Intel FPUs. */ + +/* Changes to support the un-doc codes provided by Linus Torvalds. */ + +#define _d9_d8_ fstp_i /* unofficial code (19) */ +#define _dc_d0_ fcom_st /* unofficial code (14) */ +#define _dc_d8_ fcompst /* unofficial code (1c) */ +#define _dd_c8_ fxch_i /* unofficial code (0d) */ +#define _de_d0_ fcompst /* unofficial code (16) */ +#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ +#define _df_c8_ fxch_i /* unofficial code (0f) */ +#define _df_d0_ fstp_i /* unofficial code (17) */ +#define _df_d8_ fstp_i /* unofficial code (1f) */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, + fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, + fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, + fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#else /* Support only documented FPU op-codes */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, + fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, + fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, + fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#endif /* NO_UNDOC_CODE */ + + +#define _NONE_ 0 /* Take no special action */ +#define _REG0_ 1 /* Need to check for not empty st(0) */ +#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ +#define _REGi_ 0 /* Uses st(rm) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ +#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ +#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ +#define _REGIc 0 /* Compare st(0) and st(rm) */ +#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ + +#ifndef NO_UNDOC_CODE + +/* Un-documented FPU op-codes supported by default. (see above) */ + +static u_char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, + _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#else /* Support only documented FPU op-codes */ + +static u_char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, + _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#endif /* NO_UNDOC_CODE */ + + +#ifdef RE_ENTRANT_CHECKING +u_char emulating=0; +#endif /* RE_ENTRANT_CHECKING */ + +static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, + overrides *override); + +asmlinkage void math_emulate(long arg) +{ + u_char FPU_modrm, byte1; + unsigned short code; + fpu_addr_modes addr_modes; + int unmasked; + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char loaded_tag, st0_tag; + void __user *data_address; + struct address data_sel_off; + struct address entry_sel_off; + unsigned long code_base = 0; + unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ + struct desc_struct code_descriptor; + +#ifdef RE_ENTRANT_CHECKING + if ( emulating ) + { + printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); + } + RE_ENTRANT_CHECK_ON; +#endif /* RE_ENTRANT_CHECKING */ + + if (!used_math()) + { + finit(); + set_used_math(); + } + + SETUP_DATA_AREA(arg); + + FPU_ORIG_EIP = FPU_EIP; + + if ( (FPU_EFLAGS & 0x00020000) != 0 ) + { + /* Virtual 8086 mode */ + addr_modes.default_mode = VM86; + FPU_EIP += code_base = FPU_CS << 4; + code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ + } + else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS ) + { + addr_modes.default_mode = 0; + } + else if ( FPU_CS == __KERNEL_CS ) + { + printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); + panic("Math emulation needed in kernel"); + } + else + { + + if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ + { + /* Can only handle segmented addressing via the LDT + for now, and it must be 16 bit */ + printk("FPU emulator: Unsupported addressing mode\n"); + math_abort(FPU_info, SIGILL); + } + + code_descriptor = LDT_DESCRIPTOR(FPU_CS); + if ( SEG_D_SIZE(code_descriptor) ) + { + /* The above test may be wrong, the book is not clear */ + /* Segmented 32 bit protected mode */ + addr_modes.default_mode = SEG32; + } + else + { + /* 16 bit protected mode */ + addr_modes.default_mode = PM16; + } + FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); + code_limit = code_base + + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) + - 1; + if ( code_limit < code_base ) code_limit = 0xffffffff; + } + + FPU_lookahead = 1; + if (current->ptrace & PT_PTRACED) + FPU_lookahead = 0; + + if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP, + &addr_modes.override) ) + { + RE_ENTRANT_CHECK_OFF; + printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" + "FPU emulator: self-modifying code! (emulation impossible)\n", + byte1); + RE_ENTRANT_CHECK_ON; + EXCEPTION(EX_INTERNAL|0x126); + math_abort(FPU_info,SIGILL); + } + +do_another_FPU_instruction: + + no_ip_update = 0; + + FPU_EIP++; /* We have fetched the prefix and first code bytes. */ + + if ( addr_modes.default_mode ) + { + /* This checks for the minimum instruction bytes. + We also need to check any extra (address mode) code access. */ + if ( FPU_EIP > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( (byte1 & 0xf8) != 0xd8 ) + { + if ( byte1 == FWAIT_OPCODE ) + { + if (partial_status & SW_Summary) + goto do_the_FPU_interrupt; + else + goto FPU_fwait_done; + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x128); + math_abort(FPU_info,SIGILL); +#endif /* PARANOID */ + } + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); + RE_ENTRANT_CHECK_ON; + FPU_EIP++; + + if (partial_status & SW_Summary) + { + /* Ignore the error for now if the current instruction is a no-wait + control instruction */ + /* The 80486 manual contradicts itself on this topic, + but a real 80486 uses the following instructions: + fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. + */ + code = (FPU_modrm << 8) | byte1; + if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ + (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, + fnstsw */ + ((code & 0xc000) != 0xc000))) ) ) + { + /* + * We need to simulate the action of the kernel to FPU + * interrupts here. + */ + do_the_FPU_interrupt: + + FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ + + RE_ENTRANT_CHECK_OFF; + current->thread.trap_no = 16; + current->thread.error_code = 0; + send_sig(SIGFPE, current, 1); + return; + } + } + + entry_sel_off.offset = FPU_ORIG_EIP; + entry_sel_off.selector = FPU_CS; + entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; + + FPU_rm = FPU_modrm & 7; + + if ( FPU_modrm < 0300 ) + { + /* All of these instructions use the mod/rm byte to get a data address */ + + if ( (addr_modes.default_mode & SIXTEEN) + ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) + data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + else + data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + + if ( addr_modes.default_mode ) + { + if ( FPU_EIP-1 > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( !(byte1 & 1) ) + { + unsigned short status1 = partial_status; + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + + /* Stack underflow has priority */ + if ( NOT_EMPTY_ST0 ) + { + if ( addr_modes.default_mode & PROTECTED ) + { + /* This table works for 16 and 32 bit protected mode */ + if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) + math_abort(FPU_info,SIGSEGV); + } + + unmasked = 0; /* Do this here to stop compiler warnings. */ + switch ( (byte1 >> 1) & 3 ) + { + case 0: + unmasked = FPU_load_single((float __user *)data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 1: + loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); + break; + case 2: + unmasked = FPU_load_double((double __user *)data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 3: + default: /* Used here to suppress gcc warnings. */ + loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); + break; + } + + /* No more access to user memory, it is safe + to use static data now */ + + /* NaN operands have the next priority. */ + /* We have to delay looking at st(0) until after + loading the data, because that data might contain an SNaN */ + if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) || + ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) ) + { + /* Restore the status word; we might have loaded a + denormal. */ + partial_status = status1; + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* fcom or fcomp */ + EXCEPTION(EX_Invalid); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + FPU_pop(); /* fcomp, masked, so we pop. */ + } + else + { + if ( loaded_tag == TAG_Special ) + loaded_tag = FPU_Special(&loaded_data); +#ifdef PECULIAR_486 + /* This is not really needed, but gives behaviour + identical to an 80486 */ + if ( (FPU_modrm & 0x28) == 0x20 ) + /* fdiv or fsub */ + real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data); + else +#endif /* PECULIAR_486 */ + /* fadd, fdivr, fmul, or fsubr */ + real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr); + } + goto reg_mem_instr_done; + } + + if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) + { + /* Is not a comparison instruction. */ + if ( (FPU_modrm & 0x38) == 0x38 ) + { + /* fdivr */ + if ( (st0_tag == TAG_Zero) && + ((loaded_tag == TAG_Valid) + || (loaded_tag == TAG_Special + && isdenormal(&loaded_data))) ) + { + if ( FPU_divide_by_zero(0, getsign(&loaded_data)) + < 0 ) + { + /* We use the fact here that the unmasked + exception in the loaded data was for a + denormal operand */ + /* Restore the state of the denormal op bit */ + partial_status &= ~SW_Denorm_Op; + partial_status |= status1 & SW_Denorm_Op; + } + else + setsign(st0_ptr, getsign(&loaded_data)); + } + } + goto reg_mem_instr_done; + } + + switch ( (FPU_modrm >> 3) & 7 ) + { + case 0: /* fadd */ + clear_C1(); + FPU_add(&loaded_data, loaded_tag, 0, control_word); + break; + case 1: /* fmul */ + clear_C1(); + FPU_mul(&loaded_data, loaded_tag, 0, control_word); + break; + case 2: /* fcom */ + FPU_compare_st_data(&loaded_data, loaded_tag); + break; + case 3: /* fcomp */ + if ( !FPU_compare_st_data(&loaded_data, loaded_tag) + && !unmasked ) + FPU_pop(); + break; + case 4: /* fsub */ + clear_C1(); + FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 5: /* fsubr */ + clear_C1(); + FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 6: /* fdiv */ + clear_C1(); + FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 7: /* fdivr */ + clear_C1(); + if ( st0_tag == TAG_Zero ) + partial_status = status1; /* Undo any denorm tag, + zero-divide has priority. */ + FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + } + } + else + { + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* The instruction is fcom or fcomp */ + EXCEPTION(EX_StackUnder); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + FPU_pop(); /* fcomp */ + } + else + FPU_stack_underflow(); + } + reg_mem_instr_done: + operand_address = data_sel_off; + } + else + { + if ( !(no_ip_update = + FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, + addr_modes, data_address)) ) + { + operand_address = data_sel_off; + } + } + + } + else + { + /* None of these instructions access user memory */ + u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); + +#ifdef PECULIAR_486 + /* This is supposed to be undefined, but a real 80486 seems + to do this: */ + operand_address.offset = 0; + operand_address.selector = FPU_DS; +#endif /* PECULIAR_486 */ + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + switch ( type_table[(int) instr_index] ) + { + case _NONE_: /* also _REGIc: _REGIn */ + break; + case _REG0_: + if ( !NOT_EMPTY_ST0 ) + { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _REGIi: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow_i(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGIp: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow_pop(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGI_: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _PUSH_: /* Only used by the fld st(i) instruction */ + break; + case _null_: + FPU_illegal(); + goto FPU_instruction_done; + default: + EXCEPTION(EX_INTERNAL|0x111); + goto FPU_instruction_done; + } + (*st_instr_table[(int) instr_index])(); + +FPU_instruction_done: + ; + } + + if ( ! no_ip_update ) + instruction_address = entry_sel_off; + +FPU_fwait_done: + +#ifdef DEBUG + RE_ENTRANT_CHECK_OFF; + FPU_printall(); + RE_ENTRANT_CHECK_ON; +#endif /* DEBUG */ + + if (FPU_lookahead && !need_resched()) + { + FPU_ORIG_EIP = FPU_EIP - code_base; + if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP, + &addr_modes.override) ) + goto do_another_FPU_instruction; + } + + if ( addr_modes.default_mode ) + FPU_EIP -= code_base; + + RE_ENTRANT_CHECK_OFF; +} + + +/* Support for prefix bytes is not yet complete. To properly handle + all prefix bytes, further changes are needed in the emulator code + which accesses user address space. Access to separate segments is + important for msdos emulation. */ +static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, + overrides *override) +{ + u_char byte; + u_char __user *ip = *fpu_eip; + + *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + + while ( 1 ) + { + switch ( byte ) + { + case ADDR_SIZE_PREFIX: + override->address_size = ADDR_SIZE_PREFIX; + goto do_next_byte; + + case OP_SIZE_PREFIX: + override->operand_size = OP_SIZE_PREFIX; + goto do_next_byte; + + case PREFIX_CS: + override->segment = PREFIX_CS_; + goto do_next_byte; + case PREFIX_ES: + override->segment = PREFIX_ES_; + goto do_next_byte; + case PREFIX_SS: + override->segment = PREFIX_SS_; + goto do_next_byte; + case PREFIX_FS: + override->segment = PREFIX_FS_; + goto do_next_byte; + case PREFIX_GS: + override->segment = PREFIX_GS_; + goto do_next_byte; + case PREFIX_DS: + override->segment = PREFIX_DS_; + goto do_next_byte; + +/* lock is not a valid prefix for FPU instructions, + let the cpu handle it to generate a SIGILL. */ +/* case PREFIX_LOCK: */ + + /* rep.. prefixes have no meaning for FPU instructions */ + case PREFIX_REPE: + case PREFIX_REPNE: + + do_next_byte: + ip++; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + break; + case FWAIT_OPCODE: + *Byte = byte; + return 1; + default: + if ( (byte & 0xf8) == 0xd8 ) + { + *Byte = byte; + *fpu_eip = ip; + return 1; + } + else + { + /* Not a valid sequence of prefix bytes followed by + an FPU instruction. */ + *Byte = byte; /* Needed for error message. */ + return 0; + } + } + } +} + + +void math_abort(struct info * info, unsigned int signal) +{ + FPU_EIP = FPU_ORIG_EIP; + current->thread.trap_no = 16; + current->thread.error_code = 0; + send_sig(signal,current,1); + RE_ENTRANT_CHECK_OFF; + __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); +#ifdef PARANOID + printk("ERROR: wm-FPU-emu math_abort failed!\n"); +#endif /* PARANOID */ +} + + + +#define S387 ((struct i387_soft_struct *)s387) +#define sstatus_word() \ + ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) + +int restore_i387_soft(void *s387, struct _fpstate __user *buf) +{ + u_char __user *d = (u_char __user *)buf; + int offset, other, i, tags, regnr, tag, newtop; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10); + if (__copy_from_user(&S387->cwd, d, 7*4)) + return -1; + RE_ENTRANT_CHECK_ON; + + d += 7*4; + + S387->ftop = (S387->swd >> SW_Top_Shift) & 7; + offset = (S387->ftop & 7) * 10; + other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + /* Copy all registers in stack order. */ + if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other)) + return -1; + if ( offset ) + if (__copy_from_user((u_char *)&S387->st_space, d+other, offset)) + return -1; + RE_ENTRANT_CHECK_ON; + + /* The tags may need to be corrected now. */ + tags = S387->twd; + newtop = S387->ftop; + for ( i = 0; i < 8; i++ ) + { + regnr = (i+newtop) & 7; + if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty ) + { + /* The loaded data over-rides all other cases. */ + tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr)); + tags &= ~(3 << (regnr*2)); + tags |= (tag & 3) << (regnr*2); + } + } + S387->twd = tags; + + return 0; +} + + +int save_i387_soft(void *s387, struct _fpstate __user * buf) +{ + u_char __user *d = (u_char __user *)buf; + int offset = (S387->ftop & 7) * 10, other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10); +#ifdef PECULIAR_486 + S387->cwd &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + S387->cwd |= 0xffff0040; + S387->swd = sstatus_word() | 0xffff0000; + S387->twd |= 0xffff0000; + S387->fcs &= ~0xf8000000; + S387->fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + if (__copy_to_user(d, &S387->cwd, 7*4)) + return -1; + RE_ENTRANT_CHECK_ON; + + d += 7*4; + + RE_ENTRANT_CHECK_OFF; + /* Copy all registers in stack order. */ + if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other)) + return -1; + if ( offset ) + if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset)) + return -1; + RE_ENTRANT_CHECK_ON; + + return 1; +} diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c new file mode 100644 index 00000000000..e3b5d465587 --- /dev/null +++ b/arch/x86/math-emu/fpu_etc.c @@ -0,0 +1,143 @@ +/*---------------------------------------------------------------------------+ + | fpu_etc.c | + | | + | Implement a few FPU instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" + + +static void fchs(FPU_REG *st0_ptr, u_char st0tag) +{ + if ( st0tag ^ TAG_Empty ) + { + signbyte(st0_ptr) ^= SIGN_NEG; + clear_C1(); + } + else + FPU_stack_underflow(); +} + + +static void fabs(FPU_REG *st0_ptr, u_char st0tag) +{ + if ( st0tag ^ TAG_Empty ) + { + setpositive(st0_ptr); + clear_C1(); + } + else + FPU_stack_underflow(); +} + + +static void ftst_(FPU_REG *st0_ptr, u_char st0tag) +{ + switch (st0tag) + { + case TAG_Zero: + setcc(SW_C3); + break; + case TAG_Valid: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + case TAG_Special: + switch ( FPU_Special(st0_ptr) ) + { + case TW_Denormal: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + if ( denormal_operand() < 0 ) + { +#ifdef PECULIAR_486 + /* This is weird! */ + if (getsign(st0_ptr) == SIGN_POS) + setcc(SW_C3); +#endif /* PECULIAR_486 */ + return; + } + break; + case TW_NaN: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_Invalid); + break; + case TW_Infinity: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + default: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_INTERNAL|0x14); + break; + } + break; + case TAG_Empty: + setcc(SW_C0|SW_C2|SW_C3); + EXCEPTION(EX_StackUnder); + break; + } +} + + +static void fxam(FPU_REG *st0_ptr, u_char st0tag) +{ + int c = 0; + switch (st0tag) + { + case TAG_Empty: + c = SW_C3|SW_C0; + break; + case TAG_Zero: + c = SW_C3; + break; + case TAG_Valid: + c = SW_C2; + break; + case TAG_Special: + switch ( FPU_Special(st0_ptr) ) + { + case TW_Denormal: + c = SW_C2|SW_C3; /* Denormal */ + break; + case TW_NaN: + /* We also use NaN for unsupported types. */ + if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) ) + c = SW_C0; + break; + case TW_Infinity: + c = SW_C2|SW_C0; + break; + } + } + if ( getsign(st0_ptr) == SIGN_NEG ) + c |= SW_C1; + setcc(c); +} + + +static FUNC_ST0 const fp_etc_table[] = { + fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, + ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal +}; + +void FPU_etc(void) +{ + (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h new file mode 100644 index 00000000000..37a8a7fe7e2 --- /dev/null +++ b/arch/x86/math-emu/fpu_proto.h @@ -0,0 +1,140 @@ +#ifndef _FPU_PROTO_H +#define _FPU_PROTO_H + +/* errors.c */ +extern void FPU_illegal(void); +extern void FPU_printall(void); +asmlinkage void FPU_exception(int n); +extern int real_1op_NaN(FPU_REG *a); +extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, + FPU_REG const *defaultNaN); +asmlinkage int arith_invalid(int deststnr); +asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); +extern int set_precision_flag(int flags); +asmlinkage void set_precision_flag_up(void); +asmlinkage void set_precision_flag_down(void); +asmlinkage int denormal_operand(void); +asmlinkage int arith_overflow(FPU_REG *dest); +asmlinkage int arith_underflow(FPU_REG *dest); +extern void FPU_stack_overflow(void); +extern void FPU_stack_underflow(void); +extern void FPU_stack_underflow_i(int i); +extern void FPU_stack_underflow_pop(int i); +/* fpu_arith.c */ +extern void fadd__(void); +extern void fmul__(void); +extern void fsub__(void); +extern void fsubr_(void); +extern void fdiv__(void); +extern void fdivr_(void); +extern void fadd_i(void); +extern void fmul_i(void); +extern void fsubri(void); +extern void fsub_i(void); +extern void fdivri(void); +extern void fdiv_i(void); +extern void faddp_(void); +extern void fmulp_(void); +extern void fsubrp(void); +extern void fsubp_(void); +extern void fdivrp(void); +extern void fdivp_(void); +/* fpu_aux.c */ +extern void finit(void); +extern void finit_(void); +extern void fstsw_(void); +extern void fp_nop(void); +extern void fld_i_(void); +extern void fxch_i(void); +extern void ffree_(void); +extern void ffreep(void); +extern void fst_i_(void); +extern void fstp_i(void); +/* fpu_entry.c */ +asmlinkage extern void math_emulate(long arg); +extern void math_abort(struct info *info, unsigned int signal); +/* fpu_etc.c */ +extern void FPU_etc(void); +/* fpu_tags.c */ +extern int FPU_gettag0(void); +extern int FPU_gettagi(int stnr); +extern int FPU_gettag(int regnr); +extern void FPU_settag0(int tag); +extern void FPU_settagi(int stnr, int tag); +extern void FPU_settag(int regnr, int tag); +extern int FPU_Special(FPU_REG const *ptr); +extern int isNaN(FPU_REG const *ptr); +extern void FPU_pop(void); +extern int FPU_empty_i(int stnr); +extern int FPU_stackoverflow(FPU_REG **st_new_ptr); +extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); +extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); +extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); +/* fpu_trig.c */ +extern void FPU_triga(void); +extern void FPU_trigb(void); +/* get_address.c */ +extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes); +extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes); +/* load_store.c */ +extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user *data_address); +/* poly_2xm1.c */ +extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result); +/* poly_atan.c */ +extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, + u_char st1_tag); +/* poly_l2.c */ +extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); +extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, + FPU_REG *d); +/* poly_sin.c */ +extern void poly_sine(FPU_REG *st0_ptr); +extern void poly_cos(FPU_REG *st0_ptr); +/* poly_tan.c */ +extern void poly_tan(FPU_REG *st0_ptr); +/* reg_add_sub.c */ +extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); +extern int FPU_sub(int flags, int rm, int control_w); +/* reg_compare.c */ +extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); +extern void fcom_st(void); +extern void fcompst(void); +extern void fcompp(void); +extern void fucom_(void); +extern void fucomp(void); +extern void fucompp(void); +/* reg_constant.c */ +extern void fconst(void); +/* reg_ld_str.c */ +extern int FPU_load_extended(long double __user *s, int stnr); +extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); +extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); +extern int FPU_load_int64(long long __user *_s); +extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); +extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); +extern int FPU_load_bcd(u_char __user *s); +extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, + long double __user *d); +extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat); +extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single); +extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d); +extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); +extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); +extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); +extern int FPU_round_to_int(FPU_REG *r, u_char tag); +extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); +extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); +extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); +extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); +extern int FPU_tagof(FPU_REG *ptr); +/* reg_mul.c */ +extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); + +extern int FPU_div(int flags, int regrm, int control_w); +/* reg_convert.c */ +extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); +#endif /* _FPU_PROTO_H */ + diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h new file mode 100644 index 00000000000..a3ae28c49dd --- /dev/null +++ b/arch/x86/math-emu/fpu_system.h @@ -0,0 +1,90 @@ +/*---------------------------------------------------------------------------+ + | fpu_system.h | + | | + | Copyright (C) 1992,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_SYSTEM_H +#define _FPU_SYSTEM_H + +/* system dependent definitions */ + +#include +#include +#include + +/* This sets the pointer FPU_info to point to the argument part + of the stack frame of math_emulate() */ +#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg + +/* s is always from a cpu register, and the cpu does bounds checking + * during register load --> no further bounds checks needed */ +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#define SEG_D_SIZE(x) ((x).b & (3 << 21)) +#define SEG_G_BIT(x) ((x).b & (1 << 23)) +#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) +#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) +#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ + | (((s).b & 0xff) << 16) | ((s).a >> 16)) +#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) +#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) +#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) +#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ + == (1 << 10)) + +#define I387 (current->thread.i387) +#define FPU_info (I387.soft.info) + +#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) +#define FPU_EAX (FPU_info->___eax) +#define FPU_EFLAGS (FPU_info->___eflags) +#define FPU_EIP (FPU_info->___eip) +#define FPU_ORIG_EIP (FPU_info->___orig_eip) + +#define FPU_lookahead (I387.soft.lookahead) + +/* nz if ip_offset and cs_selector are not to be set for the current + instruction. */ +#define no_ip_update (*(u_char *)&(I387.soft.no_update)) +#define FPU_rm (*(u_char *)&(I387.soft.rm)) + +/* Number of bytes of data which can be legally accessed by the current + instruction. This only needs to hold a number <= 108, so a byte will do. */ +#define access_limit (*(u_char *)&(I387.soft.alimit)) + +#define partial_status (I387.soft.swd) +#define control_word (I387.soft.cwd) +#define fpu_tag_word (I387.soft.twd) +#define registers (I387.soft.st_space) +#define top (I387.soft.ftop) + +#define instruction_address (*(struct address *)&I387.soft.fip) +#define operand_address (*(struct address *)&I387.soft.foo) + +#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ + math_abort(FPU_info,SIGSEGV) +#define FPU_abort math_abort(FPU_info, SIGSEGV) + +#undef FPU_IGNORE_CODE_SEGV +#ifdef FPU_IGNORE_CODE_SEGV +/* access_ok() is very expensive, and causes the emulator to run + about 20% slower if applied to the code. Anyway, errors due to bad + code addresses should be much rarer than errors due to bad data + addresses. */ +#define FPU_code_access_ok(z) +#else +/* A simpler test than access_ok() can probably be done for + FPU_code_access_ok() because the only possible error is to step + past the upper boundary of a legal code area. */ +#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) +#endif + +#define FPU_get_user(x,y) get_user((x),(y)) +#define FPU_put_user(x,y) put_user((x),(y)) + +#endif diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c new file mode 100644 index 00000000000..cb436fe20e4 --- /dev/null +++ b/arch/x86/math-emu/fpu_tags.c @@ -0,0 +1,127 @@ +/*---------------------------------------------------------------------------+ + | fpu_tags.c | + | | + | Set FPU register tags. | + | | + | Copyright (C) 1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" + + +void FPU_pop(void) +{ + fpu_tag_word |= 3 << ((top & 7)*2); + top++; +} + + +int FPU_gettag0(void) +{ + return (fpu_tag_word >> ((top & 7)*2)) & 3; +} + + +int FPU_gettagi(int stnr) +{ + return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3; +} + + +int FPU_gettag(int regnr) +{ + return (fpu_tag_word >> ((regnr & 7)*2)) & 3; +} + + +void FPU_settag0(int tag) +{ + int regnr = top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +void FPU_settagi(int stnr, int tag) +{ + int regnr = stnr+top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +void FPU_settag(int regnr, int tag) +{ + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +int FPU_Special(FPU_REG const *ptr) +{ + int exp = exponent(ptr); + + if ( exp == EXP_BIAS+EXP_UNDER ) + return TW_Denormal; + else if ( exp != EXP_BIAS+EXP_OVER ) + return TW_NaN; + else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) ) + return TW_Infinity; + return TW_NaN; +} + + +int isNaN(FPU_REG const *ptr) +{ + return ( (exponent(ptr) == EXP_BIAS+EXP_OVER) + && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) ); +} + + +int FPU_empty_i(int stnr) +{ + int regnr = (top+stnr) & 7; + + return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty; +} + + +int FPU_stackoverflow(FPU_REG **st_new_ptr) +{ + *st_new_ptr = &st(-1); + + return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty; +} + + +void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) +{ + reg_copy(r, &st(stnr)); + FPU_settagi(stnr, tag); +} + +void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) +{ + reg_copy(r, &st(1)); + FPU_settagi(1, tag); +} + +void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) +{ + int regnr = top; + regnr &= 7; + + reg_copy(r, &st(0)); + + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c new file mode 100644 index 00000000000..403cbde1d42 --- /dev/null +++ b/arch/x86/math-emu/fpu_trig.c @@ -0,0 +1,1845 @@ +/*---------------------------------------------------------------------------+ + | fpu_trig.c | + | | + | Implementation of the FPU "transcendental" functions. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" + +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n); + +#define BETTER_THAN_486 + +#define FCOS 4 + +/* Used only by fptan, fsin, fcos, and fsincos. */ +/* This routine produces very accurate results, similar to + using a value of pi with more than 128 bits precision. */ +/* Limited measurements show no results worse than 64 bit precision + except for the results for arguments close to 2^63, where the + precision of the result sometimes degrades to about 63.9 bits */ +static int trig_arg(FPU_REG *st0_ptr, int even) +{ + FPU_REG tmp; + u_char tmptag; + unsigned long long q; + int old_cw = control_word, saved_status = partial_status; + int tag, st0_tag = TAG_Valid; + + if ( exponent(st0_ptr) >= 63 ) + { + partial_status |= SW_C2; /* Reduction incomplete. */ + return -1; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + setpositive(st0_ptr); + tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, + SIGN_POS); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow + to 2^64 */ + q = significand(&tmp); + if ( q ) + { + rem_kernel(significand(st0_ptr), + &significand(&tmp), + significand(&CONST_PI2), + q, exponent(st0_ptr) - exponent(&CONST_PI2)); + setexponent16(&tmp, exponent(&CONST_PI2)); + st0_tag = FPU_normalize(&tmp); + FPU_copy_to_reg0(&tmp, st0_tag); + } + + if ( (even && !(q & 1)) || (!even && (q & 1)) ) + { + st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION); + +#ifdef BETTER_THAN_486 + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) ) + { + /* This code gives the effect of having pi/2 to better than + 128 bits precision. */ + + significand(&tmp) = q + 1; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); + tmptag = + FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI2extra) + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); + if ( signnegative(st0_ptr) ) + { + /* CONST_PI2extra is negative, so the result of the addition + can be negative. This means that the argument is actually + in a different quadrant. The correction is always < pi/2, + so it can't overflow into yet another quadrant. */ + setpositive(st0_ptr); + q++; + } + } +#endif /* BETTER_THAN_486 */ + } +#ifdef BETTER_THAN_486 + else + { + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) + || (q > 1) ) + { + /* This code gives the effect of having p/2 to better than + 128 bits precision. */ + + significand(&tmp) = q; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); /* This must return TAG_Valid */ + tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, + SIGN_POS, + exponent(&CONST_PI2extra) + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp, + FULL_PRECISION); + if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) && + ((st0_ptr->sigh > CONST_PI2.sigh) + || ((st0_ptr->sigh == CONST_PI2.sigh) + && (st0_ptr->sigl > CONST_PI2.sigl))) ) + { + /* CONST_PI2extra is negative, so the result of the + subtraction can be larger than pi/2. This means + that the argument is actually in a different quadrant. + The correction is always < pi/2, so it can't overflow + into yet another quadrant. */ + st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, + FULL_PRECISION); + q++; + } + } + } +#endif /* BETTER_THAN_486 */ + + FPU_settag0(st0_tag); + control_word = old_cw; + partial_status = saved_status & ~SW_C2; /* Reduction complete. */ + + return (q & 3) | even; +} + + +/* Convert a long to register */ +static void convert_l2reg(long const *arg, int deststnr) +{ + int tag; + long num = *arg; + u_char sign; + FPU_REG *dest = &st(deststnr); + + if (num == 0) + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + return; + } + + if (num > 0) + { sign = SIGN_POS; } + else + { num = -num; sign = SIGN_NEG; } + + dest->sigh = num; + dest->sigl = 0; + setexponent16(dest, 31); + tag = FPU_normalize(dest); + FPU_settagi(deststnr, tag); + setsign(dest, sign); + return; +} + + +static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + if ( st0_tag == TAG_Empty ) + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + else if ( st0_tag == TW_NaN ) + real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL|0x0112); +#endif /* PARANOID */ +} + + +static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + int isNaN; + + switch ( st0_tag ) + { + case TW_NaN: + isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000); + if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ + { + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Convert to a QNaN */ + st0_ptr->sigh |= 0x40000000; + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } + } + else if ( isNaN ) + { + /* A QNaN */ + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } + else + { + /* pseudoNaN or other unsupported */ + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + push(); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + } + break; /* return with a NaN in st(0) */ +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x0112); +#endif /* PARANOID */ + } +} + + +/*---------------------------------------------------------------------------*/ + +static void f2xm1(FPU_REG *st0_ptr, u_char tag) +{ + FPU_REG a; + + clear_C1(); + + if ( tag == TAG_Valid ) + { + /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ + if ( exponent(st0_ptr) < 0 ) + { + denormal_arg: + + FPU_to_exp16(st0_ptr, &a); + + /* poly_2xm1(x) requires 0 < st(0) < 1. */ + poly_2xm1(getsign(st0_ptr), &a, st0_ptr); + } + set_precision_flag_up(); /* 80486 appears to always do this */ + return; + } + + if ( tag == TAG_Zero ) + return; + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + switch ( tag ) + { + case TW_Denormal: + if ( denormal_operand() < 0 ) + return; + goto denormal_arg; + case TW_Infinity: + if ( signnegative(st0_ptr) ) + { + /* -infinity gives -1 (p16-10) */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setnegative(st0_ptr); + } + return; + default: + single_arg_error(st0_ptr, tag); + } +} + + +static void fptan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + int q; + u_char arg_sign = getsign(st0_ptr); + + /* Stack underflow has higher priority */ + if ( st0_tag == TAG_Empty ) + { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + if ( st0_tag == TAG_Valid ) + { + if ( exponent(st0_ptr) > -40 ) + { + if ( (q = trig_arg(st0_ptr, 0)) == -1 ) + { + /* Operand is out of range */ + return; + } + + poly_tan(st0_ptr); + setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); + set_precision_flag_up(); /* We do not really know if up or down */ + } + else + { + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + + denormal_arg: + + FPU_to_exp16(st0_ptr, st0_ptr); + + st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + FPU_settag0(st0_tag); + } + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } + + if ( st0_tag == TAG_Zero ) + { + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return; + + goto denormal_arg; + } + + if ( st0_tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + if ( arith_invalid(0) >= 0 ) + { + st_new_ptr = &st(-1); + push(); + arith_invalid(0); + } + return; + } + + single_arg_2_error(st0_ptr, st0_tag); +} + + +static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + u_char sign; + register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + clear_C1(); + + if ( st0_tag == TAG_Valid ) + { + long e; + + push(); + sign = getsign(st1_ptr); + reg_copy(st1_ptr, st_new_ptr); + setexponent16(st_new_ptr, exponent(st_new_ptr)); + + denormal_arg: + + e = exponent16(st_new_ptr); + convert_l2reg(&e, 1); + setexponentpos(st_new_ptr, 0); + setsign(st_new_ptr, sign); + FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ + return; + } + else if ( st0_tag == TAG_Zero ) + { + sign = getsign(st0_ptr); + + if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 ) + return; + + push(); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st_new_ptr, sign); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + { + if (denormal_operand() < 0 ) + return; + + push(); + sign = getsign(st1_ptr); + FPU_to_exp16(st1_ptr, st_new_ptr); + goto denormal_arg; + } + else if ( st0_tag == TW_Infinity ) + { + sign = getsign(st0_ptr); + setpositive(st0_ptr); + push(); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + setsign(st_new_ptr, sign); + return; + } + else if ( st0_tag == TW_NaN ) + { + if ( real_1op_NaN(st0_ptr) < 0 ) + return; + + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + return; + } + else if ( st0_tag == TAG_Empty ) + { + /* Is this the correct behaviour? */ + if ( control_word & EX_Invalid ) + { + FPU_stack_underflow(); + push(); + FPU_stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x119); +#endif /* PARANOID */ +} + + +static void fdecstp(void) +{ + clear_C1(); + top--; +} + +static void fincstp(void) +{ + clear_C1(); + top++; +} + + +static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int expon; + + clear_C1(); + + if ( st0_tag == TAG_Valid ) + { + u_char tag; + + if (signnegative(st0_ptr)) + { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + /* make st(0) in [1.0 .. 4.0) */ + expon = exponent(st0_ptr); + + denormal_arg: + + setexponent16(st0_ptr, (expon & 1)); + + /* Do the computation, the sign of the result will be positive. */ + tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); + addexponent(st0_ptr, expon >> 1); + FPU_settag0(tag); + return; + } + + if ( st0_tag == TAG_Zero ) + return; + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Infinity ) + { + if ( signnegative(st0_ptr) ) + arith_invalid(0); /* sqrt(-Infinity) is invalid */ + return; + } + else if ( st0_tag == TW_Denormal ) + { + if (signnegative(st0_ptr)) + { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + if ( denormal_operand() < 0 ) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); + + expon = exponent16(st0_ptr); + + goto denormal_arg; + } + + single_arg_error(st0_ptr, st0_tag); + +} + + +static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int flags, tag; + + if ( st0_tag == TAG_Valid ) + { + u_char sign; + + denormal_arg: + + sign = getsign(st0_ptr); + + if (exponent(st0_ptr) > 63) + return; + + if ( st0_tag == TW_Denormal ) + { + if (denormal_operand() < 0 ) + return; + } + + /* Fortunately, this can't overflow to 2^64 */ + if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) ) + set_precision_flag(flags); + + setexponent16(st0_ptr, 63); + tag = FPU_normalize(st0_ptr); + setsign(st0_ptr, sign); + FPU_settag0(tag); + return; + } + + if ( st0_tag == TAG_Zero ) + return; + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + goto denormal_arg; + else if ( st0_tag == TW_Infinity ) + return; + else + single_arg_error(st0_ptr, st0_tag); +} + + +static int fsin(FPU_REG *st0_ptr, u_char tag) +{ + u_char arg_sign = getsign(st0_ptr); + + if ( tag == TAG_Valid ) + { + int q; + + if ( exponent(st0_ptr) > -40 ) + { + if ( (q = trig_arg(st0_ptr, 0)) == -1 ) + { + /* Operand is out of range */ + return 1; + } + + poly_sine(st0_ptr); + + if (q & 2) + changesign(st0_ptr); + + setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); + + /* We do not really know if up or down */ + set_precision_flag_up(); + return 0; + } + else + { + /* For a small arg, the result == the argument */ + set_precision_flag_up(); /* Must be up. */ + return 0; + } + } + + if ( tag == TAG_Zero ) + { + setcc(0); + return 0; + } + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + if ( tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return 1; + + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + FPU_to_exp16(st0_ptr, st0_ptr); + + tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + + FPU_settag0(tag); + + return 0; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } + else + { + single_arg_error(st0_ptr, tag); + return 1; + } +} + + +static int f_cos(FPU_REG *st0_ptr, u_char tag) +{ + u_char st0_sign; + + st0_sign = getsign(st0_ptr); + + if ( tag == TAG_Valid ) + { + int q; + + if ( exponent(st0_ptr) > -40 ) + { + if ( (exponent(st0_ptr) < 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) ) + { + poly_cos(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 ) + { + poly_sine(st0_ptr); + + if ((q+1) & 2) + changesign(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else + { + /* Operand is out of range */ + return 1; + } + } + else + { + denormal_arg: + + setcc(0); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); +#ifdef PECULIAR_486 + set_precision_flag_down(); /* 80486 appears to do this. */ +#else + set_precision_flag_up(); /* Must be up. */ +#endif /* PECULIAR_486 */ + return 0; + } + } + else if ( tag == TAG_Zero ) + { + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return 0; + } + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + if ( tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return 1; + + goto denormal_arg; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } + else + { + single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ + return 1; + } +} + + +static void fcos(FPU_REG *st0_ptr, u_char st0_tag) +{ + f_cos(st0_ptr, st0_tag); +} + + +static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + FPU_REG arg; + u_char tag; + + /* Stack underflow has higher priority */ + if ( st0_tag == TAG_Empty ) + { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + if ( st0_tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + else + tag = st0_tag; + + if ( tag == TW_NaN ) + { + single_arg_2_error(st0_ptr, TW_NaN); + return; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + if ( arith_invalid(0) >= 0 ) + { + /* Masked response */ + push(); + arith_invalid(0); + } + return; + } + + reg_copy(st0_ptr, &arg); + if ( !fsin(st0_ptr, st0_tag) ) + { + push(); + FPU_copy_to_reg0(&arg, st0_tag); + f_cos(&st(0), st0_tag); + } + else + { + /* An error, so restore st(0) */ + FPU_copy_to_reg0(&arg, st0_tag); + } +} + + +/*---------------------------------------------------------------------------*/ +/* The following all require two arguments: st(0) and st(1) */ + +/* A lean, mean kernel for the fprem instructions. This relies upon + the division and rounding to an integer in do_fprem giving an + exact result. Because of this, rem_kernel() needs to deal only with + the least significant 64 bits, the more significant bits of the + result must be zero. + */ +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n) +{ + int dummy; + unsigned long long x; + + x = st0 << n; + + /* Do the required multiplication and subtraction in the one operation */ + + /* lsw x -= lsw st1 * lsw q */ + asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1" + :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]), + "=a" (dummy) + :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= msw st1 * lsw q */ + asm volatile ("mull %3; subl %%eax,%0" + :"=m" (((unsigned *)&x)[1]), "=a" (dummy) + :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= lsw st1 * msw q */ + asm volatile ("mull %3; subl %%eax,%0" + :"=m" (((unsigned *)&x)[1]), "=a" (dummy) + :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1]) + :"%dx"); + + *y = x; +} + + +/* Remainder of st(0) / st(1) */ +/* This routine produces exact results, i.e. there is never any + rounding or truncation, etc of the result. */ +static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + FPU_REG tmp, st0, st1; + u_char st0_sign, st1_sign; + u_char tmptag; + int tag; + int old_cw; + int expdif; + long long q; + unsigned short saved_status; + int cc; + + fprem_valid: + /* Convert registers for internal use. */ + st0_sign = FPU_to_exp16(st0_ptr, &st0); + st1_sign = FPU_to_exp16(st1_ptr, &st1); + expdif = exponent16(&st0) - exponent16(&st1); + + old_cw = control_word; + cc = 0; + + /* We want the status following the denorm tests, but don't want + the status changed by the arithmetic operations. */ + saved_status = partial_status; + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + if ( expdif < 64 ) + { + /* This should be the most common case */ + + if ( expdif > -2 ) + { + u_char sign = st0_sign ^ st1_sign; + tag = FPU_u_div(&st0, &st1, &tmp, + PR_64_BITS | RC_CHOP | 0x3f, + sign); + setsign(&tmp, sign); + + if ( exponent(&tmp) >= 0 ) + { + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + q = significand(&tmp); + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + q, expdif); + + setexponent16(&tmp, exponent16(&st1)); + } + else + { + reg_copy(&st0, &tmp); + q = 0; + } + + if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) + { + /* We may need to subtract st(1) once more, + to get a result <= 1/2 of st(1). */ + unsigned long long x; + expdif = exponent16(&st1) - exponent16(&tmp); + if ( expdif <= 1 ) + { + if ( expdif == 0 ) + x = significand(&st1) - significand(&tmp); + else /* expdif is 1 */ + x = (significand(&st1) << 1) - significand(&tmp); + if ( (x < significand(&tmp)) || + /* or equi-distant (from 0 & st(1)) and q is odd */ + ((x == significand(&tmp)) && (q & 1) ) ) + { + st0_sign = ! st0_sign; + significand(&tmp) = x; + q++; + } + } + } + + if (q & 4) cc |= SW_C0; + if (q & 2) cc |= SW_C3; + if (q & 1) cc |= SW_C1; + } + else + { + control_word = old_cw; + setcc(0); + return; + } + } + else + { + /* There is a large exponent difference ( >= 64 ) */ + /* To make much sense, the code in this section should + be done at high precision. */ + int exp_1, N; + u_char sign; + + /* prevent overflow here */ + /* N is 'a number between 32 and 63' (p26-113) */ + reg_copy(&st0, &tmp); + tmptag = st0_tag; + N = (expdif & 0x0000001f) + 32; /* This choice gives results + identical to an AMD 486 */ + setexponent16(&tmp, N); + exp_1 = exponent16(&st1); + setexponent16(&st1, 0); + expdif -= N; + + sign = getsign(&tmp) ^ st1_sign; + tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f, + sign); + setsign(&tmp, sign); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + significand(&tmp), + exponent(&tmp) + ); + setexponent16(&tmp, exp_1 + expdif); + + /* It is possible for the operation to be complete here. + What does the IEEE standard say? The Intel 80486 manual + implies that the operation will never be completed at this + point, and the behaviour of a real 80486 confirms this. + */ + if ( !(tmp.sigh | tmp.sigl) ) + { + /* The result is zero */ + control_word = old_cw; + partial_status = saved_status; + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(&st0, st0_sign); +#ifdef PECULIAR_486 + setcc(SW_C2); +#else + setcc(0); +#endif /* PECULIAR_486 */ + return; + } + cc = SW_C2; + } + + control_word = old_cw; + partial_status = saved_status; + tag = FPU_normalize_nuo(&tmp); + reg_copy(&tmp, st0_ptr); + + /* The only condition to be looked for is underflow, + and it can occur here only if underflow is unmasked. */ + if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) + && !(control_word & CW_Underflow) ) + { + setcc(cc); + tag = arith_underflow(st0_ptr); + setsign(st0_ptr, st0_sign); + FPU_settag0(tag); + return; + } + else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) ) + { + stdexp(st0_ptr); + setsign(st0_ptr, st0_sign); + } + else + { + tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); + } + FPU_settag0(tag); + setcc(cc); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + goto fprem_valid; + } + else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow(); + return; + } + else if ( st0_tag == TAG_Zero ) + { + if ( st1_tag == TAG_Valid ) + { + setcc(0); return; + } + else if ( st1_tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return; + setcc(0); return; + } + else if ( st1_tag == TAG_Zero ) + { arith_invalid(0); return; } /* fprem(?,0) always invalid */ + else if ( st1_tag == TW_Infinity ) + { setcc(0); return; } + } + else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + if ( st1_tag == TAG_Zero ) + { + arith_invalid(0); /* fprem(Valid,Zero) is invalid */ + return; + } + else if ( st1_tag != TW_NaN ) + { + if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal)) + && (denormal_operand() < 0) ) + return; + + if ( st1_tag == TW_Infinity ) + { + /* fprem(Valid,Infinity) is o.k. */ + setcc(0); return; + } + } + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag != TW_NaN ) + { + arith_invalid(0); /* fprem(Infinity,?) is invalid */ + return; + } + } + + /* One of the registers must contain a NaN if we got here. */ + +#ifdef PARANOID + if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) + EXCEPTION(EX_INTERNAL | 0x118); +#endif /* PARANOID */ + + real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); + +} + + +/* ST(1) <- ST(1) * log ST; pop ST */ +static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1), exponent; + u_char st1_tag = FPU_gettagi(1); + u_char sign; + int e, tag; + + clear_C1(); + + if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) ) + { + both_valid: + /* Both regs are Valid or Denormal */ + if ( signpositive(st0_ptr) ) + { + if ( st0_tag == TW_Denormal ) + FPU_to_exp16(st0_ptr, st0_ptr); + else + /* Convert st(0) for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) + { + /* Special case. The result can be precise. */ + u_char esign; + e = exponent16(st0_ptr); + if ( e >= 0 ) + { + exponent.sigh = e; + esign = SIGN_POS; + } + else + { + exponent.sigh = -e; + esign = SIGN_NEG; + } + exponent.sigl = 0; + setexponent16(&exponent, 31); + tag = FPU_normalize_nuo(&exponent); + stdexp(&exponent); + setsign(&exponent, esign); + tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION); + if ( tag >= 0 ) + FPU_settagi(1, tag); + } + else + { + /* The usual case */ + sign = getsign(st1_ptr); + if ( st1_tag == TW_Denormal ) + FPU_to_exp16(st1_ptr, st1_ptr); + else + /* Convert st(1) for internal use. */ + setexponent16(st1_ptr, exponent(st1_ptr)); + poly_l2(st0_ptr, st1_ptr, sign); + } + } + else + { + /* negative */ + if ( arith_invalid(1) < 0 ) + return; + } + + FPU_pop(); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) ) + { + if ( st0_tag == TAG_Zero ) + { + if ( st1_tag == TAG_Zero ) + { + /* Both args zero is invalid */ + if ( arith_invalid(1) < 0 ) + return; + } + else + { + u_char sign; + sign = getsign(st1_ptr)^SIGN_NEG; + if ( FPU_divide_by_zero(1, sign) < 0 ) + return; + + setsign(st1_ptr, sign); + } + } + else if ( st1_tag == TAG_Zero ) + { + /* st(1) contains zero, st(0) valid <> 0 */ + /* Zero is the valid answer */ + sign = getsign(st1_ptr); + + if ( signnegative(st0_ptr) ) + { + /* log(negative) */ + if ( arith_invalid(1) < 0 ) + return; + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + { + if ( exponent(st0_ptr) < 0 ) + sign ^= SIGN_NEG; + + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); + } + } + else + { + /* One or both operands are denormals. */ + if ( denormal_operand() < 0 ) + return; + goto both_valid; + } + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + /* One or both arg must be an infinity */ + else if ( st0_tag == TW_Infinity ) + { + if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) ) + { + /* log(-infinity) or 0*log(infinity) */ + if ( arith_invalid(1) < 0 ) + return; + } + else + { + u_char sign = getsign(st1_ptr); + + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } + /* st(1) must be infinity here */ + else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) + && ( signpositive(st0_ptr) ) ) + { + if ( exponent(st0_ptr) >= 0 ) + { + if ( (exponent(st0_ptr) == 0) && + (st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0) ) + { + /* st(0) holds 1.0 */ + /* infinity*log(1) */ + if ( arith_invalid(1) < 0 ) + return; + } + /* else st(0) is positive and > 1.0 */ + } + else + { + /* st(0) is positive and < 1.0 */ + + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + changesign(st1_ptr); + } + } + else + { + /* st(0) must be zero or negative */ + if ( st0_tag == TAG_Zero ) + { + /* This should be invalid, but a real 80486 is happy with it. */ + +#ifndef PECULIAR_486 + sign = getsign(st1_ptr); + if ( FPU_divide_by_zero(1, sign) < 0 ) + return; +#endif /* PECULIAR_486 */ + + changesign(st1_ptr); + } + else if ( arith_invalid(1) < 0 ) /* log(negative) */ + return; + } + + FPU_pop(); +} + + +static void fpatan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int tag; + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + valid_atan: + + poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag); + + FPU_pop(); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + + goto valid_atan; + } + else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 ) + FPU_pop(); + return; + } + else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) + { + u_char sign = getsign(st1_ptr); + if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_Infinity ) + { + if ( signpositive(st0_ptr) ) + { + FPU_copy_to_reg1(&CONST_PI4, TAG_Valid); + } + else + { + setpositive(st1_ptr); + tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr, + FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI4), exponent(&CONST_PI2)); + if ( tag >= 0 ) + FPU_settagi(1, tag); + } + } + else + { + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st0_ptr) ) + { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); /* An 80486 preserves the sign */ + FPU_pop(); + return; + } + else + { + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + } + } + } + else + { + /* st(1) is infinity, st(0) not infinity */ + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + } + setsign(st1_ptr, sign); + } + else if ( st1_tag == TAG_Zero ) + { + /* st(0) must be valid or zero */ + u_char sign = getsign(st1_ptr); + + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st0_ptr) ) + { + /* An 80486 preserves the sign */ + FPU_pop(); + return; + } + + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + setsign(st1_ptr, sign); + } + else if ( st0_tag == TAG_Zero ) + { + /* st(1) must be TAG_Valid here */ + u_char sign = getsign(st1_ptr); + + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + setsign(st1_ptr, sign); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x125); +#endif /* PARANOID */ + + FPU_pop(); + set_precision_flag_up(); /* We do not really know if up or down */ +} + + +static void fprem(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_CHOP); +} + + +static void fprem1(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_RND); +} + + +static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) +{ + u_char sign, sign1; + FPU_REG *st1_ptr = &st(1), a, b; + u_char st1_tag = FPU_gettagi(1); + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + valid_yl2xp1: + + sign = getsign(st0_ptr); + sign1 = getsign(st1_ptr); + + FPU_to_exp16(st0_ptr, &a); + FPU_to_exp16(st1_ptr, &b); + + if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) ) + return; + + FPU_pop(); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + + goto valid_yl2xp1; + } + else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( st0_tag == TAG_Zero ) + { + switch ( st1_tag ) + { + case TW_Denormal: + if ( denormal_operand() < 0 ) + return; + + case TAG_Zero: + case TAG_Valid: + setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); + FPU_copy_to_reg1(st0_ptr, st0_tag); + break; + + case TW_Infinity: + /* Infinity*log(1) */ + if ( arith_invalid(1) < 0 ) + return; + break; + + case TW_NaN: + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + break; + + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x116); + return; +#endif /* PARANOID */ + break; + } + } + else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + switch ( st1_tag ) + { + case TAG_Zero: + if ( signnegative(st0_ptr) ) + { + if ( exponent(st0_ptr) >= 0 ) + { + /* st(0) holds <= -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) + return; +#endif /* PECULIAR_486 */ + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + changesign(st1_ptr); + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + break; + + case TW_Infinity: + if ( signnegative(st0_ptr) ) + { + if ( (exponent(st0_ptr) >= 0) && + !((st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0)) ) + { + /* st(0) holds < -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) return; +#endif /* PECULIAR_486 */ + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + changesign(st1_ptr); + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + break; + + case TW_NaN: + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + + } + else if ( st0_tag == TW_NaN ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_NaN ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + else if ( signnegative(st0_ptr) ) + { +#ifndef PECULIAR_486 + /* This should have higher priority than denormals, but... */ + if ( arith_invalid(1) < 0 ) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; +#ifdef PECULIAR_486 + /* Denormal operands actually get higher priority */ + if ( arith_invalid(1) < 0 ) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + } + else if ( st1_tag == TAG_Zero ) + { + /* log(infinity) */ + if ( arith_invalid(1) < 0 ) + return; + } + + /* st(1) must be valid here. */ + + else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + /* The Manual says that log(Infinity) is invalid, but a real + 80486 sensibly says that it is o.k. */ + else + { + u_char sign = getsign(st1_ptr); + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL | 0x117); + return; + } +#endif /* PARANOID */ + + FPU_pop(); + return; + +} + + +static void fscale(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int old_cw = control_word; + u_char sign = getsign(st0_ptr); + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + long scale; + FPU_REG tmp; + + /* Convert register for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + valid_scale: + + if ( exponent(st1_ptr) > 30 ) + { + /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ + + if ( signpositive(st1_ptr) ) + { + EXCEPTION(EX_Overflow); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + } + else + { + EXCEPTION(EX_Underflow); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + } + setsign(st0_ptr, sign); + return; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + reg_copy(st1_ptr, &tmp); + FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */ + control_word = old_cw; + scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl; + scale += exponent16(st0_ptr); + + setexponent16(st0_ptr, scale); + + /* Use FPU_round() to properly detect under/overflow etc */ + FPU_round(st0_ptr, 0, 0, control_word, sign); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + switch ( st1_tag ) + { + case TAG_Valid: + /* st(0) must be a denormal */ + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */ + goto valid_scale; + + case TAG_Zero: + if ( st0_tag == TW_Denormal ) + denormal_operand(); + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st1_ptr) ) + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + else + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st0_ptr, sign); + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TAG_Zero ) + { + switch ( st1_tag ) + { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( signpositive(st1_ptr) ) + arith_invalid(0); /* Zero scaled by +Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TW_Infinity ) + { + switch ( st1_tag ) + { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( signnegative(st1_ptr) ) + arith_invalid(0); /* Infinity scaled by -Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TW_NaN ) + { + if ( st1_tag != TAG_Empty ) + { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; } + } + +#ifdef PARANOID + if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) ) + { + EXCEPTION(EX_INTERNAL | 0x115); + return; + } +#endif + + /* At least one of st(0), st(1) must be empty */ + FPU_stack_underflow(); + +} + + +/*---------------------------------------------------------------------------*/ + +static FUNC_ST0 const trig_table_a[] = { + f2xm1, fyl2x, fptan, fpatan, + fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp +}; + +void FPU_triga(void) +{ + (trig_table_a[FPU_rm])(&st(0), FPU_gettag0()); +} + + +static FUNC_ST0 const trig_table_b[] = + { + fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos + }; + +void FPU_trigb(void) +{ + (trig_table_b[FPU_rm])(&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c new file mode 100644 index 00000000000..2e2c51a8bd3 --- /dev/null +++ b/arch/x86/math-emu/get_address.c @@ -0,0 +1,438 @@ +/*---------------------------------------------------------------------------+ + | get_address.c | + | | + | Get the effective address from an FPU instruction. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + + +#include + +#include +#include + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" + + +#define FPU_WRITE_BIT 0x10 + +static int reg_offset[] = { + offsetof(struct info,___eax), + offsetof(struct info,___ecx), + offsetof(struct info,___edx), + offsetof(struct info,___ebx), + offsetof(struct info,___esp), + offsetof(struct info,___ebp), + offsetof(struct info,___esi), + offsetof(struct info,___edi) +}; + +#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) + +static int reg_offset_vm86[] = { + offsetof(struct info,___cs), + offsetof(struct info,___vm86_ds), + offsetof(struct info,___vm86_es), + offsetof(struct info,___vm86_fs), + offsetof(struct info,___vm86_gs), + offsetof(struct info,___ss), + offsetof(struct info,___vm86_ds) + }; + +#define VM86_REG_(x) (*(unsigned short *) \ + (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) + +/* This dummy, gs is not saved on the stack. */ +#define ___GS ___ds + +static int reg_offset_pm[] = { + offsetof(struct info,___cs), + offsetof(struct info,___ds), + offsetof(struct info,___es), + offsetof(struct info,___fs), + offsetof(struct info,___GS), + offsetof(struct info,___ss), + offsetof(struct info,___ds) + }; + +#define PM_REG_(x) (*(unsigned short *) \ + (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) + + +/* Decode the SIB byte. This function assumes mod != 0 */ +static int sib(int mod, unsigned long *fpu_eip) +{ + u_char ss,index,base; + long offset; + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + ss = base >> 6; + index = (base >> 3) & 7; + base &= 7; + + if ((mod == 0) && (base == 5)) + offset = 0; /* No base register */ + else + offset = REG_(base); + + if (index == 4) + { + /* No index register */ + /* A non-zero ss is illegal */ + if ( ss ) + EXCEPTION(EX_Invalid); + } + else + { + offset += (REG_(index)) << ss; + } + + if (mod == 1) + { + /* 8 bit signed displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(displacement, (signed char __user *) (*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + } + else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ + { + /* 32 bit displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(displacement, (long __user *) (*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip) += 4; + } + + return offset; +} + + +static unsigned long vm86_segment(u_char segment, + struct address *addr) +{ + segment--; +#ifdef PARANOID + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x130); + math_abort(FPU_info,SIGSEGV); + } +#endif /* PARANOID */ + addr->selector = VM86_REG_(segment); + return (unsigned long)VM86_REG_(segment) << 4; +} + + +/* This should work for 16 and 32 bit protected mode. */ +static long pm_address(u_char FPU_modrm, u_char segment, + struct address *addr, long offset) +{ + struct desc_struct descriptor; + unsigned long base_address, limit, address, seg_top; + + segment--; + +#ifdef PARANOID + /* segment is unsigned, so this also detects if segment was 0: */ + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x132); + math_abort(FPU_info,SIGSEGV); + } +#endif /* PARANOID */ + + switch ( segment ) + { + /* gs isn't used by the kernel, so it still has its + user-space value. */ + case PREFIX_GS_-1: + /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ + savesegment(gs, addr->selector); + break; + default: + addr->selector = PM_REG_(segment); + } + + descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + base_address = SEG_BASE_ADDR(descriptor); + address = base_address + offset; + limit = base_address + + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; + if ( limit < base_address ) limit = 0xffffffff; + + if ( SEG_EXPAND_DOWN(descriptor) ) + { + if ( SEG_G_BIT(descriptor) ) + seg_top = 0xffffffff; + else + { + seg_top = base_address + (1 << 20); + if ( seg_top < base_address ) seg_top = 0xffffffff; + } + access_limit = + (address <= limit) || (address >= seg_top) ? 0 : + ((seg_top-address) >= 255 ? 255 : seg_top-address); + } + else + { + access_limit = + (address > limit) || (address < base_address) ? 0 : + ((limit-address) >= 254 ? 255 : limit-address+1); + } + if ( SEG_EXECUTE_ONLY(descriptor) || + (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) + { + access_limit = 0; + } + return address; +} + + +/* + MOD R/M byte: MOD == 3 has a special use for the FPU + SIB byte used iff R/M = 100b + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + MOD OPCODE(2) R/M + + + SIB byte + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + SS INDEX BASE + +*/ + +void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + long *cpu_reg_ptr; + int address = 0; /* Initialized just to stop compiler warnings. */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + if (rm == 4 && mod != 3) + { + address = sib(mod, fpu_eip); + } + else + { + cpu_reg_ptr = & REG_(rm); + switch (mod) + { + case 0: + if (rm == 5) + { + /* Special case: disp32 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, (unsigned long __user *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + addr->offset = address; + return (void __user *) address; + } + else + { + address = *cpu_reg_ptr; /* Just return the contents + of the cpu register */ + addr->offset = address; + return (void __user *) address; + } + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 32 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, (long __user *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + } + address += *cpu_reg_ptr; + } + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL|0x133); + } + + return (void __user *)address; +} + + +void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + int address = 0; /* Default used for mod == 0 */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + switch (mod) + { + case 0: + if (rm == 6) + { + /* Special case: disp16 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + goto add_segment; + } + break; + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 16 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + break; + } + switch ( rm ) + { + case 0: + address += FPU_info->___ebx + FPU_info->___esi; + break; + case 1: + address += FPU_info->___ebx + FPU_info->___edi; + break; + case 2: + address += FPU_info->___ebp + FPU_info->___esi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 3: + address += FPU_info->___ebp + FPU_info->___edi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 4: + address += FPU_info->___esi; + break; + case 5: + address += FPU_info->___edi; + break; + case 6: + address += FPU_info->___ebp; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 7: + address += FPU_info->___ebx; + break; + } + + add_segment: + address &= 0xffff; + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL|0x131); + } + + return (void __user *)address ; +} diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c new file mode 100644 index 00000000000..eebd6fb1c8a --- /dev/null +++ b/arch/x86/math-emu/load_store.c @@ -0,0 +1,272 @@ +/*---------------------------------------------------------------------------+ + | load_store.c | + | | + | This file contains most of the code to interpret the FPU instructions | + | which load and store from user memory. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +#define _NONE_ 0 /* st0_ptr etc not needed */ +#define _REG0_ 1 /* Will be storing st(0) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ + +#define pop_0() { FPU_settag0(TAG_Empty); top++; } + + +static u_char const type_table[32] = { + _PUSH_, _PUSH_, _PUSH_, _PUSH_, + _null_, _null_, _null_, _null_, + _REG0_, _REG0_, _REG0_, _REG0_, + _REG0_, _REG0_, _REG0_, _REG0_, + _NONE_, _null_, _NONE_, _PUSH_, + _NONE_, _PUSH_, _null_, _PUSH_, + _NONE_, _null_, _NONE_, _REG0_, + _NONE_, _REG0_, _NONE_, _REG0_ + }; + +u_char const data_sizes_16[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 14, 0, 94, 10, 2, 10, 0, 8, + 14, 0, 94, 10, 2, 10, 2, 8 +}; + +static u_char const data_sizes_32[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 28, 0,108, 10, 2, 10, 0, 8, + 28, 0,108, 10, 2, 10, 2, 8 +}; + +int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user *data_address) +{ + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ + u_char loaded_tag; + + st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ + + if ( addr_modes.default_mode & PROTECTED ) + { + if ( addr_modes.default_mode == SEG32 ) + { + if ( access_limit < data_sizes_32[type] ) + math_abort(FPU_info,SIGSEGV); + } + else if ( addr_modes.default_mode == PM16 ) + { + if ( access_limit < data_sizes_16[type] ) + math_abort(FPU_info,SIGSEGV); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL|0x140); +#endif /* PARANOID */ + } + + switch ( type_table[type] ) + { + case _NONE_: + break; + case _REG0_: + st0_ptr = &st(0); /* Some of these instructions pop after + storing */ + st0_tag = FPU_gettag0(); + break; + case _PUSH_: + { + if ( FPU_gettagi(-1) != TAG_Empty ) + { FPU_stack_overflow(); return 0; } + top--; + st0_ptr = &st(0); + } + break; + case _null_: + FPU_illegal(); + return 0; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x141); + return 0; +#endif /* PARANOID */ + } + + switch ( type ) + { + case 000: /* fld m32real */ + clear_C1(); + loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data); + if ( (loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0) ) + { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 001: /* fild m32int */ + clear_C1(); + loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 002: /* fld m64real */ + clear_C1(); + loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data); + if ( (loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0) ) + { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 003: /* fild m16int */ + clear_C1(); + loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 010: /* fst m32real */ + clear_C1(); + FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address); + break; + case 011: /* fist m32int */ + clear_C1(); + FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address); + break; + case 012: /* fst m64real */ + clear_C1(); + FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address); + break; + case 013: /* fist m16int */ + clear_C1(); + FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address); + break; + case 014: /* fstp m32real */ + clear_C1(); + if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 015: /* fistp m32int */ + clear_C1(); + if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 016: /* fstp m64real */ + clear_C1(); + if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 017: /* fistp m16int */ + clear_C1(); + if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 020: /* fldenv m14/28byte */ + fldenv(addr_modes, (u_char __user *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 022: /* frstor m94/108byte */ + frstor(addr_modes, (u_char __user *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 023: /* fbld m80dec */ + clear_C1(); + loaded_tag = FPU_load_bcd((u_char __user *)data_address); + FPU_settag0(loaded_tag); + break; + case 024: /* fldcw */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, data_address, 2); + FPU_get_user(control_word, (unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); +#ifdef PECULIAR_486 + control_word |= 0x40; /* An 80486 appears to always set this bit */ +#endif /* PECULIAR_486 */ + return 1; + case 025: /* fld m80real */ + clear_C1(); + loaded_tag = FPU_load_extended((long double __user *)data_address, 0); + FPU_settag0(loaded_tag); + break; + case 027: /* fild m64int */ + clear_C1(); + loaded_tag = FPU_load_int64((long long __user *)data_address); + if (loaded_tag == TAG_Error) + return 0; + FPU_settag0(loaded_tag); + break; + case 030: /* fstenv m14/28byte */ + fstenv(addr_modes, (u_char __user *)data_address); + return 1; + case 032: /* fsave */ + fsave(addr_modes, (u_char __user *)data_address); + return 1; + case 033: /* fbstp m80dec */ + clear_C1(); + if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 034: /* fstcw m16int */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,data_address,2); + FPU_put_user(control_word, (unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 035: /* fstp m80real */ + clear_C1(); + if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 036: /* fstsw m2byte */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,data_address,2); + FPU_put_user(status_word(),(unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 037: /* fistp m64int */ + clear_C1(); + if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + } + return 0; +} diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S new file mode 100644 index 00000000000..717785a53eb --- /dev/null +++ b/arch/x86/math-emu/mul_Xsig.S @@ -0,0 +1,176 @@ +/*---------------------------------------------------------------------------+ + | mul_Xsig.S | + | | + | Multiply a 12 byte fixed point number by another fixed point number. | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void mul32_Xsig(Xsig *x, unsigned b) | + | | + | void mul64_Xsig(Xsig *x, unsigned long long *b) | + | | + | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | + | | + | The result is neither rounded nor normalized, and the ls bit or so may | + | be wrong. | + | | + +---------------------------------------------------------------------------*/ + .file "mul_Xsig.S" + + +#include "fpu_emu.h" + +.text +ENTRY(mul32_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull %ecx /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + +ENTRY(mul64_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 4(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + + +ENTRY(mul_Xsig_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 8(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%edx + movl %edx,(%esi) + movl -8(%ebp),%edx + movl %edx,4(%esi) + movl -4(%ebp),%edx + movl %edx,8(%esi) + + popl %esi + leave + ret + diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h new file mode 100644 index 00000000000..4db79811492 --- /dev/null +++ b/arch/x86/math-emu/poly.h @@ -0,0 +1,121 @@ +/*---------------------------------------------------------------------------+ + | poly.h | + | | + | Header file for the FPU-emu poly*.c source files. | + | | + | Copyright (C) 1994,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | Declarations and definitions for functions operating on Xsig (12-byte | + | extended-significand) quantities. | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _POLY_H +#define _POLY_H + +/* This 12-byte structure is used to improve the accuracy of computation + of transcendental functions. + Intended to be used to get results better than 8-byte computation + allows. 9-byte would probably be sufficient. + */ +typedef struct { + unsigned long lsw; + unsigned long midw; + unsigned long msw; +} Xsig; + +asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, + unsigned long long *result); +asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, + const unsigned long long terms[], const int n); + +asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); +asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); +asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); + +asmlinkage void shr_Xsig(Xsig *, const int n); +asmlinkage int round_Xsig(Xsig *); +asmlinkage int norm_Xsig(Xsig *); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); + +/* Macro to extract the most significant 32 bits from a long long */ +#define LL_MSW(x) (((unsigned long *)&x)[1]) + +/* Macro to initialize an Xsig struct */ +#define MK_XSIG(a,b,c) { c, b, a } + +/* Macro to access the 8 ms bytes of an Xsig as a long long */ +#define XSIG_LL(x) (*(unsigned long long *)&x.midw) + + +/* + Need to run gcc with optimizations on to get these to + actually be in-line. + */ + +/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. + The answer is the ms word of the product. */ +/* Some versions of gcc make it difficult to stop eax from being clobbered. + Merely specifying that it is used doesn't work... + */ +static inline unsigned long mul_32_32(const unsigned long arg1, + const unsigned long arg2) +{ + int retval; + asm volatile ("mull %2; movl %%edx,%%eax" \ + :"=a" (retval) \ + :"0" (arg1), "g" (arg2) \ + :"dx"); + return retval; +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ +static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) +{ + asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" + :"=g" (*dest):"g" (dest), "g" (x2) + :"ax","si","di"); +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ +/* Note: the constraints in the asm statement didn't always work properly + with gcc 2.5.8. Changing from using edi to using ecx got around the + problem, but keep fingers crossed! */ +static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) +{ + asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" + "jnc 0f;\n" + "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" + "movl %4,%%ecx; incl (%%ecx)\n" + "movl $1,%%eax; jmp 1f;\n" + "0: xorl %%eax,%%eax;\n" + "1:\n" + :"=g" (*exp), "=g" (*dest) + :"g" (dest), "g" (x2), "g" (exp) + :"cx","si","ax"); +} + + +/* Negate (subtract from 1.0) the 12 byte Xsig */ +/* This is faster in a loop on my 386 than using the "neg" instruction. */ +static inline void negate_Xsig(Xsig *x) +{ + asm volatile("movl %1,%%esi;\n" + "xorl %%ecx,%%ecx;\n" + "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" + "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" + "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" + :"=g" (*x):"g" (x):"si","ax","cx"); +} + +#endif /* _POLY_H */ diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c new file mode 100644 index 00000000000..9766ad5e974 --- /dev/null +++ b/arch/x86/math-emu/poly_2xm1.c @@ -0,0 +1,156 @@ +/*---------------------------------------------------------------------------+ + | poly_2xm1.c | + | | + | Function to compute 2^x-1 by a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWER 11 +static const unsigned long long lterms[HIPOWER] = +{ + 0x0000000000000000LL, /* This term done separately as 12 bytes */ + 0xf5fdeffc162c7543LL, + 0x1c6b08d704a0bfa6LL, + 0x0276556df749cc21LL, + 0x002bb0ffcf14f6b8LL, + 0x0002861225ef751cLL, + 0x00001ffcbfcd5422LL, + 0x00000162c005d5f1LL, + 0x0000000da96ccb1bLL, + 0x0000000078d1b897LL, + 0x000000000422b029LL +}; + +static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); + +/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, + These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) + */ +static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); +static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); +static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); +static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); + +static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, + &shiftterm2, &shiftterm3 }; + + +/*--- poly_2xm1() -----------------------------------------------------------+ + | Requires st(0) which is TAG_Valid and < 1. | + +---------------------------------------------------------------------------*/ +int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) +{ + long int exponent, shift; + unsigned long long Xll; + Xsig accumulator, Denom, argSignif; + u_char tag; + + exponent = exponent16(arg); + +#ifdef PARANOID + if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */ + { + /* Number negative, too large, or not Valid. */ + EXCEPTION(EX_INTERNAL|0x127); + return 1; + } +#endif /* PARANOID */ + + argSignif.lsw = 0; + XSIG_LL(argSignif) = Xll = significand(arg); + + if ( exponent == -1 ) + { + shift = (argSignif.msw & 0x40000000) ? 3 : 2; + /* subtract 0.5 or 0.75 */ + exponent -= 2; + XSIG_LL(argSignif) <<= 2; + Xll <<= 2; + } + else if ( exponent == -2 ) + { + shift = 1; + /* subtract 0.25 */ + exponent--; + XSIG_LL(argSignif) <<= 1; + Xll <<= 1; + } + else + shift = 0; + + if ( exponent < -2 ) + { + /* Shift the argument right by the required places. */ + if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U ) + Xll++; /* round up */ + } + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 3); + + mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ + add_two_Xsig(&accumulator, &argSignif, &exponent); + + if ( shift ) + { + /* The argument is large, use the identity: + f(x+a) = f(a) * (f(x) + 1) - 1; + */ + shr_Xsig(&accumulator, - exponent); + accumulator.msw |= 0x80000000; /* add 1.0 */ + mul_Xsig_Xsig(&accumulator, shiftterm[shift]); + accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ + exponent = 1; + } + + if ( sign != SIGN_POS ) + { + /* The argument is negative, use the identity: + f(-x) = -f(x) / (1 + f(x)) + */ + Denom.lsw = accumulator.lsw; + XSIG_LL(Denom) = XSIG_LL(accumulator); + if ( exponent < 0 ) + shr_Xsig(&Denom, - exponent); + else if ( exponent > 0 ) + { + /* exponent must be 1 here */ + XSIG_LL(Denom) <<= 1; + if ( Denom.lsw & 0x80000000 ) + XSIG_LL(Denom) |= 1; + (Denom.lsw) <<= 1; + } + Denom.msw |= 0x80000000; /* add 1.0 */ + div_Xsig(&accumulator, &Denom, &accumulator); + } + + /* Convert to 64 bit signed-compatible */ + exponent += round_Xsig(&accumulator); + + result = &st(0); + significand(result) = XSIG_LL(accumulator); + setexponent16(result, exponent); + + tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); + + setsign(result, sign); + FPU_settag0(tag); + + return 0; + +} diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c new file mode 100644 index 00000000000..82f702952f6 --- /dev/null +++ b/arch/x86/math-emu/poly_atan.c @@ -0,0 +1,229 @@ +/*---------------------------------------------------------------------------+ + | poly_atan.c | + | | + | Compute the arctan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "status_w.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWERon 6 /* odd poly, negative terms */ +static const unsigned long long oddnegterms[HIPOWERon] = +{ + 0x0000000000000000LL, /* Dummy (not for - 1.0) */ + 0x015328437f756467LL, + 0x0005dda27b73dec6LL, + 0x0000226bf2bfb91aLL, + 0x000000ccc439c5f7LL, + 0x0000000355438407LL +} ; + +#define HIPOWERop 6 /* odd poly, positive terms */ +static const unsigned long long oddplterms[HIPOWERop] = +{ +/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ + 0x0db55a71875c9ac2LL, + 0x0029fce2d67880b0LL, + 0x0000dfd3908b4596LL, + 0x00000550fd61dab4LL, + 0x0000001c9422b3f9LL, + 0x000000003e3301e1LL +}; + +static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; + +static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); + +static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); + + +/*--- poly_atan() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, + FPU_REG *st1_ptr, u_char st1_tag) +{ + u_char transformed, inverted, + sign1, sign2; + int exponent; + long int dummy_exp; + Xsig accumulator, Numer, Denom, accumulatore, argSignif, + argSq, argSqSq; + u_char tag; + + sign1 = getsign(st0_ptr); + sign2 = getsign(st1_ptr); + if ( st0_tag == TAG_Valid ) + { + exponent = exponent(st0_ptr); + } + else + { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st0_ptr, st0_ptr); + exponent = exponent16(st0_ptr); + } + if ( st1_tag == TAG_Valid ) + { + exponent -= exponent(st1_ptr); + } + else + { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st1_ptr, st1_ptr); + exponent -= exponent16(st1_ptr); + } + + if ( (exponent < 0) || ((exponent == 0) && + ((st0_ptr->sigh < st1_ptr->sigh) || + ((st0_ptr->sigh == st1_ptr->sigh) && + (st0_ptr->sigl < st1_ptr->sigl))) ) ) + { + inverted = 1; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st0_ptr); + XSIG_LL(Denom) = significand(st1_ptr); + } + else + { + inverted = 0; + exponent = -exponent; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st1_ptr); + XSIG_LL(Denom) = significand(st0_ptr); + } + div_Xsig(&Numer, &Denom, &argSignif); + exponent += norm_Xsig(&argSignif); + + if ( (exponent >= -1) + || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) + { + /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ + /* Convert the argument by an identity for atan */ + transformed = 1; + + if ( exponent >= 0 ) + { +#ifdef PARANOID + if ( !( (exponent == 0) && + (argSignif.lsw == 0) && (argSignif.midw == 0) && + (argSignif.msw == 0x80000000) ) ) + { + EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ + return; + } +#endif /* PARANOID */ + argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ + } + else + { + Numer.lsw = Denom.lsw = argSignif.lsw; + XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); + + if ( exponent < -1 ) + shr_Xsig(&Numer, -1-exponent); + negate_Xsig(&Numer); + + shr_Xsig(&Denom, -exponent); + Denom.msw |= 0x80000000; + + div_Xsig(&Numer, &Denom, &argSignif); + + exponent = -1 + norm_Xsig(&argSignif); + } + } + else + { + transformed = 0; + } + + argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; + argSq.msw = argSignif.msw; + mul_Xsig_Xsig(&argSq, &argSq); + + argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + accumulatore.lsw = argSq.lsw; + XSIG_LL(accumulatore) = XSIG_LL(argSq); + + shr_Xsig(&argSq, 2*(-1-exponent-1)); + shr_Xsig(&argSqSq, 4*(-1-exponent-1)); + + /* Now have argSq etc with binary point at the left + .1xxxxxxxx */ + + /* Do the basic fixed point polynomial evaluation */ + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), + oddplterms, HIPOWERop-1); + mul64_Xsig(&accumulator, &XSIG_LL(argSq)); + negate_Xsig(&accumulator); + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); + negate_Xsig(&accumulator); + add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); + + mul64_Xsig(&accumulatore, &denomterm); + shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); + accumulatore.msw |= 0x80000000; + + div_Xsig(&accumulator, &accumulatore, &accumulator); + + mul_Xsig_Xsig(&accumulator, &argSignif); + mul_Xsig_Xsig(&accumulator, &argSq); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &argSignif); + + if ( transformed ) + { + /* compute pi/4 - accumulator */ + shr_Xsig(&accumulator, -1-exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = -1; + } + + if ( inverted ) + { + /* compute pi/2 - accumulator */ + shr_Xsig(&accumulator, -exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 0; + } + + if ( sign1 ) + { + /* compute pi - accumulator */ + shr_Xsig(&accumulator, 1 - exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 1; + } + + exponent += round_Xsig(&accumulator); + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, exponent); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* We do not really know if up or down, + use this as the default. */ + +} diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c new file mode 100644 index 00000000000..dd00e1d5b07 --- /dev/null +++ b/arch/x86/math-emu/poly_l2.c @@ -0,0 +1,272 @@ +/*---------------------------------------------------------------------------+ + | poly_l2.c | + | | + | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +static void log2_kernel(FPU_REG const *arg, u_char argsign, + Xsig *accum_result, long int *expon); + + +/*--- poly_l2() -------------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + +---------------------------------------------------------------------------*/ +void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) +{ + long int exponent, expon, expon_expon; + Xsig accumulator, expon_accum, yaccum; + u_char sign, argsign; + FPU_REG x; + int tag; + + exponent = exponent16(st0_ptr); + + /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ + if ( st0_ptr->sigh > (unsigned)0xb504f334 ) + { + /* Treat as sqrt(2)/2 < st0_ptr < 1 */ + significand(&x) = - significand(st0_ptr); + setexponent16(&x, -1); + exponent++; + argsign = SIGN_NEG; + } + else + { + /* Treat as 1 <= st0_ptr < sqrt(2) */ + x.sigh = st0_ptr->sigh - 0x80000000; + x.sigl = st0_ptr->sigl; + setexponent16(&x, 0); + argsign = SIGN_POS; + } + tag = FPU_normalize_nuo(&x); + + if ( tag == TAG_Zero ) + { + expon = 0; + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + } + else + { + log2_kernel(&x, argsign, &accumulator, &expon); + } + + if ( exponent < 0 ) + { + sign = SIGN_NEG; + exponent = -exponent; + } + else + sign = SIGN_POS; + expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; + if ( exponent ) + { + expon_expon = 31 + norm_Xsig(&expon_accum); + shr_Xsig(&accumulator, expon_expon - expon); + + if ( sign ^ argsign ) + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &expon_accum); + } + else + { + expon_expon = expon; + sign = argsign; + } + + yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + expon_expon += round_Xsig(&accumulator); + + if ( accumulator.msw == 0 ) + { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + return; + } + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* 80486 appears to always do this */ + + return; + +} + + +/*--- poly_l2p1() -----------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +int poly_l2p1(u_char sign0, u_char sign1, + FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest) +{ + u_char tag; + long int exponent; + Xsig accumulator, yaccum; + + if ( exponent16(st0_ptr) < 0 ) + { + log2_kernel(st0_ptr, sign0, &accumulator, &exponent); + + yaccum.lsw = 0; + XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + exponent += round_Xsig(&accumulator); + + exponent += exponent16(st1_ptr) + 1; + if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; + + significand(dest) = XSIG_LL(accumulator); + setexponent16(dest, exponent); + + tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); + FPU_settagi(1, tag); + + if ( tag == TAG_Valid ) + set_precision_flag_up(); /* 80486 appears to always do this */ + } + else + { + /* The magnitude of st0_ptr is far too large. */ + + if ( sign0 != SIGN_POS ) + { + /* Trying to get the log of a negative number. */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) + return 1; +#endif /* PECULIAR_486 */ + } + + /* 80486 appears to do this */ + if ( sign0 == SIGN_NEG ) + set_precision_flag_down(); + else + set_precision_flag_up(); + } + + if ( exponent(dest) <= EXP_UNDER ) + EXCEPTION(EX_Underflow); + + return 0; + +} + + + + +#undef HIPOWER +#define HIPOWER 10 +static const unsigned long long logterms[HIPOWER] = +{ + 0x2a8eca5705fc2ef0LL, + 0xf6384ee1d01febceLL, + 0x093bb62877cdf642LL, + 0x006985d8a9ec439bLL, + 0x0005212c4f55a9c8LL, + 0x00004326a16927f0LL, + 0x0000038d1d80a0e7LL, + 0x0000003141cc80c6LL, + 0x00000002b1668c9fLL, + 0x000000002c7a46aaLL +}; + +static const unsigned long leadterm = 0xb8000000; + + +/*--- log2_kernel() ---------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, + long int *expon) +{ + long int exponent, adj; + unsigned long long Xsq; + Xsig accumulator, Numer, Denom, argSignif, arg_signif; + + exponent = exponent16(arg); + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); + if ( argsign == SIGN_POS ) + { + shr_Xsig(&Denom, 2 - (1 + exponent)); + Denom.msw |= 0x80000000; + div_Xsig(&Numer, &Denom, &argSignif); + } + else + { + shr_Xsig(&Denom, 1 - (1 + exponent)); + negate_Xsig(&Denom); + if ( Denom.msw & 0x80000000 ) + { + div_Xsig(&Numer, &Denom, &argSignif); + exponent ++; + } + else + { + /* Denom must be 1.0 */ + argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; + argSignif.msw = Numer.msw; + } + } + +#ifndef PECULIAR_486 + /* Should check here that |local_arg| is within the valid range */ + if ( exponent >= -2 ) + { + if ( (exponent > -2) || + (argSignif.msw > (unsigned)0xafb0ccc0) ) + { + /* The argument is too large */ + } + } +#endif /* PECULIAR_486 */ + + arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); + adj = norm_Xsig(&argSignif); + accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); + mul_Xsig_Xsig(&accumulator, &accumulator); + shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); + Xsq = XSIG_LL(accumulator); + if ( accumulator.lsw & 0x80000000 ) + Xsq++; + + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + /* Do the basic fixed point polynomial evaluation */ + polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); + + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 6 - adj); + + mul32_Xsig(&arg_signif, leadterm); + add_two_Xsig(&accumulator, &arg_signif, &exponent); + + *expon = exponent + 1; + accum_result->lsw = accumulator.lsw; + accum_result->midw = accumulator.midw; + accum_result->msw = accumulator.msw; + +} diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 00000000000..a36313fb06f --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c @@ -0,0 +1,397 @@ +/*---------------------------------------------------------------------------+ + | poly_sin.c | + | | + | Computation of an approximation of the sin function and the cosine | + | function by a polynomial. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define N_COEFF_P 4 +#define N_COEFF_N 4 + +static const unsigned long long pos_terms_l[N_COEFF_P] = +{ + 0xaaaaaaaaaaaaaaabLL, + 0x00d00d00d00cf906LL, + 0x000006b99159a8bbLL, + 0x000000000d7392e6LL +}; + +static const unsigned long long neg_terms_l[N_COEFF_N] = +{ + 0x2222222222222167LL, + 0x0002e3bc74aab624LL, + 0x0000000b09229062LL, + 0x00000000000c7973LL +}; + + + +#define N_COEFF_PH 4 +#define N_COEFF_NH 4 +static const unsigned long long pos_terms_h[N_COEFF_PH] = +{ + 0x0000000000000000LL, + 0x05b05b05b05b0406LL, + 0x000049f93edd91a9LL, + 0x00000000c9c9ed62LL +}; + +static const unsigned long long neg_terms_h[N_COEFF_NH] = +{ + 0xaaaaaaaaaaaaaa98LL, + 0x001a01a01a019064LL, + 0x0000008f76c68a77LL, + 0x0000000000d58f5eLL +}; + + +/*--- poly_sine() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_sine(FPU_REG *st0_ptr) +{ + int exponent, echange; + Xsig accumulator, argSqrd, argTo4; + unsigned long fix_up, adj; + unsigned long long fixed_arg; + FPU_REG result; + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + /* Split into two ranges, for arguments below and above 1.0 */ + /* The boundary between upper and lower is approx 0.88309101259 */ + if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) + { + /* The argument is <= 0.88309101259 */ + + argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + shr_Xsig(&argSqrd, 2*(-1-exponent)); + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += significand(st0_ptr); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exponent(st0_ptr) + echange); + } + else + { + /* The argument is > 0.88309101259 */ + /* We use sin(st(0)) = cos(pi/2-st(0)) */ + + fixed_arg = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if ( fixed_arg == 0xffffffffffffffffLL ) + fixed_arg = 0; + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ + negate_Xsig(&accumulator); + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + fix_up = 0x898cc517; + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; + } + fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); + + adj = accumulator.lsw; /* temp save */ + accumulator.lsw -= fix_up; + if ( accumulator.lsw > adj ) + XSIG_LL(accumulator) --; + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, echange - 1); + } + + significand(&result) = XSIG_LL(accumulator); + setsign(&result, getsign(st0_ptr)); + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ( (exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x150); + } +#endif /* PARANOID */ + +} + + + +/*--- poly_cos() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_cos(FPU_REG *st0_ptr) +{ + FPU_REG result; + long int exponent, exp2, echange; + Xsig accumulator, argSqrd, fix_up, argTo4; + unsigned long long fixed_arg; + +#ifdef PARANOID + if ( (exponent(st0_ptr) > 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) + { + EXCEPTION(EX_Invalid); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + return; + } +#endif /* PARANOID */ + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) + { + /* arg is < 0.687705 */ + + argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + shr_Xsig(&accumulator, -2*(1+exponent)); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + /* It doesn't matter if accumulator is all zero here, the + following code will work ok */ + negate_Xsig(&accumulator); + + if ( accumulator.lsw & 0x80000000 ) + XSIG_LL(accumulator) ++; + if ( accumulator.msw == 0 ) + { + /* The result is 1.0 */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } + else + { + significand(&result) = XSIG_LL(accumulator); + + /* will be a valid positive nr with expon = -1 */ + setexponentpos(&result, -1); + } + } + else + { + fixed_arg = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if ( fixed_arg == 0xffffffffffffffffLL ) + fixed_arg = 0; + + exponent = -1; + exp2 = -1; + + /* A shift is needed here only for a narrow range of arguments, + i.e. for fixed_arg approx 2^-32, but we pick up more... */ + if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) + { + fixed_arg <<= 16; + exponent -= 16; + exp2 -= 16; + } + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exp2 - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += fixed_arg; + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + XSIG_LL(fix_up) = 0x898cc51701b839a2ll; + fix_up.lsw = 0; + + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; + fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; + } + + exp2 += norm_Xsig(&accumulator); + shr_Xsig(&accumulator, 1); /* Prevent overflow */ + exp2++; + shr_Xsig(&fix_up, 65 + exp2); + + add_Xsig_Xsig(&accumulator, &fix_up); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exp2 + echange); + significand(&result) = XSIG_LL(accumulator); + } + + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ( (exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x151); + } +#endif /* PARANOID */ + +} diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c new file mode 100644 index 00000000000..8df3e03b6e6 --- /dev/null +++ b/arch/x86/math-emu/poly_tan.c @@ -0,0 +1,222 @@ +/*---------------------------------------------------------------------------+ + | poly_tan.c | + | | + | Compute the tan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define HiPOWERop 3 /* odd poly, positive terms */ +static const unsigned long long oddplterm[HiPOWERop] = +{ + 0x0000000000000000LL, + 0x0051a1cf08fca228LL, + 0x0000000071284ff7LL +}; + +#define HiPOWERon 2 /* odd poly, negative terms */ +static const unsigned long long oddnegterm[HiPOWERon] = +{ + 0x1291a9a184244e80LL, + 0x0000583245819c21LL +}; + +#define HiPOWERep 2 /* even poly, positive terms */ +static const unsigned long long evenplterm[HiPOWERep] = +{ + 0x0e848884b539e888LL, + 0x00003c7f18b887daLL +}; + +#define HiPOWERen 2 /* even poly, negative terms */ +static const unsigned long long evennegterm[HiPOWERen] = +{ + 0xf1f0200fd51569ccLL, + 0x003afb46105c4432LL +}; + +static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; + + +/*--- poly_tan() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_tan(FPU_REG *st0_ptr) +{ + long int exponent; + int invert; + Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, + argSignif, fix_up; + unsigned long adj; + + exponent = exponent(st0_ptr); + +#ifdef PARANOID + if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */ + { arith_invalid(0); return; } /* Need a positive number */ +#endif /* PARANOID */ + + /* Split the problem into two domains, smaller and larger than pi/4 */ + if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) ) + { + /* The argument is greater than (approx) pi/4 */ + invert = 1; + accum.lsw = 0; + XSIG_LL(accum) = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + /* Put the binary point at the left. */ + XSIG_LL(accum) <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); + /* This is a special case which arises due to rounding. */ + if ( XSIG_LL(accum) == 0xffffffffffffffffLL ) + { + FPU_settag0(TAG_Valid); + significand(st0_ptr) = 0x8a51e04daabda360LL; + setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative); + return; + } + + argSignif.lsw = accum.lsw; + XSIG_LL(argSignif) = XSIG_LL(accum); + exponent = -1 + norm_Xsig(&argSignif); + } + else + { + invert = 0; + argSignif.lsw = 0; + XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) + XSIG_LL(accum) ++; /* round up */ + } + } + + XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; + mul_Xsig_Xsig(&argSq, &argSq); + XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + /* Compute the negative terms for the numerator polynomial */ + accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); + mul_Xsig_Xsig(&accumulatoro, &argSq); + negate_Xsig(&accumulatoro); + /* Add the positive terms */ + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); + + + /* Compute the positive terms for the denominator polynomial */ + accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); + mul_Xsig_Xsig(&accumulatore, &argSq); + negate_Xsig(&accumulatore); + /* Add the negative terms */ + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); + /* Multiply by arg^2 */ + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + /* de-normalize and divide by 2 */ + shr_Xsig(&accumulatore, -2*(1+exponent) + 1); + negate_Xsig(&accumulatore); /* This does 1 - accumulator */ + + /* Now find the ratio. */ + if ( accumulatore.msw == 0 ) + { + /* accumulatoro must contain 1.0 here, (actually, 0) but it + really doesn't matter what value we use because it will + have negligible effect in later calculations + */ + XSIG_LL(accum) = 0x8000000000000000LL; + accum.lsw = 0; + } + else + { + div_Xsig(&accumulatoro, &accumulatore, &accum); + } + + /* Multiply by 1/3 * arg^3 */ + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &twothirds); + shr_Xsig(&accum, -2*(exponent+1)); + + /* tan(arg) = arg + accum */ + add_two_Xsig(&accum, &argSignif, &exponent); + + if ( invert ) + { + /* We now have the value of tan(pi_2 - arg) where pi_2 is an + approximation for pi/2 + */ + /* The next step is to fix the answer to compensate for the + error due to the approximation used for pi/2 + */ + + /* This is (approx) delta, the error in our approx for pi/2 + (see above). It has an exponent of -65 + */ + XSIG_LL(fix_up) = 0x898cc51701b839a2LL; + fix_up.lsw = 0; + + if ( exponent == 0 ) + adj = 0xffffffff; /* We want approx 1.0 here, but + this is close enough. */ + else if ( exponent > -30 ) + { + adj = accum.msw >> -(exponent+1); /* tan */ + adj = mul_32_32(adj, adj); /* tan^2 */ + } + else + adj = 0; + adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ + + fix_up.msw += adj; + if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ + { + /* Yes, we need to add an msb */ + shr_Xsig(&fix_up, 1); + fix_up.msw |= 0x80000000; + shr_Xsig(&fix_up, 64 + exponent); + } + else + shr_Xsig(&fix_up, 65 + exponent); + + add_two_Xsig(&accum, &fix_up, &exponent); + + /* accum now contains tan(pi/2 - arg). + Use tan(arg) = 1.0 / tan(pi/2 - arg) + */ + accumulatoro.lsw = accumulatoro.midw = 0; + accumulatoro.msw = 0x80000000; + div_Xsig(&accumulatoro, &accum, &accum); + exponent = - exponent - 1; + } + + /* Transfer the result */ + round_Xsig(&accum); + FPU_settag0(TAG_Valid); + significand(st0_ptr) = XSIG_LL(accum); + setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ + +} diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S new file mode 100644 index 00000000000..17315c89ff3 --- /dev/null +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -0,0 +1,135 @@ +/*---------------------------------------------------------------------------+ + | polynomial_Xsig.S | + | | + | Fixed point arithmetic polynomial evaluation. | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void polynomial_Xsig(Xsig *accum, unsigned long long x, | + | unsigned long long terms[], int n) | + | | + | Computes: | + | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | + | and adds the result to the 12 byte Xsig. | + | The terms[] are each 8 bytes, but all computation is performed to 12 byte | + | precision. | + | | + | This function must be used carefully: most overflow of intermediate | + | results is controlled, but overflow of the result is not. | + | | + +---------------------------------------------------------------------------*/ + .file "polynomial_Xsig.S" + +#include "fpu_emu.h" + + +#define TERM_SIZE $8 +#define SUM_MS -20(%ebp) /* sum ms long */ +#define SUM_MIDDLE -24(%ebp) /* sum middle long */ +#define SUM_LS -28(%ebp) /* sum ls long */ +#define ACCUM_MS -4(%ebp) /* accum ms long */ +#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ +#define ACCUM_LS -12(%ebp) /* accum ls long */ +#define OVERFLOWED -16(%ebp) /* addition overflow flag */ + +.text +ENTRY(polynomial_Xsig) + pushl %ebp + movl %esp,%ebp + subl $32,%esp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM2,%esi /* x */ + movl PARAM3,%edi /* terms */ + + movl TERM_SIZE,%eax + mull PARAM4 /* n */ + addl %eax,%edi + + movl 4(%edi),%edx /* terms[n] */ + movl %edx,SUM_MS + movl (%edi),%edx /* terms[n] */ + movl %edx,SUM_MIDDLE + xor %eax,%eax + movl %eax,SUM_LS + movb %al,OVERFLOWED + + subl TERM_SIZE,%edi + decl PARAM4 + js L_accum_done + +L_accum_loop: + xor %eax,%eax + movl %eax,ACCUM_MS + movl %eax,ACCUM_MIDDLE + + movl SUM_MIDDLE,%eax + mull (%esi) /* x ls long */ + movl %edx,ACCUM_LS + + movl SUM_MIDDLE,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull (%esi) /* x ls long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_MIDDLE + adcl %edx,ACCUM_MS + + testb $0xff,OVERFLOWED + jz L_no_overflow + + movl (%esi),%eax + addl %eax,ACCUM_MIDDLE + movl 4(%esi),%eax + adcl %eax,ACCUM_MS /* This could overflow too */ + +L_no_overflow: + +/* + * Now put the sum of next term and the accumulator + * into the sum register + */ + movl ACCUM_LS,%eax + addl (%edi),%eax /* term ls long */ + movl %eax,SUM_LS + movl ACCUM_MIDDLE,%eax + adcl (%edi),%eax /* term ls long */ + movl %eax,SUM_MIDDLE + movl ACCUM_MS,%eax + adcl 4(%edi),%eax /* term ms long */ + movl %eax,SUM_MS + sbbb %al,%al + movb %al,OVERFLOWED /* Used in the next iteration */ + + subl TERM_SIZE,%edi + decl PARAM4 + jns L_accum_loop + +L_accum_done: + movl PARAM1,%edi /* accum */ + movl SUM_LS,%eax + addl %eax,(%edi) + movl SUM_MIDDLE,%eax + adcl %eax,4(%edi) + movl SUM_MS,%eax + adcl %eax,8(%edi) + + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c new file mode 100644 index 00000000000..7cd3b37ac08 --- /dev/null +++ b/arch/x86/math-emu/reg_add_sub.c @@ -0,0 +1,374 @@ +/*---------------------------------------------------------------------------+ + | reg_add_sub.c | + | | + | Functions to add or subtract two registers and put the result in a third. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | For each function, the destination may be any FPU_REG, including one of | + | the source FPU_REGs. | + | Each function returns 0 if the answer is o.k., otherwise a non-zero | + | value is returned, indicating either an exception condition or an | + | internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "fpu_system.h" + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG *dest, int deststnr, int control_w); + +/* + Operates on st(0) and st(n), or on st(0) and temporary data. + The destination must be one of the source st(x). + */ +int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(0); + FPU_REG *dest = &st(deststnr); + u_char signb = getsign(b); + u_char taga = FPU_gettag0(); + u_char signa = getsign(a); + u_char saved_sign = getsign(dest); + int diff, tag, expa, expb; + + if ( !(taga | tagb) ) + { + expa = exponent(a); + expb = exponent(b); + + valid_add: + /* Both registers are valid */ + if (!(signa ^ signb)) + { + /* signs are the same */ + tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb); + } + else + { + /* The signs are different, so do a subtraction */ + diff = expa - expb; + if (!diff) + { + diff = a->sigh - b->sigh; /* This works only if the ms bits + are identical. */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + if (diff > 0) + { + tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); + } + else if ( diff < 0 ) + { + tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa); + } + else + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } + } + + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + goto valid_add; + } + + if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + if ( deststnr == 0 ) + return real_2op_NaN(b, tagb, deststnr, a); + else + return real_2op_NaN(a, taga, deststnr, a); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb, + dest, deststnr, control_w); +} + + +/* Subtract b from a. (a-b) -> dest */ +int FPU_sub(int flags, int rm, int control_w) +{ + FPU_REG const *a, *b; + FPU_REG *dest; + u_char taga, tagb, signa, signb, saved_sign, sign; + int diff, tag = 0, expa, expb, deststnr; + + a = &st(0); + taga = FPU_gettag0(); + + deststnr = 0; + if ( flags & LOADED ) + { + b = (FPU_REG *)rm; + tagb = flags & 0x0f; + } + else + { + b = &st(rm); + tagb = FPU_gettagi(rm); + + if ( flags & DEST_RM ) + deststnr = rm; + } + + signa = getsign(a); + signb = getsign(b); + + if ( flags & REV ) + { + signa ^= SIGN_NEG; + signb ^= SIGN_NEG; + } + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if ( !(taga | tagb) ) + { + expa = exponent(a); + expb = exponent(b); + + valid_subtract: + /* Both registers are valid */ + + diff = expa - expb; + + if (!diff) + { + diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + switch ( (((int)signa)*2 + signb) / SIGN_NEG ) + { + case 0: /* P - P */ + case 3: /* N - N */ + if (diff > 0) + { + /* |a| > |b| */ + tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); + } + else if ( diff == 0 ) + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } + else + { + sign = signa ^ SIGN_NEG; + tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa); + } + break; + case 1: /* P - N */ + tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb); + break; + case 2: /* N - P */ + tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb); + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x111); + return -1; +#endif + } + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + + goto valid_subtract; + } + + if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + FPU_REG const *d1, *d2; + if ( flags & REV ) + { + d1 = b; + d2 = a; + } + else + { + d1 = a; + d2 = b; + } + if ( flags & LOADED ) + return real_2op_NaN(b, tagb, deststnr, d1); + if ( flags & DEST_RM ) + return real_2op_NaN(a, taga, deststnr, d2); + else + return real_2op_NaN(b, tagb, deststnr, d2); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, + dest, deststnr, control_w); +} + + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG *dest, int deststnr, int control_w) +{ + if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0) ) + return FPU_Exception; + + if (taga == TAG_Zero) + { + if (tagb == TAG_Zero) + { + /* Both are zero, result will be zero. */ + u_char different_signs = signa ^ signb; + + FPU_copy_to_regi(a, TAG_Zero, deststnr); + if ( different_signs ) + { + /* Signs are different. */ + /* Sign of answer depends upon rounding mode. */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + } + else + setsign(dest, signa); /* signa may differ from the sign of a. */ + return TAG_Zero; + } + else + { + reg_copy(b, dest); + if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) ) + { + /* A pseudoDenormal, convert it. */ + addexponent(dest, 1); + tagb = TAG_Valid; + } + else if ( tagb > TAG_Empty ) + tagb = TAG_Special; + setsign(dest, signb); /* signb may differ from the sign of b. */ + FPU_settagi(deststnr, tagb); + return tagb; + } + } + else if (tagb == TAG_Zero) + { + reg_copy(a, dest); + if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) ) + { + /* A pseudoDenormal */ + addexponent(dest, 1); + taga = TAG_Valid; + } + else if ( taga > TAG_Empty ) + taga = TAG_Special; + setsign(dest, signa); /* signa may differ from the sign of a. */ + FPU_settagi(deststnr, taga); + return taga; + } + else if (taga == TW_Infinity) + { + if ( (tagb != TW_Infinity) || (signa == signb) ) + { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, signa); /* signa may differ from the sign of a. */ + return taga; + } + /* Infinity-Infinity is undefined. */ + return arith_invalid(deststnr); + } + else if (tagb == TW_Infinity) + { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, signb); /* signb may differ from the sign of b. */ + return tagb; + } + +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x101); +#endif + + return FPU_Exception; +} + diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c new file mode 100644 index 00000000000..f37c5b5a35a --- /dev/null +++ b/arch/x86/math-emu/reg_compare.c @@ -0,0 +1,381 @@ +/*---------------------------------------------------------------------------+ + | reg_compare.c | + | | + | Compare two floating point registers | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | compare() is the core FPU_REG comparison function | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +static int compare(FPU_REG const *b, int tagb) +{ + int diff, exp0, expb; + u_char st0_tag; + FPU_REG *st0_ptr; + FPU_REG x, y; + u_char st0_sign, signb = getsign(b); + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + st0_sign = getsign(st0_ptr); + + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) + || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) ) + { + if ( st0_tag == TAG_Zero ) + { + if ( tagb == TAG_Zero ) return COMP_A_eq_B; + if ( tagb == TAG_Valid ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if ( tagb == TW_Denormal ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + } + else if ( tagb == TAG_Zero ) + { + if ( st0_tag == TAG_Valid ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + if ( st0_tag == TW_Denormal ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + } + + if ( st0_tag == TW_Infinity ) + { + if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + else if ( tagb == TW_Denormal ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + else if ( tagb == TW_Infinity ) + { + /* The 80486 book says that infinities can be equal! */ + return (st0_sign == signb) ? COMP_A_eq_B : + ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + } + /* Fall through to the NaN code */ + } + else if ( tagb == TW_Infinity ) + { + if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if ( st0_tag == TW_Denormal ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + /* Fall through to the NaN code */ + } + + /* The only possibility now should be that one of the arguments + is a NaN */ + if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) ) + { + int signalling = 0, unsupported = 0; + if ( st0_tag == TW_NaN ) + { + signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000; + unsupported = !((exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000)); + } + if ( tagb == TW_NaN ) + { + signalling |= (b->sigh & 0xc0000000) == 0x80000000; + unsupported |= !((exponent(b) == EXP_OVER) + && (b->sigh & 0x80000000)); + } + if ( signalling || unsupported ) + return COMP_No_Comp | COMP_SNaN | COMP_NaN; + else + /* Neither is a signaling NaN */ + return COMP_No_Comp | COMP_NaN; + } + + EXCEPTION(EX_Invalid); + } + + if (st0_sign != signb) + { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) ) + { + FPU_to_exp16(st0_ptr, &x); + FPU_to_exp16(b, &y); + st0_ptr = &x; + b = &y; + exp0 = exponent16(st0_ptr); + expb = exponent16(b); + } + else + { + exp0 = exponent(st0_ptr); + expb = exponent(b); + } + +#ifdef PARANOID + if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); + if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); +#endif /* PARANOID */ + + diff = exp0 - expb; + if ( diff == 0 ) + { + diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are + identical */ + if ( diff == 0 ) + { + diff = st0_ptr->sigl > b->sigl; + if ( diff == 0 ) + diff = -(st0_ptr->sigl < b->sigl); + } + } + + if ( diff > 0 ) + { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + if ( diff < 0 ) + { + return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + return COMP_A_eq_B + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + +} + + +/* This function requires that st(0) is not empty */ +int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) +{ + int f = 0, c; + + c = compare(loaded_data, loaded_tag); + + if (c & COMP_NaN) + { + EXCEPTION(EX_Invalid); + f = SW_C3 | SW_C2 | SW_C0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x121); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + + +static int compare_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x122); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + + +static int compare_u_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + if (c & COMP_SNaN) /* This is the only difference between + un-ordered and ordinary comparisons */ + { + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x123); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +void fcom_st(void) +{ + /* fcom st(i) */ + compare_st_st(FPU_rm); +} + + +void fcompst(void) +{ + /* fcomp st(i) */ + if ( !compare_st_st(FPU_rm) ) + FPU_pop(); +} + + +void fcompp(void) +{ + /* fcompp */ + if (FPU_rm != 1) + { + FPU_illegal(); + return; + } + if ( !compare_st_st(1) ) + poppop(); +} + + +void fucom_(void) +{ + /* fucom st(i) */ + compare_u_st_st(FPU_rm); + +} + + +void fucomp(void) +{ + /* fucomp st(i) */ + if ( !compare_u_st_st(FPU_rm) ) + FPU_pop(); +} + + +void fucompp(void) +{ + /* fucompp */ + if (FPU_rm == 1) + { + if ( !compare_u_st_st(1) ) + poppop(); + } + else + FPU_illegal(); +} diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c new file mode 100644 index 00000000000..a8501580196 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.c @@ -0,0 +1,120 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.c | + | | + | All of the constant FPU_REGs | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" +#include "control_w.h" + + +#define MAKE_REG(s,e,l,h) { l, h, \ + ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } + +FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); +#if 0 +FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); +FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); +#endif /* 0 */ +static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); +static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); +FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); +static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); +static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); + +/* Extra bits to take pi/2 to more than 128 bits precision. */ +FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, + 0xfc8f8cbb, 0xece675d1); + +/* Only the sign (and tag) is used in internal zeroes */ +FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); + +/* Only the sign and significand (and tag) are used in internal NaNs */ +/* The 80486 never generates one of these +FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); + */ +/* This is the real indefinite QNaN */ +FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); + +/* Only the sign (and tag) is used in internal infinities */ +FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); + + +static void fld_const(FPU_REG const *c, int adj, u_char tag) +{ + FPU_REG *st_new_ptr; + + if ( STACK_OVERFLOW ) + { + FPU_stack_overflow(); + return; + } + push(); + reg_copy(c, st_new_ptr); + st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to + borrow or carry. */ + FPU_settag0(tag); + clear_C1(); +} + +/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP + (and not one of RC_RND or RC_UP). + */ +#define DOWN_OR_CHOP(x) (x & RC_DOWN) + +static void fld1(int rc) +{ + fld_const(&CONST_1, 0, TAG_Valid); +} + +static void fldl2t(int rc) +{ + fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); +} + +static void fldl2e(int rc) +{ + fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldpi(int rc) +{ + fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldlg2(int rc) +{ + fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldln2(int rc) +{ + fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldz(int rc) +{ + fld_const(&CONST_Z, 0, TAG_Zero); +} + +typedef void (*FUNC_RC)(int); + +static FUNC_RC constants_table[] = { + fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal +}; + +void fconst(void) +{ + (constants_table[FPU_rm])(control_word & CW_RC); +} diff --git a/arch/x86/math-emu/reg_constant.h b/arch/x86/math-emu/reg_constant.h new file mode 100644 index 00000000000..1bffaec3a13 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.h @@ -0,0 +1,25 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _REG_CONSTANT_H_ +#define _REG_CONSTANT_H_ + +#include "fpu_emu.h" + +extern FPU_REG const CONST_1; +extern FPU_REG const CONST_PI; +extern FPU_REG const CONST_PI2; +extern FPU_REG const CONST_PI2extra; +extern FPU_REG const CONST_PI4; +extern FPU_REG const CONST_Z; +extern FPU_REG const CONST_PINF; +extern FPU_REG const CONST_INF; +extern FPU_REG const CONST_MINF; +extern FPU_REG const CONST_QNaN; + +#endif /* _REG_CONSTANT_H_ */ diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c new file mode 100644 index 00000000000..45a25875270 --- /dev/null +++ b/arch/x86/math-emu/reg_convert.c @@ -0,0 +1,53 @@ +/*---------------------------------------------------------------------------+ + | reg_convert.c | + | | + | Convert register representation. | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) +{ + int sign = getsign(a); + + *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); + + /* Set up the exponent as a 16 bit quantity. */ + setexponent16(x, exponent(a)); + + if ( exponent16(x) == EXP_UNDER ) + { + /* The number is a de-normal or pseudodenormal. */ + /* We only deal with the significand and exponent. */ + + if (x->sigh & 0x80000000) + { + /* Is a pseudodenormal. */ + /* This is non-80486 behaviour because the number + loses its 'denormal' identity. */ + addexponent(x, 1); + } + else + { + /* Is a denormal. */ + addexponent(x, 1); + FPU_normalize_nuo(x); + } + } + + if ( !(x->sigh & 0x80000000) ) + { + EXCEPTION(EX_INTERNAL | 0x180); + } + + return sign; +} + diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c new file mode 100644 index 00000000000..5cee7ff920d --- /dev/null +++ b/arch/x86/math-emu/reg_divide.c @@ -0,0 +1,207 @@ +/*---------------------------------------------------------------------------+ + | reg_divide.c | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" + +/* + Divide one register by another and put the result into a third register. + */ +int FPU_div(int flags, int rm, int control_w) +{ + FPU_REG x, y; + FPU_REG const *a, *b, *st0_ptr, *st_ptr; + FPU_REG *dest; + u_char taga, tagb, signa, signb, sign, saved_sign; + int tag, deststnr; + + if ( flags & DEST_RM ) + deststnr = rm; + else + deststnr = 0; + + if ( flags & REV ) + { + b = &st(0); + st0_ptr = b; + tagb = FPU_gettag0(); + if ( flags & LOADED ) + { + a = (FPU_REG *)rm; + taga = flags & 0x0f; + } + else + { + a = &st(rm); + st_ptr = a; + taga = FPU_gettagi(rm); + } + } + else + { + a = &st(0); + st0_ptr = a; + taga = FPU_gettag0(); + if ( flags & LOADED ) + { + b = (FPU_REG *)rm; + tagb = flags & 0x0f; + } + else + { + b = &st(rm); + st_ptr = b; + tagb = FPU_gettagi(rm); + } + } + + signa = getsign(a); + signb = getsign(b); + + sign = signa ^ signb; + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if ( !(taga | tagb) ) + { + /* Both regs Valid, this should be the most common case. */ + reg_copy(a, &x); + reg_copy(b, &y); + setpositive(&x); + setpositive(&y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + + if ( tag < 0 ) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + if ( tag < 0 ) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } + else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) + { + if ( tagb != TAG_Zero ) + { + /* Want to find Zero/Valid */ + if ( tagb == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return FPU_Exception; + } + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } + /* We have an exception condition, either 0/0 or Valid/Zero. */ + if ( taga == TAG_Zero ) + { + /* 0/0 */ + return arith_invalid(deststnr); + } + /* Valid/Zero */ + return FPU_divide_by_zero(deststnr, sign); + } + /* Must have infinities, NaNs, etc */ + else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + if ( flags & LOADED ) + return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr); + + if ( flags & DEST_RM ) + { + int tag; + tag = FPU_gettag0(); + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm)); + } + else + { + int tag; + tag = FPU_gettagi(rm); + if ( tag == TAG_Special ) + tag = FPU_Special(&st(rm)); + return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm)); + } + } + else if (taga == TW_Infinity) + { + if (tagb == TW_Infinity) + { + /* infinity/infinity */ + return arith_invalid(deststnr); + } + else + { + /* tagb must be Valid or Zero */ + if ( (tagb == TW_Denormal) && (denormal_operand() < 0) ) + return FPU_Exception; + + /* Infinity divided by Zero or Valid does + not raise and exception, but returns Infinity */ + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return taga; + } + } + else if (tagb == TW_Infinity) + { + if ( (taga == TW_Denormal) && (denormal_operand() < 0) ) + return FPU_Exception; + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c new file mode 100644 index 00000000000..e976caef649 --- /dev/null +++ b/arch/x86/math-emu/reg_ld_str.c @@ -0,0 +1,1375 @@ +/*---------------------------------------------------------------------------+ + | reg_ld_str.c | + | | + | All of the functions which transfer data between user memory and FPU_REGs.| + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +#include + +#include "fpu_system.h" +#include "exception.h" +#include "reg_constant.h" +#include "control_w.h" +#include "status_w.h" + + +#define DOUBLE_Emax 1023 /* largest valid exponent */ +#define DOUBLE_Ebias 1023 +#define DOUBLE_Emin (-1022) /* smallest valid exponent */ + +#define SINGLE_Emax 127 /* largest valid exponent */ +#define SINGLE_Ebias 127 +#define SINGLE_Emin (-126) /* smallest valid exponent */ + + +static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) +{ + u_char tag; + + setexponent16(r, exp); + + tag = FPU_normalize_nuo(r); + stdexp(r); + if ( sign ) + setnegative(r); + + return tag; +} + + +int FPU_tagof(FPU_REG *ptr) +{ + int exp; + + exp = exponent16(ptr) & 0x7fff; + if ( exp == 0 ) + { + if ( !(ptr->sigh | ptr->sigl) ) + { + return TAG_Zero; + } + /* The number is a de-normal or pseudodenormal. */ + return TAG_Special; + } + + if ( exp == 0x7fff ) + { + /* Is an Infinity, a NaN, or an unsupported data type. */ + return TAG_Special; + } + + if ( !(ptr->sigh & 0x80000000) ) + { + /* Unsupported data type. */ + /* Valid numbers have the ms bit set to 1. */ + /* Unnormal. */ + return TAG_Special; + } + + return TAG_Valid; +} + + +/* Get a long double from user memory */ +int FPU_load_extended(long double __user *s, int stnr) +{ + FPU_REG *sti_ptr = &st(stnr); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + __copy_from_user(sti_ptr, s, 10); + RE_ENTRANT_CHECK_ON; + + return FPU_tagof(sti_ptr); +} + + +/* Get a double from user memory */ +int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) +{ + int exp, tag, negative; + unsigned m64, l64; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, dfloat, 8); + FPU_get_user(m64, 1 + (unsigned long __user *) dfloat); + FPU_get_user(l64, (unsigned long __user *) dfloat); + RE_ENTRANT_CHECK_ON; + + negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; + m64 &= 0xfffff; + if ( exp > DOUBLE_Emax + EXTENDED_Ebias ) + { + /* Infinity or NaN */ + if ((m64 == 0) && (l64 == 0)) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } + else + { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } + else if ( exp < DOUBLE_Emin + EXTENDED_Ebias ) + { + /* Zero or de-normal */ + if ((m64 == 0) && (l64 == 0)) + { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + exp = 0; + tag = TAG_Zero; + } + else + { + /* De-normal */ + loaded_data->sigh = m64 << 11; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + return normalize_no_excep(loaded_data, DOUBLE_Emin, negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } + } + else + { + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); + + return tag; +} + + +/* Get a float from user memory */ +int FPU_load_single(float __user *single, FPU_REG *loaded_data) +{ + unsigned m32; + int exp, tag, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, single, 4); + FPU_get_user(m32, (unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + + negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + + if (!(m32 & 0x7fffffff)) + { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + addexponent(loaded_data, negative); + return TAG_Zero; + } + exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; + m32 = (m32 & 0x7fffff) << 8; + if ( exp < SINGLE_Emin + EXTENDED_Ebias ) + { + /* De-normals */ + loaded_data->sigh = m32; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, SINGLE_Emin, negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } + else if ( exp > SINGLE_Emax + EXTENDED_Ebias ) + { + /* Infinity or NaN */ + if ( m32 == 0 ) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } + else + { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } + else + { + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); /* Set the sign. */ + + return tag; +} + + +/* Get a long long from user memory */ +int FPU_load_int64(long long __user *_s) +{ + long long s; + int sign; + FPU_REG *st0_ptr = &st(0); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 8); + if (copy_from_user(&s,_s,8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { + reg_copy(&CONST_Z, st0_ptr); + return TAG_Zero; + } + + if (s > 0) + sign = SIGN_Positive; + else + { + s = -s; + sign = SIGN_Negative; + } + + significand(st0_ptr) = s; + + return normalize_no_excep(st0_ptr, 63, sign); +} + + +/* Get a long from user memory */ +int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) +{ + long s; + int negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 4); + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } + + if (s > 0) + negative = SIGN_Positive; + else + { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 31, negative); +} + + +/* Get a short from user memory */ +int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) +{ + int s, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 2); + /* Cast as short to get the sign extended. */ + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } + + if (s > 0) + negative = SIGN_Positive; + else + { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s << 16; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 15, negative); +} + + +/* Get a packed bcd array from user memory */ +int FPU_load_bcd(u_char __user *s) +{ + FPU_REG *st0_ptr = &st(0); + int pos; + u_char bcd; + long long l=0; + int sign; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + RE_ENTRANT_CHECK_ON; + for ( pos = 8; pos >= 0; pos--) + { + l *= 10; + RE_ENTRANT_CHECK_OFF; + FPU_get_user(bcd, s+pos); + RE_ENTRANT_CHECK_ON; + l += bcd >> 4; + l *= 10; + l += bcd & 0x0f; + } + + RE_ENTRANT_CHECK_OFF; + FPU_get_user(sign, s+9); + sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; + RE_ENTRANT_CHECK_ON; + + if ( l == 0 ) + { + reg_copy(&CONST_Z, st0_ptr); + addexponent(st0_ptr, sign); /* Set the sign. */ + return TAG_Zero; + } + else + { + significand(st0_ptr) = l; + return normalize_no_excep(st0_ptr, 63, sign); + } +} + +/*===========================================================================*/ + +/* Put a long double into user memory */ +int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d) +{ + /* + The only exception raised by an attempt to store to an + extended format is the Invalid Stack exception, i.e. + attempting to store from an empty register. + */ + + if ( st0_tag != TAG_Empty ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + + FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d); + FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4)); + FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8)); + RE_ENTRANT_CHECK_ON; + + return 1; + } + + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + FPU_put_user(0, (unsigned long __user *) d); + FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d); + FPU_put_user(0xffff, 4 + (short __user *) d); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + +} + + +/* Put a double into user memory */ +int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) +{ + unsigned long l[2]; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + if ( st0_tag == TAG_Valid ) + { + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if ( exp < DOUBLE_Emin ) /* It may be a denormal */ + { + addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ + + denormal_arg: + + if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && + (st0_ptr->sigl & 0x000007ff)) ) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & CW_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & CW_Precision) ) + return 0; + } + l[0] = tmp.sigl; + l[1] = tmp.sigh; + } + else + { + if ( tmp.sigl & 0x000007ff ) + { + precision_loss = 1; + switch (control_word & CW_RC) + { + case RC_RND: + /* Rounding can get a little messy.. */ + increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ + ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff; + break; + case RC_UP: /* towards +infinity */ + increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate the mantissa */ + tmp.sigl &= 0xfffff800; + + if ( increment ) + { + if ( tmp.sigl >= 0xfffff800 ) + { + /* the sigl part overflows */ + if ( tmp.sigh == 0xffffffff ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if (exp >= EXP_OVER) + goto overflow; + } + else + { + tmp.sigh ++; + } + tmp.sigl = 0x00000000; + } + else + { + /* We only need to increment sigl */ + tmp.sigl += 0x00000800; + } + } + } + else + precision_loss = 0; + + l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); + l[1] = ((tmp.sigh >> 11) & 0xfffff); + + if ( exp > DOUBLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + /* Overflow to infinity */ + l[0] = 0x00000000; /* Set to */ + l[1] = 0x7ff00000; /* + INF */ + } + else + { + if ( precision_loss ) + { + if ( increment ) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); + } + } + } + else if (st0_tag == TAG_Zero) + { + /* Number is zero */ + l[0] = 0; + l[1] = 0; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( st0_tag == TW_Denormal ) + { + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); +#endif /* PECULIAR_486 */ + reg_copy(st0_ptr, &tmp); + goto denormal_arg; + } + else if (st0_tag == TW_Infinity) + { + l[0] = 0; + l[1] = 0x7ff00000; + } + else if (st0_tag == TW_NaN) + { + /* Is it really a NaN ? */ + if ( (exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000) ) + { + /* See if we can get a valid NaN from the FPU_REG */ + l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); + l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + l[1] |= (0x40000000 >> 11); + } + l[1] |= 0x7ff00000; + } + else + { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + l[0] = 0; + l[1] = 0xfff80000; + } + } + } + else if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,dfloat,8); + FPU_put_user(0, (unsigned long __user *) dfloat); + FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + if ( getsign(st0_ptr) ) + l[1] |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,dfloat,8); + FPU_put_user(l[0], (unsigned long __user *)dfloat); + FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a float into user memory */ +int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) +{ + long templ = 0; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + if ( st0_tag == TAG_Valid ) + { + + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if ( exp < SINGLE_Emin ) + { + addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ + + denormal_arg: + + if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigl == 0x00800000) && + ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & CW_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & CW_Precision) ) + return 0; + } + templ = tmp.sigl; + } + else + { + if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) + { + unsigned long sigh = tmp.sigh; + unsigned long sigl = tmp.sigl; + + precision_loss = 1; + switch (control_word & CW_RC) + { + case RC_RND: + increment = ((sigh & 0xff) > 0x80) /* more than half */ + || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ + || ((sigh & 0x180) == 0x180); /* round to even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = signpositive(&tmp) + ? 0 : (sigl | (sigh & 0xff)); + break; + case RC_UP: /* towards +infinity */ + increment = signpositive(&tmp) + ? (sigl | (sigh & 0xff)) : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate part of the mantissa */ + tmp.sigl = 0; + + if (increment) + { + if ( sigh >= 0xffffff00 ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if ( exp >= EXP_OVER ) + goto overflow; + } + else + { + tmp.sigh &= 0xffffff00; + tmp.sigh += 0x100; + } + } + else + { + tmp.sigh &= 0xffffff00; /* Finish the truncation */ + } + } + else + precision_loss = 0; + + templ = (tmp.sigh >> 8) & 0x007fffff; + + if ( exp > SINGLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ + /* Masked response is overflow to infinity. */ + templ = 0x7f800000; + } + else + { + if ( precision_loss ) + { + if ( increment ) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; + } + } + } + else if (st0_tag == TAG_Zero) + { + templ = 0; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if (st0_tag == TW_Denormal) + { + reg_copy(st0_ptr, &tmp); + + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); +#endif /* PECULIAR_486 */ + goto denormal_arg; + } + else if (st0_tag == TW_Infinity) + { + templ = 0x7f800000; + } + else if (st0_tag == TW_NaN) + { + /* Is it really a NaN ? */ + if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) ) + { + /* See if we can get a valid NaN from the FPU_REG */ + templ = st0_ptr->sigh >> 8; + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + templ |= (0x40000000 >> 8); + } + templ |= 0x7f800000; + } + else + { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + templ = 0xffc00000; + } + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x164); + return 0; + } +#endif + } + else if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & EX_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,single,4); + FPU_put_user(0xffc00000, (unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x163); + return 0; + } +#endif + if ( getsign(st0_ptr) ) + templ |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,single,4); + FPU_put_user(templ,(unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long long into user memory */ +int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) +{ + FPU_REG t; + long long tll; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ((long *)&tll)[0] = t.sigl; + ((long *)&tll)[1] = t.sigh; + if ( (precision_loss == 1) || + ((t.sigh & 0x80000000) && + !((t.sigh == 0x80000000) && (t.sigl == 0) && + signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + tll = 0x8000000000000000LL; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + tll = - tll; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,8); + if (copy_to_user(d, &tll, 8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long into user memory */ +int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) +{ + FPU_REG t; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0x80000000) && + !((t.sigl == 0x80000000) && signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x80000000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + t.sigl = -(long)t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,4); + FPU_put_user(t.sigl, (unsigned long __user *) d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a short into user memory */ +int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) +{ + FPU_REG t; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0xffff8000) && + !((t.sigl == 0x8000) && signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x8000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + t.sigl = -t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,2); + FPU_put_user((short)t.sigl, d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a packed bcd array into user memory */ +int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) +{ + FPU_REG t; + unsigned long long ll; + u_char b; + int i, precision_loss; + u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ll = significand(&t); + + /* Check for overflow, by comparing with 999999999999999999 decimal. */ + if ( (t.sigh > 0x0de0b6b3) || + ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & CW_Invalid ) + { + /* Produce the QNaN "indefinite" */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + for ( i = 0; i < 7; i++) + FPU_put_user(0, d+i); /* These bytes "undefined" */ + FPU_put_user(0xc0, d+7); /* This byte "undefined" */ + FPU_put_user(0xff, d+8); + FPU_put_user(0xff, d+9); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + else if ( precision_loss ) + { + /* Precision loss doesn't stop the data transfer */ + set_precision_flag(precision_loss); + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + RE_ENTRANT_CHECK_ON; + for ( i = 0; i < 9; i++) + { + b = FPU_div_small(&ll, 10); + b |= (FPU_div_small(&ll, 10)) << 4; + RE_ENTRANT_CHECK_OFF; + FPU_put_user(b, d+i); + RE_ENTRANT_CHECK_ON; + } + RE_ENTRANT_CHECK_OFF; + FPU_put_user(sign, d+9); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/*===========================================================================*/ + +/* r gets mangled such that sig is int, sign: + it is NOT normalized */ +/* The return value (in eax) is zero if the result is exact, + if bits are changed due to rounding, truncation, etc, then + a non-zero value is returned */ +/* Overflow is signalled by a non-zero return value (in eax). + In the case of overflow, the returned significand always has the + largest possible value */ +int FPU_round_to_int(FPU_REG *r, u_char tag) +{ + u_char very_big; + unsigned eax; + + if (tag == TAG_Zero) + { + /* Make sure that zero is returned */ + significand(r) = 0; + return 0; /* o.k. */ + } + + if (exponent(r) > 63) + { + r->sigl = r->sigh = ~0; /* The largest representable number */ + return 1; /* overflow */ + } + + eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); + very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ +#define half_or_more (eax & 0x80000000) +#define frac_part (eax) +#define more_than_half ((eax & 0x80000001) == 0x80000001) + switch (control_word & CW_RC) + { + case RC_RND: + if ( more_than_half /* nearest */ + || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_DOWN: + if (frac_part && getsign(r)) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_UP: + if (frac_part && !getsign(r)) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_CHOP: + break; + } + + return eax ? PRECISION_LOST_DOWN : 0; + +} + +/*===========================================================================*/ + +u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) +{ + unsigned short tag_word = 0; + u_char tag; + int i; + + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x0e); + FPU_get_user(control_word, (unsigned short __user *) s); + FPU_get_user(partial_status, (unsigned short __user *) (s+2)); + FPU_get_user(tag_word, (unsigned short __user *) (s+4)); + FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6)); + FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8)); + FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a)); + FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c)); + RE_ENTRANT_CHECK_ON; + s += 0x0e; + if ( addr_modes.default_mode == VM86 ) + { + instruction_address.offset + += (instruction_address.selector & 0xf000) << 4; + operand_address.offset += (operand_address.selector & 0xf000) << 4; + } + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x1c); + FPU_get_user(control_word, (unsigned short __user *) s); + FPU_get_user(partial_status, (unsigned short __user *) (s+4)); + FPU_get_user(tag_word, (unsigned short __user *) (s+8)); + FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c)); + FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10)); + FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12)); + FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14)); + FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18)); + RE_ENTRANT_CHECK_ON; + s += 0x1c; + } + +#ifdef PECULIAR_486 + control_word &= ~0xe080; +#endif /* PECULIAR_486 */ + + top = (partial_status >> SW_Top_Shift) & 7; + + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); + + for ( i = 0; i < 8; i++ ) + { + tag = tag_word & 3; + tag_word >>= 2; + + if ( tag == TAG_Empty ) + /* New tag is empty. Accept it */ + FPU_settag(i, TAG_Empty); + else if ( FPU_gettag(i) == TAG_Empty ) + { + /* Old tag is empty and new tag is not empty. New tag is determined + by old reg contents */ + if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias ) + { + if ( !(fpu_register(i).sigl | fpu_register(i).sigh) ) + FPU_settag(i, TAG_Zero); + else + FPU_settag(i, TAG_Special); + } + else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias ) + { + FPU_settag(i, TAG_Special); + } + else if ( fpu_register(i).sigh & 0x80000000 ) + FPU_settag(i, TAG_Valid); + else + FPU_settag(i, TAG_Special); /* An Un-normal */ + } + /* Else old tag is not empty and new tag is not empty. Old tag + remains correct */ + } + + return s; +} + + +void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + int i, regnr; + u_char __user *s = fldenv(addr_modes, data_address); + int offset = (top & 7) * 10, other = 80 - offset; + + /* Copy all registers in stack order. */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ,s,80); + __copy_from_user(register_base+offset, s, other); + if ( offset ) + __copy_from_user(register_base, s+other, offset); + RE_ENTRANT_CHECK_ON; + + for ( i = 0; i < 8; i++ ) + { + regnr = (i+top) & 7; + if ( FPU_gettag(regnr) != TAG_Empty ) + /* The loaded data over-rides all other cases. */ + FPU_settag(regnr, FPU_tagof(&st(i))); + } + +} + + +u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) +{ + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,14); +#ifdef PECULIAR_486 + FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d); +#else + FPU_put_user(control_word, (unsigned short __user *) d); +#endif /* PECULIAR_486 */ + FPU_put_user(status_word(), (unsigned short __user *) (d+2)); + FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4)); + FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6)); + FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a)); + if ( addr_modes.default_mode == VM86 ) + { + FPU_put_user((instruction_address.offset & 0xf0000) >> 4, + (unsigned short __user *) (d+8)); + FPU_put_user((operand_address.offset & 0xf0000) >> 4, + (unsigned short __user *) (d+0x0c)); + } + else + { + FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8)); + FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c)); + } + RE_ENTRANT_CHECK_ON; + d += 0x0e; + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 7*4); +#ifdef PECULIAR_486 + control_word &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + control_word |= 0xffff0040; + partial_status = status_word() | 0xffff0000; + fpu_tag_word |= 0xffff0000; + I387.soft.fcs &= ~0xf8000000; + I387.soft.fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + if (__copy_to_user(d, &control_word, 7*4)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + d += 0x1c; + } + + control_word |= CW_Exceptions; + partial_status &= ~(SW_Summary | SW_Backward); + + return d; +} + + +void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + u_char __user *d; + int offset = (top & 7) * 10, other = 80 - offset; + + d = fstenv(addr_modes, data_address); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,80); + + /* Copy all registers in stack order. */ + if (__copy_to_user(d, register_base+offset, other)) + FPU_abort; + if ( offset ) + if (__copy_to_user(d+other, register_base, offset)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + finit(); +} + +/*===========================================================================*/ diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c new file mode 100644 index 00000000000..40f50b61bc6 --- /dev/null +++ b/arch/x86/math-emu/reg_mul.c @@ -0,0 +1,132 @@ +/*---------------------------------------------------------------------------+ + | reg_mul.c | + | | + | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Returns the tag of the result if no exceptions or errors occurred. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "exception.h" +#include "reg_constant.h" +#include "fpu_system.h" + + +/* + Multiply two registers to give a register result. + The sources are st(deststnr) and (b,tagb,signb). + The destination is st(deststnr). + */ +/* This routine must be called with non-empty source registers */ +int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(deststnr); + FPU_REG *dest = a; + u_char taga = FPU_gettagi(deststnr); + u_char saved_sign = getsign(dest); + u_char sign = (getsign(a) ^ getsign(b)); + int tag; + + + if ( !(taga | tagb) ) + { + /* Both regs Valid, this should be the most common case. */ + + tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b)); + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_mul(&x, &y, dest, control_w, sign, + exponent16(&x) + exponent16(&y)); + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) + { + if ( ((tagb == TW_Denormal) || (taga == TW_Denormal)) + && (denormal_operand() < 0) ) + return FPU_Exception; + + /* Must have either both arguments == zero, or + one valid and the other zero. + The result is therefore zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* The 80486 book says that the answer is +0, but a real + 80486 behaves this way. + IEEE-754 apparently says it should be this way. */ + setsign(dest, sign); + return TAG_Zero; + } + /* Must have infinities, NaNs, etc */ + else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + return real_2op_NaN(b, tagb, deststnr, &st(0)); + } + else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero)) + || ((tagb == TW_Infinity) && (taga == TAG_Zero)) ) + { + return arith_invalid(deststnr); /* Zero*Infinity is invalid */ + } + else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0) ) + { + return FPU_Exception; + } + else if (taga == TW_Infinity) + { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } + else if (tagb == TW_Infinity) + { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } + +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S new file mode 100644 index 00000000000..8b6352efcee --- /dev/null +++ b/arch/x86/math-emu/reg_norm.S @@ -0,0 +1,147 @@ +/*---------------------------------------------------------------------------+ + | reg_norm.S | + | | + | Copyright (C) 1992,1993,1994,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Normalize the value in a FPU_REG. | + | | + | Call from C as: | + | int FPU_normalize(FPU_REG *n) | + | | + | int FPU_normalize_nuo(FPU_REG *n) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + + +.text +ENTRY(FPU_normalize) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_done /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_done: + cmpw EXP_OVER,EXP(%ebx) + jge L_overflow + + cmpw EXP_UNDER,EXP(%ebx) + jle L_underflow + +L_exit_valid: + movl TAG_Valid,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + andw $0x7fff,EXP(%ebx) + +L_exit: + popl %ebx + leave + ret + + +L_zero: + movw $0,EXP(%ebx) + movl TAG_Zero,%eax + jmp L_exit + +L_underflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_underflow + pop %ebx + jmp L_exit + +L_overflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_overflow + pop %ebx + jmp L_exit + + + +/* Normalise without reporting underflow or overflow */ +ENTRY(FPU_normalize_nuo) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_exit_nuo_valid /* Already normalized */ + jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_exit_nuo_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_nuo_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_exit_nuo_valid: + movl TAG_Valid,%eax + + popl %ebx + leave + ret + +L_exit_nuo_zero: + movl TAG_Zero,%eax + movw EXP_UNDER,EXP(%ebx) + + popl %ebx + leave + ret diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S new file mode 100644 index 00000000000..d1d4e48b4f6 --- /dev/null +++ b/arch/x86/math-emu/reg_round.S @@ -0,0 +1,708 @@ + .file "reg_round.S" +/*---------------------------------------------------------------------------+ + | reg_round.S | + | | + | Rounding/truncation/etc for FPU basic arithmetic functions. | + | | + | Copyright (C) 1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | This code has four possible entry points. | + | The following must be entered by a jmp instruction: | + | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | + | | + | The FPU_round entry point is intended to be used by C code. | + | From C, call as: | + | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + | For correct "up" and "down" rounding, the argument must have the correct | + | sign. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Four entry points. | + | | + | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | + | %eax:%ebx 64 bit significand | + | %edx 32 bit extension of the significand | + | %edi pointer to an FPU_REG for the result to be stored | + | stack calling function must have set up a C stack frame and | + | pushed %esi, %edi, and %ebx | + | | + | Needed just for the fpu_reg_round_sqrt entry point: | + | %cx A control word in the same format as the FPU control word. | + | Otherwise, PARAM4 must give such a value. | + | | + | | + | The significand and its extension are assumed to be exact in the | + | following sense: | + | If the significand by itself is the exact result then the significand | + | extension (%edx) must contain 0, otherwise the significand extension | + | must be non-zero. | + | If the significand extension is non-zero then the significand is | + | smaller than the magnitude of the correct exact result by an amount | + | greater than zero and less than one ls bit of the significand. | + | The significand extension is only required to have three possible | + | non-zero values: | + | less than 0x80000000 <=> the significand is less than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | + | smaller than the magnitude of the true | + | exact result. | + | greater than 0x80000000 <=> the significand is more than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The code in this module has become quite complex, but it should handle | + | all of the FPU flags which are set at this stage of the basic arithmetic | + | computations. | + | There are a few rare cases where the results are not set identically to | + | a real FPU. These require a bit more thought because at this stage the | + | results of the code here appear to be more consistent... | + | This may be changed in a future version. | + +---------------------------------------------------------------------------*/ + + +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" + +/* Flags for FPU_bits_lost */ +#define LOST_DOWN $1 +#define LOST_UP $2 + +/* Flags for FPU_denormal */ +#define DENORMAL $1 +#define UNMASKED_UNDERFLOW $2 + + +#ifndef NON_REENTRANT_FPU +/* Make the code re-entrant by putting + local storage on the stack: */ +#define FPU_bits_lost (%esp) +#define FPU_denormal 1(%esp) + +#else +/* Not re-entrant, so we can gain speed by putting + local storage in a static area: */ +.data + .align 4,0 +FPU_bits_lost: + .byte 0 +FPU_denormal: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +.globl fpu_reg_round +.globl fpu_Arith_exit + +/* Entry point when called from C */ +ENTRY(FPU_round) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%edi + movl SIGH(%edi),%eax + movl SIGL(%edi),%ebx + movl PARAM2,%edx + +fpu_reg_round: /* Normal entry point */ + movl PARAM4,%ecx + +#ifndef NON_REENTRANT_FPU + pushl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +#ifdef PARANOID +/* Cannot use this here yet */ +/* orl %eax,%eax */ +/* jns L_entry_bugged */ +#endif /* PARANOID */ + + cmpw EXP_UNDER,EXP(%edi) + jle L_Make_denorm /* The number is a de-normal */ + + movb $0,FPU_denormal /* 0 -> not a de-normal */ + +Denorm_done: + movb $0,FPU_bits_lost /* No bits yet lost in rounding */ + + movl %ecx,%esi + andl CW_PC,%ecx + cmpl PR_64_BITS,%ecx + je LRound_To_64 + + cmpl PR_53_BITS,%ecx + je LRound_To_53 + + cmpl PR_24_BITS,%ecx + je LRound_To_24 + +#ifdef PECULIAR_486 +/* With the precision control bits set to 01 "(reserved)", a real 80486 + behaves as if the precision control bits were set to 11 "64 bits" */ + cmpl PR_RESERVED_BITS,%ecx + je LRound_To_64 +#ifdef PARANOID + jmp L_bugged_denorm_486 +#endif /* PARANOID */ +#else +#ifdef PARANOID + jmp L_bugged_denorm /* There is no bug, just a bad control word */ +#endif /* PARANOID */ +#endif /* PECULIAR_486 */ + + +/* Round etc to 24 bit precision */ +LRound_To_24: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_24 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_24 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_24 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_24 + +#ifdef PARANOID + jmp L_bugged_round24 +#endif /* PARANOID */ + +LUp_24: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_24 /* If negative then up==truncate */ + + jmp LCheck_24_round_up + +LDown_24: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_24 /* If positive then down==truncate */ + +LCheck_24_round_up: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jnz LDo_24_round_up + jmp L_Re_normalise + +LRound_nearest_24: + /* Do rounding of the 24th bit if needed (nearest or even) */ + movl %eax,%ecx + andl $0x000000ff,%ecx + cmpl $0x00000080,%ecx + jc LCheck_truncate_24 /* less than half, no increment needed */ + + jne LGreater_Half_24 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %ebx,%ebx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + orl %edx,%edx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + /* Exactly half, increment only if 24th bit is 1 (round to even) */ + testl $0x00000100,%eax + jz LDo_truncate_24 + +LGreater_Half_24: /* Rounding: increment at the 24th bit */ +LDo_24_round_up: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_UP,FPU_bits_lost + addl $0x00000100,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_24: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jz L_Re_normalise /* No truncation needed */ + +LDo_truncate_24: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_DOWN,FPU_bits_lost + jmp L_Re_normalise + + +/* Round etc to 53 bit precision */ +LRound_To_53: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_53 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_53 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_53 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_53 + +#ifdef PARANOID + jmp L_bugged_round53 +#endif /* PARANOID */ + +LUp_53: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_53 /* If negative then up==truncate */ + + jmp LCheck_53_round_up + +LDown_53: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_53 /* If positive then down==truncate */ + +LCheck_53_round_up: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jnz LDo_53_round_up + jmp L_Re_normalise + +LRound_nearest_53: + /* Do rounding of the 53rd bit if needed (nearest or even) */ + movl %ebx,%ecx + andl $0x000007ff,%ecx + cmpl $0x00000400,%ecx + jc LCheck_truncate_53 /* less than half, no increment needed */ + + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %edx,%edx + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Exactly half, increment only if 53rd bit is 1 (round to even) */ + testl $0x00000800,%ebx + jz LTruncate_53 + +LGreater_Half_53: /* Rounding: increment at the 53rd bit */ +LDo_53_round_up: + movb LOST_UP,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + addl $0x00000800,%ebx + adcl $0,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_53: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jz L_Re_normalise + +LTruncate_53: + movb LOST_DOWN,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + jmp L_Re_normalise + + +/* Round etc to 64 bit precision */ +LRound_To_64: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_64 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_64 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_64 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_64 + +#ifdef PARANOID + jmp L_bugged_round64 +#endif /* PARANOID */ + +LUp_64: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_64 /* If negative then up==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LDown_64: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_64 /* If positive then down==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LRound_nearest_64: + cmpl $0x80000000,%edx + jc LCheck_truncate_64 + + jne LDo_64_round_up + + /* Now test for round-to-even */ + testb $1,%bl + jz LCheck_truncate_64 + +LDo_64_round_up: + movb LOST_UP,FPU_bits_lost + addl $1,%ebx + adcl $0,%eax + +LCheck_Round_Overflow: + jnc L_Re_normalise + + /* Overflow, adjust the result (significand to 1.0) */ + rcrl $1,%eax + rcrl $1,%ebx + incw EXP(%edi) + jmp L_Re_normalise + +LCheck_truncate_64: + orl %edx,%edx + jz L_Re_normalise + +LTruncate_64: + movb LOST_DOWN,FPU_bits_lost + +L_Re_normalise: + testb $0xff,FPU_denormal + jnz Normalise_result + +L_Normalised: + movl TAG_Valid,%edx + +L_deNormalised: + cmpb LOST_UP,FPU_bits_lost + je L_precision_lost_up + + cmpb LOST_DOWN,FPU_bits_lost + je L_precision_lost_down + +L_no_precision_loss: + /* store the result */ + +L_Store_significand: + movl %eax,SIGH(%edi) + movl %ebx,SIGL(%edi) + + cmpw EXP_OVER,EXP(%edi) + jge L_overflow + + movl %edx,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%edi) + andw $0x7fff,EXP(%edi) + +fpu_reg_round_signed_special_exit: + + cmpb SIGN_POS,PARAM5 + je fpu_reg_round_special_exit + + orw $0x8000,EXP(%edi) /* Negative sign for the result. */ + +fpu_reg_round_special_exit: + +#ifndef NON_REENTRANT_FPU + popl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +fpu_Arith_exit: + popl %ebx + popl %edi + popl %esi + leave + ret + + +/* + * Set the FPU status flags to represent precision loss due to + * round-up. + */ +L_precision_lost_up: + push %edx + push %eax + call set_precision_flag_up + popl %eax + popl %edx + jmp L_no_precision_loss + +/* + * Set the FPU status flags to represent precision loss due to + * truncation. + */ +L_precision_lost_down: + push %edx + push %eax + call set_precision_flag_down + popl %eax + popl %edx + jmp L_no_precision_loss + + +/* + * The number is a denormal (which might get rounded up to a normal) + * Shift the number right the required number of bits, which will + * have to be undone later... + */ +L_Make_denorm: + /* The action to be taken depends upon whether the underflow + exception is masked */ + testb CW_Underflow,%cl /* Underflow mask. */ + jz Unmasked_underflow /* Do not make a denormal. */ + + movb DENORMAL,FPU_denormal + + pushl %ecx /* Save */ + movw EXP_UNDER+1,%cx + subw EXP(%edi),%cx + + cmpw $64,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_63 + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_32 + +/* + * We got here without jumps by assuming that the most common requirement + * is for a small de-normalising shift. + * Shift by [1..31] bits + */ + addw %cx,EXP(%edi) + orl %edx,%edx /* extension */ + setne %ch /* Save whether %edx is non-zero */ + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orb %ch,%dl + popl %ecx + jmp Denorm_done + +/* Shift by [32..63] bits */ +Denorm_shift_more_than_32: + addw %cx,EXP(%edi) + subb $32,%cl + orl %edx,%edx + setne %ch + orb %ch,%bl + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %edx,%edx /* test these 32 bits */ + setne %cl + orb %ch,%bl + orb %cl,%bl + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + popl %ecx + jmp Denorm_done + +/* Shift by [64..) bits */ +Denorm_shift_more_than_63: + cmpw $64,%cx + jne Denorm_shift_more_than_64 + +/* Exactly 64 bit shift */ + addw %cx,EXP(%edi) + xorl %ecx,%ecx + orl %edx,%edx + setne %cl + orl %ebx,%ebx + setne %ch + orb %ch,%cl + orb %cl,%al + movl %eax,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + +Denorm_shift_more_than_64: + movw EXP_UNDER+1,EXP(%edi) +/* This is easy, %eax must be non-zero, so.. */ + movl $1,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + + +Unmasked_underflow: + movb UNMASKED_UNDERFLOW,FPU_denormal + jmp Denorm_done + + +/* Undo the de-normalisation. */ +Normalise_result: + cmpb UNMASKED_UNDERFLOW,FPU_denormal + je Signal_underflow + +/* The number must be a denormal if we got here. */ +#ifdef PARANOID + /* But check it... just in case. */ + cmpw EXP_UNDER+1,EXP(%edi) + jne L_norm_bugged +#endif /* PARANOID */ + +#ifdef PECULIAR_486 + /* + * This implements a special feature of 80486 behaviour. + * Underflow will be signalled even if the number is + * not a denormal after rounding. + * This difference occurs only for masked underflow, and not + * in the unmasked case. + * Actual 80486 behaviour differs from this in some circumstances. + */ + orl %eax,%eax /* ms bits */ + js LPseudoDenormal /* Will be masked underflow */ +#else + orl %eax,%eax /* ms bits */ + js L_Normalised /* No longer a denormal */ +#endif /* PECULIAR_486 */ + + jnz LDenormal_adj_exponent + + orl %ebx,%ebx + jz L_underflow_to_zero /* The contents are zero */ + +LDenormal_adj_exponent: + decw EXP(%edi) + +LPseudoDenormal: + testb $0xff,FPU_bits_lost /* bits lost == underflow */ + movl TAG_Special,%edx + jz L_deNormalised + + /* There must be a masked underflow */ + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + movl TAG_Special,%edx + jmp L_deNormalised + + +/* + * The operations resulted in a number too small to represent. + * Masked response. + */ +L_underflow_to_zero: + push %eax + call set_precision_flag_down + popl %eax + + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + +/* Reduce the exponent to EXP_UNDER */ + movw EXP_UNDER,EXP(%edi) + movl TAG_Zero,%edx + jmp L_Store_significand + + +/* The operations resulted in a number too large to represent. */ +L_overflow: + addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ + push %edi + call arith_overflow + pop %edi + jmp fpu_reg_round_signed_special_exit + + +Signal_underflow: + /* The number may have been changed to a non-denormal */ + /* by the rounding operations. */ + cmpw EXP_UNDER,EXP(%edi) + jle Do_unmasked_underflow + + jmp L_Normalised + +Do_unmasked_underflow: + /* Increase the exponent by the magic number */ + addw $(3*(1<<13)),EXP(%edi) + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + jmp L_Normalised + + +#ifdef PARANOID +#ifdef PECULIAR_486 +L_bugged_denorm_486: + pushl EX_INTERNAL|0x236 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#else +L_bugged_denorm: + pushl EX_INTERNAL|0x230 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#endif /* PECULIAR_486 */ + +L_bugged_round24: + pushl EX_INTERNAL|0x231 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round53: + pushl EX_INTERNAL|0x232 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round64: + pushl EX_INTERNAL|0x233 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_norm_bugged: + pushl EX_INTERNAL|0x234 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_entry_bugged: + pushl EX_INTERNAL|0x235 + call EXCEPTION + popl %ebx +L_exception_exit: + mov $-1,%eax + jmp fpu_reg_round_special_exit +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S new file mode 100644 index 00000000000..47c4c2434d8 --- /dev/null +++ b/arch/x86/math-emu/reg_u_add.S @@ -0,0 +1,167 @@ + .file "reg_u_add.S" +/*---------------------------------------------------------------------------+ + | reg_u_add.S | + | | + | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | + | result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their sum as a TAG_Valid or TAG_Special f.p. number. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_add) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + movl %ecx,%edx + subl PARAM7,%ecx /* exp1 - exp2 */ + jge L_arg1_larger + + /* num1 is smaller */ + movl SIGL(%esi),%ebx + movl SIGH(%esi),%eax + + movl %edi,%esi + movl PARAM7,%edx + negw %cx + jmp L_accum_loaded + +L_arg1_larger: + /* num1 has larger or equal exponent */ + movl SIGL(%edi),%ebx + movl SIGH(%edi),%eax + +L_accum_loaded: + movl PARAM3,%edi /* destination */ + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* clear the extension */ + +#ifdef PARANOID + testl $0x80000000,%eax + je L_bugged + + testl $0x80000000,SIGH(%esi) + je L_bugged +#endif /* PARANOID */ + +/* The number to be shifted is in %eax:%ebx:%edx */ + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: + /* Now do the addition */ + addl SIGL(%esi),%ebx + adcl SIGH(%esi),%eax + jnc L_round_the_result + + /* Overflow, adjust the result */ + rcrl $1,%eax + rcrl $1,%ebx + rcrl $1,%edx + jnc L_no_bit_lost + + orl $1,%edx + +L_no_bit_lost: + incw EXP(%edi) + +L_round_the_result: + jmp fpu_reg_round /* Round the result */ + + + +#ifdef PARANOID +/* If we ever get here then we have problems! */ +L_bugged: + pushl EX_INTERNAL|0x201 + call EXCEPTION + pop %ebx + movl $-1,%eax + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S new file mode 100644 index 00000000000..cc00654b6f9 --- /dev/null +++ b/arch/x86/math-emu/reg_u_div.S @@ -0,0 +1,471 @@ + .file "reg_u_div.S" +/*---------------------------------------------------------------------------+ + | reg_u_div.S | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Call from C as: | + | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | + | unsigned int control_word, char *sign) | + | | + | Does not compute the destination exponent, but does adjust it. | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + +/* #define dSIGL(x) (x) */ +/* #define dSIGH(x) 4(x) */ + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_1 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_ovfl_flag -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_1: + .long 0 +FPU_result_2: + .long 0 +FPU_ovfl_flag: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + +#define REGA PARAM1 +#define REGB PARAM2 +#define DEST PARAM3 + +.text +ENTRY(FPU_u_div) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl REGA,%esi + movl REGB,%ebx + movl DEST,%edi + + movswl EXP(%esi),%edx + movswl EXP(%ebx),%eax + subl %eax,%edx + addl EXP_BIAS,%edx + + /* A denormal and a large number can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%edx + jg xExp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%edx + +xExp_not_underflow: + + movw %dx,EXP(%edi) + +#ifdef PARANOID +/* testl $0x80000000, SIGH(%esi) // Dividend */ +/* je L_bugged */ + testl $0x80000000, SIGH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + +/* Check if the divisor can be treated as having just 32 bits */ + cmpl $0,SIGL(%ebx) + jnz L_Full_Division /* Can't do a quick divide */ + +/* We should be able to zip through the division here */ + movl SIGH(%ebx),%ecx /* The divisor */ + movl SIGH(%esi),%edx /* Dividend */ + movl SIGL(%esi),%eax /* Dividend */ + + cmpl %ecx,%edx + setaeb FPU_ovfl_flag /* Keep a record */ + jb L_no_adjust + + subl %ecx,%edx /* Prevent the overflow */ + +L_no_adjust: + /* Divide the 64 bit number by the 32 bit denominator */ + divl %ecx + movl %eax,FPU_result_2 + + /* Work on the remainder of the first division */ + xorl %eax,%eax + divl %ecx + movl %eax,FPU_result_1 + + /* Work on the remainder of the 64 bit division */ + xorl %eax,%eax + divl %ecx + + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je L_no_overflow + + /* Do the shifting here */ + /* increase the exponent */ + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* To set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +L_no_overflow: + jmp LRound_precision /* Do the rounding as required */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | This routine does not use the exponents of arg1 and arg2, but does | + | adjust the exponent of arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + +L_Full_Division: + /* Save extended dividend in local register */ + movl SIGL(%esi),%eax + movl %eax,FPU_accum_2 + movl SIGH(%esi),%eax + movl %eax,FPU_accum_3 + xorl %eax,%eax + movl %eax,FPU_accum_1 /* zero the extension */ + movl %eax,FPU_accum_0 /* zero the extension */ + + movl SIGL(%esi),%eax /* Get the current num */ + movl SIGH(%esi),%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + movb $0,FPU_ovfl_flag + cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ + jb LLess_than_1 + ja LGreater_than_1 + + cmpl SIGL(%ebx),%eax + jb LLess_than_1 + +LGreater_than_1: +/* The dividend is greater or equal, would cause overflow */ + setaeb FPU_ovfl_flag /* Keep a record */ + + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx /* Prevent the overflow */ + movl %eax,FPU_accum_2 + movl %edx,FPU_accum_3 + +LLess_than_1: +/* At this point, we have a dividend < divisor, with a record of + adjustment in FPU_ovfl_flag */ + + /* We will divide by a number which is too large */ + movl SIGH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_2,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_2 /* Correct the answer */ + + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl SIGL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_2 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_1 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_1,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_1 /* Correct the answer */ + adcl $0,FPU_result_2 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + movl FPU_accum_1,%edx /* get the reduced num */ + movl FPU_accum_0,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx /* denom */ + jb LRound_prep + ja LPrevent_3rd_overflow + + cmpl SIGL(%ebx),%eax /* denom */ + jb LRound_prep + +LPrevent_3rd_overflow: + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_1 + movl %eax,FPU_accum_0 + + addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_2 + jne LRound_prep + jnc LRound_prep + + /* This is a tricky spot, there is an overflow of the answer */ + movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ + +LRound_prep: +/* + * Prepare for rounding. + * To test for rounding, we just need to compare 2*accum with the + * denom. + */ + movl FPU_accum_0,%ecx + movl FPU_accum_1,%edx + movl %ecx,%eax + orl %edx,%eax + jz LRound_ovfl /* The accumulator contains zero. */ + + /* Multiply by 2 */ + clc + rcll $1,%ecx + rcll $1,%edx + jc LRound_large /* No need to compare, denom smaller */ + + subl SIGL(%ebx),%ecx + sbbl SIGH(%ebx),%edx + jnc LRound_not_small + + movl $0x70000000,%eax /* Denom was larger */ + jmp LRound_ovfl + +LRound_not_small: + jnz LRound_large + + movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ + jmp LRound_ovfl + +LRound_large: + movl $0xff000000,%eax /* Denom was smaller */ + +LRound_ovfl: +/* We are now ready to deal with rounding, but first we must get + the bits properly aligned */ + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je LRound_precision + + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* Will set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +/* Round the result as required */ +LRound_precision: + decw EXP(%edi) /* binary point between 1st & 2nd bits */ + + movl %eax,%edx + movl FPU_result_1,%ebx + movl FPU_result_2,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x202 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x203 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x204 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + movl $-1,%eax + popl %ebx + popl %edi + popl %esi + + leave + ret +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S new file mode 100644 index 00000000000..973f12af97d --- /dev/null +++ b/arch/x86/math-emu/reg_u_mul.S @@ -0,0 +1,148 @@ + .file "reg_u_mul.S" +/*---------------------------------------------------------------------------+ + | reg_u_mul.S | + | | + | Core multiplication routine | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Basic multiplication routine. | + | Does not check the resulting exponent for overflow/underflow | + | | + | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | + | | + | Internal working is at approx 128 bits. | + | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_0 -4(%ebp) /* ms word */ +#define FPU_accum_1 -8(%ebp) + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_0: + .long 0 +FPU_accum_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(FPU_u_mul) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $8,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + movl PARAM2,%edi + +#ifdef PARANOID + testl $0x80000000,SIGH(%esi) + jz L_bugged + testl $0x80000000,SIGH(%edi) + jz L_bugged +#endif /* PARANOID */ + + xorl %ecx,%ecx + xorl %ebx,%ebx + + movl SIGL(%esi),%eax + mull SIGL(%edi) + movl %eax,FPU_accum_0 + movl %edx,FPU_accum_1 + + movl SIGL(%esi),%eax + mull SIGH(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx +/* adcl $0,%ecx // overflow here is not possible */ + + movl SIGH(%esi),%eax + mull SIGL(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx + adcl $0,%ecx + + movl SIGH(%esi),%eax + mull SIGH(%edi) + addl %eax,%ebx + adcl %edx,%ecx + + /* Get the sum of the exponents. */ + movl PARAM6,%eax + subl EXP_BIAS-1,%eax + + /* Two denormals can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%eax + jg Exp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%eax + +Exp_not_underflow: + +/* Have now finished with the sources */ + movl PARAM3,%edi /* Point to the destination */ + movw %ax,EXP(%edi) + +/* Now make sure that the result is normalized */ + testl $0x80000000,%ecx + jnz LResult_Normalised + + /* Normalize by shifting left one bit */ + shll $1,FPU_accum_0 + rcll $1,FPU_accum_1 + rcll $1,%ebx + rcll $1,%ecx + decw EXP(%edi) + +LResult_Normalised: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + orl %eax,%eax + jz L_extent_zero + + orl $1,%edx + +L_extent_zero: + movl %ecx,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +L_bugged: + pushl EX_INTERNAL|0x205 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ + diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S new file mode 100644 index 00000000000..1b6c24801d2 --- /dev/null +++ b/arch/x86/math-emu/reg_u_sub.S @@ -0,0 +1,272 @@ + .file "reg_u_sub.S" +/*---------------------------------------------------------------------------+ + | reg_u_sub.S | + | | + | Core floating point subtraction routine. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their difference as a TAG_Valid or TAG_Zero f.p. + | number. + | The first number (arg1) must be the larger. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_sub) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + subl PARAM7,%ecx /* exp1 - exp2 */ + +#ifdef PARANOID + /* source 2 is always smaller than source 1 */ + js L_bugged_1 + + testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ + je L_bugged_2 + + testl $0x80000000,SIGH(%esi) + je L_bugged_2 +#endif /* PARANOID */ + +/*--------------------------------------+ + | Form a register holding the | + | smaller number | + +--------------------------------------*/ + movl SIGH(%edi),%eax /* register ms word */ + movl SIGL(%edi),%ebx /* register ls word */ + + movl PARAM3,%edi /* destination */ + movl PARAM6,%edx + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* register extension */ + +/*--------------------------------------+ + | Shift the temporary register | + | right the required number of | + | places. | + +--------------------------------------*/ + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + /* Shift right by 64 bits */ + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + jne L_more_than_65 + + /* Shift right by 65 bits */ + /* Carry is clear if we get here */ + movl %eax,%edx + rcrl %edx + jnc L_shift_65_nc + + orl $1,%edx + jmp L_more_63_no_low + +L_shift_65_nc: + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_65: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: +L_subtr: +/*------------------------------+ + | Do the subtraction | + +------------------------------*/ + xorl %ecx,%ecx + subl %edx,%ecx + movl %ecx,%edx + movl SIGL(%esi),%ecx + sbbl %ebx,%ecx + movl %ecx,%ebx + movl SIGH(%esi),%ecx + sbbl %eax,%ecx + movl %ecx,%eax + +#ifdef PARANOID + /* We can never get a borrow */ + jc L_bugged +#endif /* PARANOID */ + +/*--------------------------------------+ + | Normalize the result | + +--------------------------------------*/ + testl $0x80000000,%eax + jnz L_round /* no shifting needed */ + + orl %eax,%eax + jnz L_shift_1 /* shift left 1 - 31 bits */ + + orl %ebx,%ebx + jnz L_shift_32 /* shift left 32 - 63 bits */ + +/* + * A rare case, the only one which is non-zero if we got here + * is: 1000000 .... 0000 + * -0111111 .... 1111 1 + * -------------------- + * 0000000 .... 0000 1 + */ + + cmpl $0x80000000,%edx + jnz L_must_be_zero + + /* Shift left 64 bits */ + subw $64,EXP(%edi) + xchg %edx,%eax + jmp fpu_reg_round + +L_must_be_zero: +#ifdef PARANOID + orl %edx,%edx + jnz L_bugged_3 +#endif /* PARANOID */ + + /* The result is zero */ + movw $0,EXP(%edi) /* exponent */ + movl $0,SIGL(%edi) + movl $0,SIGH(%edi) + movl TAG_Zero,%eax + jmp L_exit + +L_shift_32: + movl %ebx,%eax + movl %edx,%ebx + movl $0,%edx + subw $32,EXP(%edi) /* Can get underflow here */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shld %cl,%edx,%ebx + shl %cl,%edx + subw %cx,EXP(%edi) /* Can get underflow here */ + +L_round: + jmp fpu_reg_round /* Round the result */ + + +#ifdef PARANOID +L_bugged_1: + pushl EX_INTERNAL|0x206 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_2: + pushl EX_INTERNAL|0x209 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_3: + pushl EX_INTERNAL|0x210 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_4: + pushl EX_INTERNAL|0x211 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged: + pushl EX_INTERNAL|0x212 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_error_exit: + movl $-1,%eax + +#endif /* PARANOID */ + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S new file mode 100644 index 00000000000..bbe0e87718e --- /dev/null +++ b/arch/x86/math-emu/round_Xsig.S @@ -0,0 +1,141 @@ +/*---------------------------------------------------------------------------+ + | round_Xsig.S | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Normalize and round a 12 byte quantity. | + | Call from C as: | + | int round_Xsig(Xsig *n) | + | | + | Normalize a 12 byte quantity. | + | Call from C as: | + | int norm_Xsig(Xsig *n) | + | | + | Each function returns the size of the shift (nr of bits). | + | | + +---------------------------------------------------------------------------*/ + .file "round_Xsig.S" + +#include "fpu_emu.h" + + +.text +ENTRY(round_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_round /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_round: + testl $0x80000000,%eax + jz L_exit + + addl $1,%ebx + adcl $0,%edx + jnz L_exit + + movl $0x80000000,%edx + incl -4(%ebp) + +L_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + + + + +ENTRY(norm_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Already normalized */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Normalized now */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + addl $-32,-4(%ebp) + jmp L_n_exit /* Might not be normalized, + but shift no more. */ + +/* We need to shift left by 1 - 31 bits */ +L_n_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_n_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S new file mode 100644 index 00000000000..31cdd118e91 --- /dev/null +++ b/arch/x86/math-emu/shr_Xsig.S @@ -0,0 +1,87 @@ + .file "shr_Xsig.S" +/*---------------------------------------------------------------------------+ + | shr_Xsig.S | + | | + | 12 byte right shift function | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void shr_Xsig(Xsig *arg, unsigned nr) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 12 byte quantity pointed to by the first arg (arg) | + | right by the number of bits specified by the second arg (nr). | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(shr_Xsig) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%eax /* lsl */ + movl 4(%esi),%ebx /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edx,8(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl 4(%esi),%eax /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl $0,8(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 8(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl %edx,8(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + movl %eax,8(%esi) + popl %esi + leave + ret diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h new file mode 100644 index 00000000000..59e73302aa6 --- /dev/null +++ b/arch/x86/math-emu/status_w.h @@ -0,0 +1,67 @@ +/*---------------------------------------------------------------------------+ + | status_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _STATUS_H_ +#define _STATUS_H_ + +#include "fpu_emu.h" /* for definition of PECULIAR_486 */ + +#ifdef __ASSEMBLY__ +#define Const__(x) $##x +#else +#define Const__(x) x +#endif + +#define SW_Backward Const__(0x8000) /* backward compatibility */ +#define SW_C3 Const__(0x4000) /* condition bit 3 */ +#define SW_Top Const__(0x3800) /* top of stack */ +#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ +#define SW_C2 Const__(0x0400) /* condition bit 2 */ +#define SW_C1 Const__(0x0200) /* condition bit 1 */ +#define SW_C0 Const__(0x0100) /* condition bit 0 */ +#define SW_Summary Const__(0x0080) /* exception summary */ +#define SW_Stack_Fault Const__(0x0040) /* stack fault */ +#define SW_Precision Const__(0x0020) /* loss of precision */ +#define SW_Underflow Const__(0x0010) /* underflow */ +#define SW_Overflow Const__(0x0008) /* overflow */ +#define SW_Zero_Div Const__(0x0004) /* divide by zero */ +#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ +#define SW_Invalid Const__(0x0001) /* invalid operation */ + +#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ + +#ifndef __ASSEMBLY__ + +#define COMP_A_gt_B 1 +#define COMP_A_eq_B 2 +#define COMP_A_lt_B 3 +#define COMP_No_Comp 4 +#define COMP_Denormal 0x20 +#define COMP_NaN 0x40 +#define COMP_SNaN 0x80 + +#define status_word() \ + ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) +static inline void setcc(int cc) +{ + partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); + partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); +} + +#ifdef PECULIAR_486 + /* Default, this conveys no information, but an 80486 does it. */ + /* Clear the SW_C1 bit, "other bits undefined". */ +# define clear_C1() { partial_status &= ~SW_C1; } +# else +# define clear_C1() +#endif /* PECULIAR_486 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _STATUS_H_ */ diff --git a/arch/x86/math-emu/version.h b/arch/x86/math-emu/version.h new file mode 100644 index 00000000000..a0d73a1d2b6 --- /dev/null +++ b/arch/x86/math-emu/version.h @@ -0,0 +1,12 @@ +/*---------------------------------------------------------------------------+ + | version.h | + | | + | | + | Copyright (C) 1992,1993,1994,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#define FPU_VERSION "wm-FPU-emu version 2.01" diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S new file mode 100644 index 00000000000..51842831798 --- /dev/null +++ b/arch/x86/math-emu/wm_shrx.S @@ -0,0 +1,204 @@ + .file "wm_shrx.S" +/*---------------------------------------------------------------------------+ + | wm_shrx.S | + | | + | 64 bit right shift functions | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | and | + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ + +ENTRY(FPU_shrx) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %edx,(%esi) + movl $0,4(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 4(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %edx,(%esi) + movl %edx,4(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + popl %esi + leave + ret + + +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + | Extended shift right function (optimized for small floating point | + | integers). | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | The lower 8 bits of eax are lost and replaced by a flag which is | + | set (to 0x01) if any bit, apart from the first one, is set in the | + | part which has been shifted out of the arg. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ +ENTRY(FPU_shrxs) + push %ebp + movl %esp,%ebp + pushl %esi + pushl %ebx + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc Ls_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jc Ls_less_than_32 + +/* We got here without jumps by assuming that the most common requirement + is for small integers */ +/* Shift by [32..63] bits */ + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %ebx,%ebx + shrd %cl,%eax,%ebx + shrd %cl,%edx,%eax + shr %cl,%edx + orl %ebx,%ebx /* test these 32 bits */ + setne %bl + test $0x7fffffff,%eax /* and 31 bits here */ + setne %bh + orw %bx,%bx /* Any of the 63 bit set ? */ + setne %al + movl %edx,(%esi) + movl $0,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [0..31] bits */ +Ls_less_than_32: + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %al + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [64..95] bits */ +Ls_more_than_63: + cmpl $96,%ecx + jnc Ls_more_than_95 + + subb $64,%cl + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%eax /* msl */ + xorl %edx,%edx /* extension */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %ebx,%edx + setne %bl + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %bh + orw %bx,%bx + setne %al + xorl %edx,%edx + movl %edx,(%esi) /* set to zero */ + movl %edx,4(%esi) /* set to zero */ + popl %ebx + popl %esi + leave + ret + +Ls_more_than_95: +/* Shift by [96..inf) bits */ + xorl %eax,%eax + movl (%esi),%ebx + orl 4(%esi),%ebx + setne %al + xorl %ebx,%ebx + movl %ebx,(%esi) + movl %ebx,4(%esi) + popl %ebx + popl %esi + leave + ret diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 00000000000..d258f59564e --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S @@ -0,0 +1,470 @@ + .file "wm_sqrt.S" +/*---------------------------------------------------------------------------+ + | wm_sqrt.S | + | | + | Fixed point arithmetic square root evaluation. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Call from C as: | + | int wm_sqrt(FPU_REG *n, unsigned int control_word) | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | wm_sqrt(FPU_REG *n, unsigned int control_word) | + | returns the square root of n in n. | + | | + | Use Newton's method to compute the square root of a number, which must | + | be in the range [1.0 .. 4.0), to 64 bits accuracy. | + | Does not check the sign or tag of the argument. | + | Sets the exponent, but not the sign or tag of the result. | + | | + | The guess is kept in %esi:%edi | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_3 -4(%ebp) /* ms word */ +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) + +/* + * The de-normalised argument: + * sq_2 sq_1 sq_0 + * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + * ^ binary point here + */ +#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ +#define FPU_fsqrt_arg_1 -24(%ebp) +#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_3: + .long 0 /* ms word */ +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 + +/* The de-normalised argument: + sq_2 sq_1 sq_0 + b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + ^ binary point here + */ +FPU_fsqrt_arg_2: + .long 0 /* ms word */ +FPU_fsqrt_arg_1: + .long 0 +FPU_fsqrt_arg_0: + .long 0 /* ls word, at most the ms bit is set */ +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(wm_sqrt) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + + movl SIGH(%esi),%eax + movl SIGL(%esi),%ecx + xorl %edx,%edx + +/* We use a rough linear estimate for the first guess.. */ + + cmpw EXP_BIAS,EXP(%esi) + jnz sqrt_arg_ge_2 + + shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ + rcrl $1,%ecx + rcrl $1,%edx + +sqrt_arg_ge_2: +/* From here on, n is never accessed directly again until it is + replaced by the answer. */ + + movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ + movl %ecx,FPU_fsqrt_arg_1 + movl %edx,FPU_fsqrt_arg_0 + +/* Make a linear first estimate */ + shrl $1,%eax + addl $0x40000000,%eax + movl $0xaaaaaaaa,%ecx + mull %ecx + shll %edx /* max result was 7fff... */ + testl $0x80000000,%edx /* but min was 3fff... */ + jnz sqrt_prelim_no_adjust + + movl $0x80000000,%edx /* round up */ + +sqrt_prelim_no_adjust: + movl %edx,%esi /* Our first guess */ + +/* We have now computed (approx) (2 + x) / 3, which forms the basis + for a few iterations of Newton's method */ + + movl FPU_fsqrt_arg_2,%ecx /* ms word */ + +/* + * From our initial estimate, three iterations are enough to get us + * to 30 bits or so. This will then allow two iterations at better + * precision to complete the process. + */ + +/* Compute (g + n/g)/2 at each iteration (g is the guess). */ + shrl %ecx /* Doing this first will prevent a divide */ + /* overflow later. */ + + movl %ecx,%edx /* msw of the arg / 2 */ + divl %esi /* current estimate */ + shrl %esi /* divide by 2 */ + addl %eax,%esi /* the new estimate */ + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + +/* + * Now that an estimate accurate to about 30 bits has been obtained (in %esi), + * we improve it to 60 bits or so. + * + * The strategy from now on is to compute new estimates from + * guess := guess + (n - guess^2) / (2 * guess) + */ + +/* First, find the square of the guess */ + movl %esi,%eax + mull %esi +/* guess^2 now in %edx:%eax */ + + movl FPU_fsqrt_arg_1,%ecx + subl %ecx,%eax + movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ + sbbl %ecx,%edx + jnc sqrt_stage_2_positive + +/* Subtraction gives a negative result, + negate the result before division. */ + notl %edx + notl %eax + addl $1,%eax + adcl $0,%edx + + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + jmp sqrt_stage_2_finish + +sqrt_stage_2_positive: + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + notl %ecx + notl %eax + addl $1,%eax + adcl $0,%ecx + +sqrt_stage_2_finish: + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* Form the new estimate in %esi:%edi */ + movl %eax,%edi + addl %ecx,%esi + + jnz sqrt_stage_2_done /* result should be [1..2) */ + +#ifdef PARANOID +/* It should be possible to get here only if the arg is ffff....ffff */ + cmp $0xffffffff,FPU_fsqrt_arg_1 + jnz sqrt_stage_2_error +#endif /* PARANOID */ + +/* The best rounded result. */ + xorl %eax,%eax + decl %eax + movl %eax,%edi + movl %eax,%esi + movl $0x7fffffff,%eax + jmp sqrt_round_result + +#ifdef PARANOID +sqrt_stage_2_error: + pushl EX_INTERNAL|0x213 + call EXCEPTION +#endif /* PARANOID */ + +sqrt_stage_2_done: + +/* Now the square root has been computed to better than 60 bits. */ + +/* Find the square of the guess. */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,FPU_accum_1 + + movl %esi,%eax + mull %esi + movl %edx,FPU_accum_3 + movl %eax,FPU_accum_2 + + movl %edi,%eax + mull %esi + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* movl %esi,%eax */ +/* mull %edi */ + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ + + movl FPU_fsqrt_arg_0,%eax /* get normalized n */ + subl %eax,FPU_accum_1 + movl FPU_fsqrt_arg_1,%eax + sbbl %eax,FPU_accum_2 + movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ + sbbl %eax,FPU_accum_3 + jnc sqrt_stage_3_positive + +/* Subtraction gives a negative result, + negate the result before division */ + notl FPU_accum_1 + notl FPU_accum_2 + notl FPU_accum_3 + addl $1,FPU_accum_1 + adcl $0,FPU_accum_2 + +#ifdef PARANOID + adcl $0,FPU_accum_3 /* This must be zero */ + jz sqrt_stage_3_no_error + +sqrt_stage_3_error: + pushl EX_INTERNAL|0x207 + call EXCEPTION + +sqrt_stage_3_no_error: +#endif /* PARANOID */ + + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + addl %ecx,%edi + adcl $0,%esi + + jmp sqrt_stage_3_finished + +sqrt_stage_3_positive: + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + notl %eax /* Negate the correction term */ + notl %ecx + addl $1,%eax + adcl $0,%ecx /* carry here ==> correction == 0 */ + adcl $0xffffffff,%esi + + addl %ecx,%edi + adcl $0,%esi + +sqrt_stage_3_finished: + +/* + * The result in %esi:%edi:%esi should be good to about 90 bits here, + * and the rounding information here does not have sufficient accuracy + * in a few rare cases. + */ + cmpl $0xffffffe0,%eax + ja sqrt_near_exact_x + + cmpl $0x00000020,%eax + jb sqrt_near_exact + + cmpl $0x7fffffe0,%eax + jb sqrt_round_result + + cmpl $0x80000020,%eax + jb sqrt_get_more_precision + +sqrt_round_result: +/* Set up for rounding operations */ + movl %eax,%edx + movl %esi,%eax + movl %edi,%ebx + movl PARAM1,%edi + movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ + jmp fpu_reg_round + + +sqrt_near_exact_x: +/* First, the estimate must be rounded up. */ + addl $1,%edi + adcl $0,%esi + +sqrt_near_exact: +/* + * This is an easy case because x^1/2 is monotonic. + * We need just find the square of our estimate, compare it + * with the argument, and deduce whether our estimate is + * above, below, or exact. We use the fact that the estimate + * is known to be accurate to about 90 bits. + */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +#ifdef PARANOID + cmp $0xffffffb0,%ebx + jb sqrt_near_exact_ok + + cmp $0x00000050,%ebx + ja sqrt_near_exact_ok + + pushl EX_INTERNAL|0x214 + call EXCEPTION + +sqrt_near_exact_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_near_exact_small + + jnz sqrt_near_exact_large + + or %ebx,%edx + jnz sqrt_near_exact_large + +/* Our estimate is exactly the right answer */ + xorl %eax,%eax + jmp sqrt_round_result + +sqrt_near_exact_small: +/* Our estimate is too small */ + movl $0x000000ff,%eax + jmp sqrt_round_result + +sqrt_near_exact_large: +/* Our estimate is too large, we need to decrement it */ + subl $1,%edi + sbbl $0,%esi + movl $0xffffff00,%eax + jmp sqrt_round_result + + +sqrt_get_more_precision: +/* This case is almost the same as the above, except we start + with an extra bit of precision in the estimate. */ + stc /* The extra bit. */ + rcll $1,%edi /* Shift the estimate left one bit */ + rcll $1,%esi + + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +/* Put our estimate back to its original value */ + stc /* The ms bit. */ + rcrl $1,%esi /* Shift the estimate left one bit */ + rcrl $1,%edi + +#ifdef PARANOID + cmp $0xffffff60,%ebx + jb sqrt_more_prec_ok + + cmp $0x000000a0,%ebx + ja sqrt_more_prec_ok + + pushl EX_INTERNAL|0x215 + call EXCEPTION + +sqrt_more_prec_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_more_prec_small + + jnz sqrt_more_prec_large + + or %ebx,%ecx + jnz sqrt_more_prec_large + +/* Our estimate is exactly the right answer */ + movl $0x80000000,%eax + jmp sqrt_round_result + +sqrt_more_prec_small: +/* Our estimate is too small */ + movl $0x800000ff,%eax + jmp sqrt_round_result + +sqrt_more_prec_large: +/* Our estimate is too large */ + movl $0x7fffff00,%eax + jmp sqrt_round_result -- cgit v1.2.3